/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
static bool arm_pass_by_reference (cumulative_args_t,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
					unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
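/* A quick check of the arithmetic above: offsets -4088 through 4095
   inclusive cover 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023,
   so each anchor's block is a whole number of doublewords.  */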
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class
struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
					 profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
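/* Each FL_FOR_ARCH* set is built incrementally from its predecessor.
   Expanding the chain by hand for one case:

     FL_FOR_ARCH5TE = FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
		      | FL_ARCH5 | FL_ARCH5E | FL_THUMB

   so testing (insn_flags & FL_ARCH5E) later tells us whether the
   selected architecture is at least v5E.  */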
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb-2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
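/* With this backend's usual register numbering the mask above reduces,
   in effect, to the low registers r0-r7 minus the Thumb frame pointer
   (r7): SP and PC have register numbers above 7, so their bits fall
   outside 0xff anyway.  Which bit (if any) the PIC register clears
   depends on the target's PIC_OFFSET_TABLE_REGNUM.  */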
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};


#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, l1_size, l1_line_size

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,					/* Sched adjust cost.  */
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,					/* Sched adjust cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,					/* Sched adjust cost.  */
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adjust cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adjust cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adjust cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adjust cost.  */
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost
};


/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
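/* A hypothetical entry in arm-cores.def, such as

     ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)

   would expand under the macro above to

     {"arm926ej-s", arm926ejs, "5TEJ", FL_LDSCHED | FL_FOR_ARCH5TEJ,
      &arm_9e_tune},

   pasting the architecture token onto FL_FOR_ARCH and the cost class
   onto arm_..._tune.  (The exact entries live in arm-cores.def.)  */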
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU table.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0, NULL}
};

/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
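/* For example, bit_count (0x29) iterates 0x29 -> 0x28 -> 0x20 -> 0 and
   returns 3: each AND with (value - 1) strips exactly one set bit, so
   the loop runs once per set bit rather than once per bit position.  */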
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point libfuncs.  */
static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}

static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
			    enum machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
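/* The helpers above simply paste names together: for instance, registering
   add_optab with mode name "qq" and suffix 3 produces "__gnu_addqq3", and a
   fract conversion from "qi" to "sf" produces "__gnu_fractqisf".  The actual
   mode-name strings come from the fixed_arith_modes and fixed_conv_modes
   tables used below, so the spellings here are only illustrative.  */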
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ABI based on the AAPCS (i.e. EABI).  */
  if (arm_abi != ARM_ABI_AAPCS)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
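  /* On a soft-float AAPCS target the effect is that, say, a
     double-precision addition compiles to a plain call such as
     "bl __aeabi_dadd", with the two operands passed in r0-r1 and r2-r3
     and the result returned in r0-r1, exactly as an ordinary function
     taking two doubles would be called.  */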
  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
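  /* The upshot: a SImode "a % b" is expanded as a call to __aeabi_idivmod,
     which per the run-time ABI returns the quotient in r0 and the
     remainder in r1; the compiler simply takes the r1 half of the pair.  */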
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =

    const arm_fixed_mode_set fixed_conv_modes[] =

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
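/* Because the tag is ABI-visible, C++ code using va_list sees it mangled
   as "St9__va_list": the ARM C++ ABI places __va_list in namespace std
   for mangling purposes.  */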
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that requires certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
1619 /* Make sure that the processor choice does not conflict with any of the
1620 other command line choices. */
1621 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1622 error ("target CPU does not support ARM mode");
1624 /* BPABI targets use linker tricks to allow interworking on cores
1625 without thumb support. */
1626 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1628 warning (0, "target CPU does not support interworking" );
1629 target_flags &= ~MASK_INTERWORK;
1632 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1634 warning (0, "target CPU does not support THUMB instructions");
1635 target_flags &= ~MASK_THUMB;
1638 if (TARGET_APCS_FRAME && TARGET_THUMB)
1640 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1641 target_flags &= ~MASK_APCS_FRAME;
1644 /* Callee super interworking implies thumb interworking. Adding
1645 this to the flags here simplifies the logic elsewhere. */
1646 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1647 target_flags |= MASK_INTERWORK;
1649 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1650 from here where no function is being compiled currently. */
1651 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1652 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1654 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1655 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1657 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1659 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1660 target_flags |= MASK_APCS_FRAME;
1663 if (TARGET_POKE_FUNCTION_NAME)
1664 target_flags |= MASK_APCS_FRAME;
1666 if (TARGET_APCS_REENT && flag_pic)
1667 error ("-fpic and -mapcs-reent are incompatible");
1669 if (TARGET_APCS_REENT)
1670 warning (0, "APCS reentrant code not supported. Ignored");
1672 /* If this target is normally configured to use APCS frames, warn if they
1673 are turned off and debugging is turned on. */
1675 && write_symbols != NO_DEBUG
1676 && !TARGET_APCS_FRAME
1677 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1678 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1680 if (TARGET_APCS_FLOAT)
1681 warning (0, "passing floating point arguments in fp regs not yet supported");
1683 if (TARGET_LITTLE_WORDS)
1684 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1685 "will be removed in a future release");
1687 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1688 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1689 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1690 arm_arch4t = arm_arch4 && ((insn_flags & FL_THUMB) != 0);
1691 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1692 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1693 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1694 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1695 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1696 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1697 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1698 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1699 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1700 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1702 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1703 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1704 thumb_code = TARGET_ARM == 0;
1705 thumb1_code = TARGET_THUMB1 != 0;
1706 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1707 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1708 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1709 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1710 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1711 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1713 /* If we are not using the default (ARM mode) section anchor offset
1714 ranges, then set the correct ranges now. */
1717 /* Thumb-1 LDR instructions cannot have negative offsets.
1718 Permissible positive offset ranges are 5-bit (for byte loads),
1719 6-bit (for halfword loads), or 7-bit (for word loads).
1720 Empirical results suggest a 7-bit anchor range gives the best
1721 overall code size. */
1722 targetm.min_anchor_offset = 0;
1723 targetm.max_anchor_offset = 127;
1725 else if (TARGET_THUMB2)
1727 /* The minimum is set such that the total size of the block
1728 for a particular anchor is 248 + 1 + 4095 bytes, which is
1729 divisible by eight, ensuring natural spacing of anchors. */
1730 targetm.min_anchor_offset = -248;
1731 targetm.max_anchor_offset = 4095;
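/* A quick check of the arithmetic above (editor's note): the block spans
   the 248 bytes below the anchor, the anchor byte itself, and the 4095
   bytes above it, i.e. 248 + 1 + 4095 = 4344 bytes, which is indeed
   divisible by eight.  */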
1734 /* V5 code we generate is completely interworking capable, so we turn off
1735 TARGET_INTERWORK here to avoid many tests later on. */
1737 /* XXX However, we must pass the right pre-processor defines to CPP
1738 or GLD can get confused. This is a hack. */
1739 if (TARGET_INTERWORK)
1740 arm_cpp_interwork = 1;
1743 target_flags &= ~MASK_INTERWORK;
1745 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1746 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1748 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1749 error ("iwmmxt abi requires an iwmmxt capable cpu");
1751 if (!global_options_set.x_arm_fpu_index)
1753 const char *target_fpu_name;
1756 #ifdef FPUTYPE_DEFAULT
1757 target_fpu_name = FPUTYPE_DEFAULT;
1759 if (arm_arch_cirrus)
1760 target_fpu_name = "maverick";
1762 target_fpu_name = "fpe2";
1765 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1770 arm_fpu_desc = &all_fpus[arm_fpu_index];
1772 switch (arm_fpu_desc->model)
1774 case ARM_FP_MODEL_FPA:
1775 if (arm_fpu_desc->rev == 2)
1776 arm_fpu_attr = FPU_FPE2;
1777 else if (arm_fpu_desc->rev == 3)
1778 arm_fpu_attr = FPU_FPE3;
1780 arm_fpu_attr = FPU_FPA;
1783 case ARM_FP_MODEL_MAVERICK:
1784 arm_fpu_attr = FPU_MAVERICK;
1787 case ARM_FP_MODEL_VFP:
1788 arm_fpu_attr = FPU_VFP;
1795 if (TARGET_AAPCS_BASED
1796 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1797 error ("FPA is unsupported in the AAPCS");
1799 if (TARGET_AAPCS_BASED)
1801 if (TARGET_CALLER_INTERWORKING)
1802 error ("AAPCS does not support -mcaller-super-interworking");
1804 if (TARGET_CALLEE_INTERWORKING)
1805 error ("AAPCS does not support -mcallee-super-interworking");
1808 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1809 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1810 will ever exist. GCC makes no attempt to support this combination. */
1811 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1812 sorry ("iWMMXt and hardware floating point");
1814 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1815 if (TARGET_THUMB2 && TARGET_IWMMXT)
1816 sorry ("Thumb-2 iWMMXt");
1818 /* __fp16 support currently assumes the core has ldrh. */
1819 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1820 sorry ("__fp16 and no ldrh");
1822 /* If soft-float is specified then don't use FPU. */
1823 if (TARGET_SOFT_FLOAT)
1824 arm_fpu_attr = FPU_NONE;
1826 if (TARGET_AAPCS_BASED)
1828 if (arm_abi == ARM_ABI_IWMMXT)
1829 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1830 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1831 && TARGET_HARD_FLOAT
1833 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1835 arm_pcs_default = ARM_PCS_AAPCS;
1839 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1840 sorry ("-mfloat-abi=hard and VFP");
1842 if (arm_abi == ARM_ABI_APCS)
1843 arm_pcs_default = ARM_PCS_APCS;
1845 arm_pcs_default = ARM_PCS_ATPCS;
1848 /* For arm2/3 there is no need to do any scheduling if there is only
1849 a floating point emulator, or we are doing software floating-point. */
1850 if ((TARGET_SOFT_FLOAT
1851 || (TARGET_FPA && arm_fpu_desc->rev))
1852 && (tune_flags & FL_MODE32) == 0)
1853 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1855 /* Use the cp15 method if it is available. */
1856 if (target_thread_pointer == TP_AUTO)
1858 if (arm_arch6k && !TARGET_THUMB1)
1859 target_thread_pointer = TP_CP15;
1861 target_thread_pointer = TP_SOFT;
1864 if (TARGET_HARD_TP && TARGET_THUMB1)
1865 error ("can not use -mtp=cp15 with 16-bit Thumb");
1867 /* Override the default structure alignment for AAPCS ABI. */
1868 if (!global_options_set.x_arm_structure_size_boundary)
1870 if (TARGET_AAPCS_BASED)
1871 arm_structure_size_boundary = 8;
1875 if (arm_structure_size_boundary != 8
1876 && arm_structure_size_boundary != 32
1877 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1879 if (ARM_DOUBLEWORD_ALIGN)
1881 "structure size boundary can only be set to 8, 32 or 64");
1883 warning (0, "structure size boundary can only be set to 8 or 32");
1884 arm_structure_size_boundary
1885 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1889 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1891 error ("RTP PIC is incompatible with Thumb");
1895 /* If stack checking is disabled, we can use r10 as the PIC register,
1896 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1897 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1899 if (TARGET_VXWORKS_RTP)
1900 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1901 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1904 if (flag_pic && TARGET_VXWORKS_RTP)
1905 arm_pic_register = 9;
1907 if (arm_pic_register_string != NULL)
1909 int pic_register = decode_reg_name (arm_pic_register_string);
1912 warning (0, "-mpic-register= is useless without -fpic");
1914 /* Prevent the user from choosing an obviously stupid PIC register. */
1915 else if (pic_register < 0 || call_used_regs[pic_register]
1916 || pic_register == HARD_FRAME_POINTER_REGNUM
1917 || pic_register == STACK_POINTER_REGNUM
1918 || pic_register >= PC_REGNUM
1919 || (TARGET_VXWORKS_RTP
1920 && (unsigned int) pic_register != arm_pic_register))
1921 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1923 arm_pic_register = pic_register;
1926 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1927 if (fix_cm3_ldrd == 2)
1929 if (arm_selected_cpu->core == cortexm3)
1935 /* Enable -munaligned-access by default for
1936 - all ARMv6 architecture-based processors
1937 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1939 Disable -munaligned-access by default for
1940 - all pre-ARMv6 architecture-based processors
1941 - ARMv6-M architecture-based processors. */
1943 if (unaligned_access == 2)
1945 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1946 unaligned_access = 1;
1948 unaligned_access = 0;
1950 else if (unaligned_access == 1
1951 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1953 warning (0, "target CPU does not support unaligned accesses");
1954 unaligned_access = 0;
1957 if (TARGET_THUMB1 && flag_schedule_insns)
1959 /* Don't warn since it's on by default in -O2. */
1960 flag_schedule_insns = 0;
1965 /* If optimizing for size, bump the number of instructions that we
1966 are prepared to conditionally execute (even on a StrongARM). */
1967 max_insns_skipped = 6;
1970 max_insns_skipped = current_tune->max_insns_skipped;
1972 /* Hot/Cold partitioning is not currently supported, since we can't
1973 handle literal pool placement in that case. */
1974 if (flag_reorder_blocks_and_partition)
1976 inform (input_location,
1977 "-freorder-blocks-and-partition not supported on this architecture");
1978 flag_reorder_blocks_and_partition = 0;
1979 flag_reorder_blocks = 1;
1983 /* Hoisting PIC address calculations more aggressively provides a small,
1984 but measurable, size reduction for PIC code. Therefore, we decrease
1985 the bar for unrestricted expression hoisting to the cost of PIC address
1986 calculation, which is 2 instructions. */
1987 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1988 global_options.x_param_values,
1989 global_options_set.x_param_values);
1991 /* ARM EABI defaults to strict volatile bitfields. */
1992 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
1993 && abi_version_at_least(2))
1994 flag_strict_volatile_bitfields = 1;
1996 /* Enable software prefetching at -O3 for CPUs that have prefetch, when we
1997 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1998 if (flag_prefetch_loop_arrays < 0
2001 && current_tune->num_prefetch_slots > 0)
2002 flag_prefetch_loop_arrays = 1;
2004 /* Set up parameters to be used in prefetching algorithm. Do not override the
2005 defaults unless we are tuning for a core we have researched values for. */
2006 if (current_tune->num_prefetch_slots > 0)
2007 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2008 current_tune->num_prefetch_slots,
2009 global_options.x_param_values,
2010 global_options_set.x_param_values);
2011 if (current_tune->l1_cache_line_size >= 0)
2012 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2013 current_tune->l1_cache_line_size,
2014 global_options.x_param_values,
2015 global_options_set.x_param_values);
2016 if (current_tune->l1_cache_size >= 0)
2017 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2018 current_tune->l1_cache_size,
2019 global_options.x_param_values,
2020 global_options_set.x_param_values);
2022 /* Register global variables with the garbage collector. */
2023 arm_add_gc_roots ();
2027 arm_add_gc_roots (void)
2029 gcc_obstack_init (&minipool_obstack);
2030 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2033 /* A table of known ARM exception types.
2034 For use with the interrupt function attribute. */
2038 const char *const arg;
2039 const unsigned long return_value;
2043 static const isr_attribute_arg isr_attribute_args [] =
2045 { "IRQ", ARM_FT_ISR },
2046 { "irq", ARM_FT_ISR },
2047 { "FIQ", ARM_FT_FIQ },
2048 { "fiq", ARM_FT_FIQ },
2049 { "ABORT", ARM_FT_ISR },
2050 { "abort", ARM_FT_ISR },
2051 { "ABORT", ARM_FT_ISR },
2052 { "abort", ARM_FT_ISR },
2053 { "UNDEF", ARM_FT_EXCEPTION },
2054 { "undef", ARM_FT_EXCEPTION },
2055 { "SWI", ARM_FT_EXCEPTION },
2056 { "swi", ARM_FT_EXCEPTION },
2057 { NULL, ARM_FT_NORMAL }
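/* For reference, a sketch of how these strings reach this table from user
   code (hypothetical example, not part of this file):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value below extracts the string "IRQ" from the attribute's
   argument list and maps it to ARM_FT_ISR via this table.  */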
2060 /* Returns the (interrupt) function type of the current
2061 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2063 static unsigned long
2064 arm_isr_value (tree argument)
2066 const isr_attribute_arg * ptr;
2070 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2072 /* No argument - default to IRQ. */
2073 if (argument == NULL_TREE)
2076 /* Get the value of the argument. */
2077 if (TREE_VALUE (argument) == NULL_TREE
2078 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2079 return ARM_FT_UNKNOWN;
2081 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2083 /* Check it against the list of known arguments. */
2084 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2085 if (streq (arg, ptr->arg))
2086 return ptr->return_value;
2088 /* An unrecognized interrupt type. */
2089 return ARM_FT_UNKNOWN;
2092 /* Computes the type of the current function. */
2094 static unsigned long
2095 arm_compute_func_type (void)
2097 unsigned long type = ARM_FT_UNKNOWN;
2101 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2103 /* Decide if the current function is volatile. Such functions
2104 never return, and many memory cycles can be saved by not storing
2105 register values that will never be needed again. This optimization
2106 was added to speed up context switching in a kernel application. */
2108 && (TREE_NOTHROW (current_function_decl)
2109 || !(flag_unwind_tables
2111 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2112 && TREE_THIS_VOLATILE (current_function_decl))
2113 type |= ARM_FT_VOLATILE;
2115 if (cfun->static_chain_decl != NULL)
2116 type |= ARM_FT_NESTED;
2118 attr = DECL_ATTRIBUTES (current_function_decl);
2120 a = lookup_attribute ("naked", attr);
2122 type |= ARM_FT_NAKED;
2124 a = lookup_attribute ("isr", attr);
2126 a = lookup_attribute ("interrupt", attr);
2129 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2131 type |= arm_isr_value (TREE_VALUE (a));
2136 /* Returns the type of the current function. */
2139 arm_current_func_type (void)
2141 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2142 cfun->machine->func_type = arm_compute_func_type ();
2144 return cfun->machine->func_type;
2148 arm_allocate_stack_slots_for_args (void)
2150 /* Naked functions should not allocate stack slots for arguments. */
2151 return !IS_NAKED (arm_current_func_type ());
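/* For example (editor's note), a function declared as

     void boot (void) __attribute__ ((naked));

   has IS_NAKED true for its function type, so no stack slots are allocated
   for its incoming arguments; the body is expected to be bare asm.  */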
2155 /* Output assembler code for a block containing the constant parts
2156 of a trampoline, leaving space for the variable parts.
2158 On the ARM, (if r8 is the static chain regnum, and remembering that
2159 referencing pc adds an offset of 8) the trampoline looks like:
2162 .word static chain value
2163 .word function's address
2164 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
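/* Concretely (editor's sketch, keeping the r8 assumption above), the
   ARM-mode template emitted below assembles to:

	ldr	r8, [pc, #0]	@ pc reads as . + 8, so this loads the
	ldr	pc, [pc, #0]	@ word at offset 8, then the one at 12
	.word	0		@ patched with the static chain value
	.word	0		@ patched with the function's address

   arm_trampoline_init below then stores the chain and target address into
   those two words and calls __clear_cache over the block.  */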
2167 arm_asm_trampoline_template (FILE *f)
2171 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2172 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2174 else if (TARGET_THUMB2)
2176 /* The Thumb-2 trampoline is similar to the ARM implementation.
2177 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2178 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2179 STATIC_CHAIN_REGNUM, PC_REGNUM);
2180 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2184 ASM_OUTPUT_ALIGN (f, 2);
2185 fprintf (f, "\t.code\t16\n");
2186 fprintf (f, ".Ltrampoline_start:\n");
2187 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2188 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2189 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2190 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2191 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2192 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2194 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2195 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2198 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2201 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2203 rtx fnaddr, mem, a_tramp;
2205 emit_block_move (m_tramp, assemble_trampoline_template (),
2206 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2208 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2209 emit_move_insn (mem, chain_value);
2211 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2212 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2213 emit_move_insn (mem, fnaddr);
2215 a_tramp = XEXP (m_tramp, 0);
2216 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2217 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2218 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
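/* A trampoline is only needed when the address of a nested function
   escapes.  A hypothetical source-level example, where apply is an
   assumed callback consumer rather than a real function:

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       return apply (inner);
     }

   Taking inner's address forces a stack trampoline that carries X.  */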
2221 /* Thumb trampolines should be entered in thumb mode, so set
2222 the bottom bit of the address. */
2225 arm_trampoline_adjust_address (rtx addr)
2228 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2229 NULL, 0, OPTAB_LIB_WIDEN);
2233 /* Return 1 if it is possible to return using a single instruction.
2234 If SIBLING is non-null, this is a test for a return before a sibling
2235 call. SIBLING is the call insn, so we can examine its register usage. */
2238 use_return_insn (int iscond, rtx sibling)
2241 unsigned int func_type;
2242 unsigned long saved_int_regs;
2243 unsigned HOST_WIDE_INT stack_adjust;
2244 arm_stack_offsets *offsets;
2246 /* Never use a return instruction before reload has run. */
2247 if (!reload_completed)
2250 func_type = arm_current_func_type ();
2252 /* Naked, volatile and stack alignment functions need special
2254 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2257 /* So do interrupt functions that use the frame pointer and Thumb
2258 interrupt functions. */
2259 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2262 offsets = arm_get_frame_offsets ();
2263 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2265 /* As do variadic functions. */
2266 if (crtl->args.pretend_args_size
2267 || cfun->machine->uses_anonymous_args
2268 /* Or if the function calls __builtin_eh_return () */
2269 || crtl->calls_eh_return
2270 /* Or if the function calls alloca */
2271 || cfun->calls_alloca
2272 /* Or if there is a stack adjustment. However, if the stack pointer
2273 is saved on the stack, we can use a pre-incrementing stack load. */
2274 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2275 && stack_adjust == 4)))
2278 saved_int_regs = offsets->saved_regs_mask;
2280 /* Unfortunately, the insn
2282 ldmib sp, {..., sp, ...}
2284 triggers a bug on most SA-110 based devices, such that the stack
2285 pointer won't be correctly restored if the instruction takes a
2286 page fault. We work around this problem by popping r3 along with
2287 the other registers, since that is never slower than executing
2288 another instruction.
2290 We test for !arm_arch5 here, because code for any architecture
2291 less than this could potentially be run on one of the buggy
2293 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2295 /* Validate that r3 is a call-clobbered register (always true in
2296 the default abi) ... */
2297 if (!call_used_regs[3])
2300 /* ... that it isn't being used for a return value ... */
2301 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2304 /* ... or for a tail-call argument ... */
2307 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2309 if (find_regno_fusage (sibling, USE, 3))
2313 /* ... and that there are no call-saved registers in r0-r2
2314 (always true in the default ABI). */
2315 if (saved_int_regs & 0x7)
2319 /* Can't be done if interworking with Thumb, and any registers have been
2321 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2324 /* On StrongARM, conditional returns are expensive if they aren't
2325 taken and multiple registers have been stacked. */
2326 if (iscond && arm_tune_strongarm)
2328 /* Conditional return when just the LR is stored is a simple
2329 conditional-load instruction, that's not expensive. */
2330 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2334 && arm_pic_register != INVALID_REGNUM
2335 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2339 /* If there are saved registers but the LR isn't saved, then we need
2340 two instructions for the return. */
2341 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2344 /* Can't be done if any of the FPA regs are pushed,
2345 since this also requires an insn. */
2346 if (TARGET_HARD_FLOAT && TARGET_FPA)
2347 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2348 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2351 /* Likewise VFP regs. */
2352 if (TARGET_HARD_FLOAT && TARGET_VFP)
2353 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2354 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2357 if (TARGET_REALLY_IWMMXT)
2358 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2359 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2365 /* Return TRUE if int I is a valid immediate ARM constant. */
2368 const_ok_for_arm (HOST_WIDE_INT i)
2372 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2373 be all zero, or all one. */
2374 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2375 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2376 != ((~(unsigned HOST_WIDE_INT) 0)
2377 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2380 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2382 /* Fast return for 0 and small values. We must do this for zero, since
2383 the code below can't handle that one case. */
2384 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2387 /* Get the number of trailing zeros. */
2388 lowbit = ffs((int) i) - 1;
2390 /* Only even shifts are allowed in ARM mode so round down to the
2391 nearest even number. */
2395 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2400 /* Allow rotated constants in ARM mode. */
2402 && ((i & ~0xc000003f) == 0
2403 || (i & ~0xf000000f) == 0
2404 || (i & ~0xfc000003) == 0))
2411 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2414 if (i == v || i == (v | (v << 8)))
2417 /* Allow repeated pattern 0xXY00XY00. */
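/* A minimal standalone sketch of the whole test (editor's illustration,
   not part of GCC): an ARM-mode immediate is an 8-bit value rotated right
   by an even amount, so rotating the candidate left by every even amount
   and checking for an 8-bit result is equivalent.  For example, 0x0000ab00
   is 0xab rotated right by 24 and is valid; 0x00012300 spans nine
   significant bits and is not.

     static int
     valid_arm_immediate (unsigned int v)
     {
       unsigned int r;
       for (r = 0; r < 32; r += 2)
	 {
	   unsigned int rot = (v << r) | (v >> ((32 - r) & 31));
	   if ((rot & ~0xffu) == 0)
	     return 1;
	 }
       return 0;
     }
*/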
2427 /* Return true if I is a valid constant for the operation CODE. */
2429 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2431 if (const_ok_for_arm (i))
2437 /* See if we can use movw. */
2438 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2441 /* Otherwise, try mvn. */
2442 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
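/* E.g. (editor's note) SET of 0x1234 is not an 8-bit rotated immediate,
   but movw loads any 16-bit value in one insn, and SET of 0xffffff00
   can be done as mvn with the encodable inverse #0xff.  */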
2445 /* See if we can use addw or subw. */
2447 && ((i & 0xfffff000) == 0
2448 || ((-i) & 0xfffff000) == 0))
2450 /* else fall through. */
2470 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2472 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2478 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2482 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
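/* E.g. (editor's note) a mask such as 0xffffff00 is not itself encodable,
   but its inverse 0xff is, so an AND can be emitted as bic (and, on
   Thumb-2, an IOR as orn) with #0xff.  */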
2489 /* Emit a sequence of insns to handle a large constant.
2490 CODE is the code of the operation required, it can be any of SET, PLUS,
2491 IOR, AND, XOR, MINUS;
2492 MODE is the mode in which the operation is being performed;
2493 VAL is the integer to operate on;
2494 SOURCE is the other operand (a register, or a null-pointer for SET);
2495 SUBTARGETS means it is safe to create scratch registers if that will
2496 either produce a simpler sequence, or we will want to cse the values.
2497 Return value is the number of insns emitted. */
2499 /* ??? Tweak this for thumb2. */
2501 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2502 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2506 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2507 cond = COND_EXEC_TEST (PATTERN (insn));
2511 if (subtargets || code == SET
2512 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2513 && REGNO (target) != REGNO (source)))
2515 /* After arm_reorg has been called, we can't fix up expensive
2516 constants by pushing them into memory so we must synthesize
2517 them in-line, regardless of the cost. This is only likely to
2518 be more costly on chips that have load delay slots and we are
2519 compiling without running the scheduler (so no splitting
2520 occurred before the final instruction emission).
2522 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2524 if (!after_arm_reorg
2526 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2528 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2533 /* Currently SET is the only monadic operation for CODE; all
2534 the rest are dyadic. */
2535 if (TARGET_USE_MOVT)
2536 arm_emit_movpair (target, GEN_INT (val));
2538 emit_set_insn (target, GEN_INT (val));
2544 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2546 if (TARGET_USE_MOVT)
2547 arm_emit_movpair (temp, GEN_INT (val));
2549 emit_set_insn (temp, GEN_INT (val));
2551 /* For MINUS, the value is subtracted from, since we never
2552 have subtraction of a constant. */
2554 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2556 emit_set_insn (target,
2557 gen_rtx_fmt_ee (code, mode, source, temp));
2563 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2567 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2568 ARM/Thumb-2 immediates and add up to VAL.
2569 The function return value gives the number of insns required. */
2571 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2572 struct four_ints *return_sequence)
2574 int best_consecutive_zeros = 0;
2578 struct four_ints tmp_sequence;
2580 /* If we aren't targeting ARM, the best place to start is always at
2581 the bottom, otherwise look more closely. */
2584 for (i = 0; i < 32; i += 2)
2586 int consecutive_zeros = 0;
2588 if (!(val & (3 << i)))
2590 while ((i < 32) && !(val & (3 << i)))
2592 consecutive_zeros += 2;
2595 if (consecutive_zeros > best_consecutive_zeros)
2597 best_consecutive_zeros = consecutive_zeros;
2598 best_start = i - consecutive_zeros;
2605 /* So long as it won't require any more insns to do so, it's
2606 desirable to emit a small constant (in bits 0...9) in the last
2607 insn. This way there is more chance that it can be combined with
2608 a later addressing insn to form a pre-indexed load or store
2609 operation. Consider:
2611 *((volatile int *)0xe0000100) = 1;
2612 *((volatile int *)0xe0000110) = 2;
2614 We want this to wind up as:
2618 str rB, [rA, #0x100]
2620 str rB, [rA, #0x110]
2622 rather than having to synthesize both large constants from scratch.
2624 Therefore, we calculate how many insns would be required to emit
2625 the constant starting from `best_start', and also starting from
2626 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2627 yield a shorter sequence, we may as well use zero. */
2628 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2630 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2632 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2633 if (insns2 <= insns1)
2635 *return_sequence = tmp_sequence;
2643 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2645 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2646 struct four_ints *return_sequence, int i)
2648 int remainder = val & 0xffffffff;
2651 /* Try and find a way of doing the job in either two or three
2654 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2655 location. We start at position I. This may be the MSB, or
2656 optimal_immediate_sequence may have positioned it at the largest block
2657 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2658 wrapping around to the top of the word when we drop off the bottom.
2659 In the worst case this code should produce no more than four insns.
2661 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2662 constants, shifted to any arbitrary location. We should always start
2667 unsigned int b1, b2, b3, b4;
2668 unsigned HOST_WIDE_INT result;
2671 gcc_assert (insns < 4);
2676 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2677 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2680 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2681 /* We can use addw/subw for the last 12 bits. */
2685 /* Use an 8-bit shifted/rotated immediate. */
2689 result = remainder & ((0x0ff << end)
2690 | ((i < end) ? (0xff >> (32 - end))
2697 /* ARM allows rotates by a multiple of two. Thumb-2 allows
2698 arbitrary shifts. */
2699 i -= TARGET_ARM ? 2 : 1;
2703 /* Next, see if we can do a better job with a thumb2 replicated
2706 We do it this way around to catch the cases like 0x01F001E0 where
2707 two 8-bit immediates would work, but a replicated constant would
2710 TODO: 16-bit constants that don't clear all the bits, but still win.
2711 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2714 b1 = (remainder & 0xff000000) >> 24;
2715 b2 = (remainder & 0x00ff0000) >> 16;
2716 b3 = (remainder & 0x0000ff00) >> 8;
2717 b4 = remainder & 0xff;
2721 /* The 8-bit immediate already found clears b1 (and maybe b2),
2722 but must leave b3 and b4 alone. */
2724 /* First try to find a 32-bit replicated constant that clears
2725 almost everything. We can assume that we can't do it in one,
2726 or else we wouldn't be here. */
2727 unsigned int tmp = b1 & b2 & b3 & b4;
2728 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2730 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2731 + (tmp == b3) + (tmp == b4);
2733 && (matching_bytes >= 3
2734 || (matching_bytes == 2
2735 && const_ok_for_op (remainder & ~tmp2, code))))
2737 /* At least 3 of the bytes match, and the fourth has at
2738 least as many bits set, or two of the bytes match
2739 and it will only require one more insn to finish. */
2747 /* Second, try to find a 16-bit replicated constant that can
2748 leave three of the bytes clear. If b2 or b4 is already
2749 zero, then we can. If the 8-bit from above would not
2750 clear b2 anyway, then we still win. */
2751 else if (b1 == b3 && (!b2 || !b4
2752 || (remainder & 0x00ff0000 & ~result)))
2754 result = remainder & 0xff00ff00;
2760 /* The 8-bit immediate already found clears b2 (and maybe b3)
2761 and we don't get here unless b1 is already clear, but it will
2762 leave b4 unchanged. */
2764 /* If we can clear b2 and b4 at once, then we win, since the
2765 8-bits couldn't possibly reach that far. */
2768 result = remainder & 0x00ff00ff;
2774 return_sequence->i[insns++] = result;
2775 remainder &= ~result;
2777 if (code == SET || code == MINUS)
2785 /* Emit an instruction with the indicated PATTERN. If COND is
2786 non-NULL, conditionalize the execution of the instruction on COND
2790 emit_constant_insn (rtx cond, rtx pattern)
2793 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2794 emit_insn (pattern);
2797 /* As above, but extra parameter GENERATE which, if clear, suppresses
2801 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2802 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2807 int final_invert = 0;
2809 int set_sign_bit_copies = 0;
2810 int clear_sign_bit_copies = 0;
2811 int clear_zero_bit_copies = 0;
2812 int set_zero_bit_copies = 0;
2813 int insns = 0, neg_insns, inv_insns;
2814 unsigned HOST_WIDE_INT temp1, temp2;
2815 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2816 struct four_ints *immediates;
2817 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2819 /* Find out which operations are safe for a given CODE. Also do a quick
2820 check for degenerate cases; these can occur when DImode operations
2833 if (remainder == 0xffffffff)
2836 emit_constant_insn (cond,
2837 gen_rtx_SET (VOIDmode, target,
2838 GEN_INT (ARM_SIGN_EXTEND (val))));
2844 if (reload_completed && rtx_equal_p (target, source))
2848 emit_constant_insn (cond,
2849 gen_rtx_SET (VOIDmode, target, source));
2858 emit_constant_insn (cond,
2859 gen_rtx_SET (VOIDmode, target, const0_rtx));
2862 if (remainder == 0xffffffff)
2864 if (reload_completed && rtx_equal_p (target, source))
2867 emit_constant_insn (cond,
2868 gen_rtx_SET (VOIDmode, target, source));
2877 if (reload_completed && rtx_equal_p (target, source))
2880 emit_constant_insn (cond,
2881 gen_rtx_SET (VOIDmode, target, source));
2885 if (remainder == 0xffffffff)
2888 emit_constant_insn (cond,
2889 gen_rtx_SET (VOIDmode, target,
2890 gen_rtx_NOT (mode, source)));
2897 /* We treat MINUS as (val - source), since (source - val) is always
2898 passed as (source + (-val)). */
2902 emit_constant_insn (cond,
2903 gen_rtx_SET (VOIDmode, target,
2904 gen_rtx_NEG (mode, source)));
2907 if (const_ok_for_arm (val))
2910 emit_constant_insn (cond,
2911 gen_rtx_SET (VOIDmode, target,
2912 gen_rtx_MINUS (mode, GEN_INT (val),
2923 /* If we can do it in one insn get out quickly. */
2924 if (const_ok_for_op (val, code))
2927 emit_constant_insn (cond,
2928 gen_rtx_SET (VOIDmode, target,
2930 ? gen_rtx_fmt_ee (code, mode, source,
2936 /* Calculate a few attributes that may be useful for specific
2938 /* Count number of leading zeros. */
2939 for (i = 31; i >= 0; i--)
2941 if ((remainder & (1 << i)) == 0)
2942 clear_sign_bit_copies++;
2947 /* Count number of leading 1's. */
2948 for (i = 31; i >= 0; i--)
2950 if ((remainder & (1 << i)) != 0)
2951 set_sign_bit_copies++;
2956 /* Count number of trailing zeros. */
2957 for (i = 0; i <= 31; i++)
2959 if ((remainder & (1 << i)) == 0)
2960 clear_zero_bit_copies++;
2965 /* Count number of trailing 1's. */
2966 for (i = 0; i <= 31; i++)
2968 if ((remainder & (1 << i)) != 0)
2969 set_zero_bit_copies++;
2977 /* See if we can do this by sign_extending a constant that is known
2978 to be negative. This is a good way of doing it, since the shift
2979 may well merge into a subsequent insn. */
2980 if (set_sign_bit_copies > 1)
2982 if (const_ok_for_arm
2983 (temp1 = ARM_SIGN_EXTEND (remainder
2984 << (set_sign_bit_copies - 1))))
2988 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2989 emit_constant_insn (cond,
2990 gen_rtx_SET (VOIDmode, new_src,
2992 emit_constant_insn (cond,
2993 gen_ashrsi3 (target, new_src,
2994 GEN_INT (set_sign_bit_copies - 1)));
2998 /* For an inverted constant, we will need to set the low bits,
2999 these will be shifted out of harm's way. */
3000 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3001 if (const_ok_for_arm (~temp1))
3005 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3006 emit_constant_insn (cond,
3007 gen_rtx_SET (VOIDmode, new_src,
3009 emit_constant_insn (cond,
3010 gen_ashrsi3 (target, new_src,
3011 GEN_INT (set_sign_bit_copies - 1)));
3017 /* See if we can calculate the value as the difference between two
3018 valid immediates. */
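/* E.g. (editor's note) 0x000fffff is one less than the single-bit
   constant 0x00100000, so it can be built as mov rT, #0x100000
   followed by sub target, rT, #1.  */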
3019 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3021 int topshift = clear_sign_bit_copies & ~1;
3023 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3024 & (0xff000000 >> topshift));
3026 /* If temp1 is zero, then that means the 9 most significant
3027 bits of remainder were 1 and we've caused it to overflow.
3028 When topshift is 0 we don't need to do anything since we
3029 can borrow from 'bit 32'. */
3030 if (temp1 == 0 && topshift != 0)
3031 temp1 = 0x80000000 >> (topshift - 1);
3033 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3035 if (const_ok_for_arm (temp2))
3039 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3040 emit_constant_insn (cond,
3041 gen_rtx_SET (VOIDmode, new_src,
3043 emit_constant_insn (cond,
3044 gen_addsi3 (target, new_src,
3052 /* See if we can generate this by setting the bottom (or the top)
3053 16 bits, and then shifting these into the other half of the
3054 word. We only look for the simplest cases, to do more would cost
3055 too much. Be careful, however, not to generate this when the
3056 alternative would take fewer insns. */
3057 if (val & 0xffff0000)
3059 temp1 = remainder & 0xffff0000;
3060 temp2 = remainder & 0x0000ffff;
3062 /* Overlaps outside this range are best done using other methods. */
3063 for (i = 9; i < 24; i++)
3065 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3066 && !const_ok_for_arm (temp2))
3068 rtx new_src = (subtargets
3069 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3071 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3072 source, subtargets, generate);
3080 gen_rtx_ASHIFT (mode, source,
3087 /* Don't duplicate cases already considered. */
3088 for (i = 17; i < 24; i++)
3090 if (((temp1 | (temp1 >> i)) == remainder)
3091 && !const_ok_for_arm (temp1))
3093 rtx new_src = (subtargets
3094 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3096 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3097 source, subtargets, generate);
3102 gen_rtx_SET (VOIDmode, target,
3105 gen_rtx_LSHIFTRT (mode, source,
3116 /* If we have IOR or XOR, and the constant can be loaded in a
3117 single instruction, and we can find a temporary to put it in,
3118 then this can be done in two instructions instead of 3-4. */
3120 /* TARGET can't be NULL if SUBTARGETS is 0 */
3121 || (reload_completed && !reg_mentioned_p (target, source)))
3123 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3127 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3129 emit_constant_insn (cond,
3130 gen_rtx_SET (VOIDmode, sub,
3132 emit_constant_insn (cond,
3133 gen_rtx_SET (VOIDmode, target,
3134 gen_rtx_fmt_ee (code, mode,
3145 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3146 and the remainder 0s for e.g. 0xfff00000)
3147 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3149 This can be done in 2 instructions by using shifts with mov or mvn.
3154 mvn r0, r0, lsr #12 */
3155 if (set_sign_bit_copies > 8
3156 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3160 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3161 rtx shift = GEN_INT (set_sign_bit_copies);
3165 gen_rtx_SET (VOIDmode, sub,
3167 gen_rtx_ASHIFT (mode,
3172 gen_rtx_SET (VOIDmode, target,
3174 gen_rtx_LSHIFTRT (mode, sub,
3181 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3183 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3185 For example, r0 = r0 | 0xfff
3190 if (set_zero_bit_copies > 8
3191 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3195 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3196 rtx shift = GEN_INT (set_zero_bit_copies);
3200 gen_rtx_SET (VOIDmode, sub,
3202 gen_rtx_LSHIFTRT (mode,
3207 gen_rtx_SET (VOIDmode, target,
3209 gen_rtx_ASHIFT (mode, sub,
3215 /* This will never be reached for Thumb-2 because orn is a valid
3216 instruction. This is for Thumb-1 and the 32-bit ARM cases.
3218 x = y | constant (such that ~constant is a valid constant)
3220 x = ~(~y & ~constant).
3222 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3226 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3227 emit_constant_insn (cond,
3228 gen_rtx_SET (VOIDmode, sub,
3229 gen_rtx_NOT (mode, source)));
3232 sub = gen_reg_rtx (mode);
3233 emit_constant_insn (cond,
3234 gen_rtx_SET (VOIDmode, sub,
3235 gen_rtx_AND (mode, source,
3237 emit_constant_insn (cond,
3238 gen_rtx_SET (VOIDmode, target,
3239 gen_rtx_NOT (mode, sub)));
3246 /* See if two shifts will do 2 or more insns' worth of work. */
3247 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3249 HOST_WIDE_INT shift_mask = ((0xffffffff
3250 << (32 - clear_sign_bit_copies))
3253 if ((remainder | shift_mask) != 0xffffffff)
3257 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3258 insns = arm_gen_constant (AND, mode, cond,
3259 remainder | shift_mask,
3260 new_src, source, subtargets, 1);
3265 rtx targ = subtargets ? NULL_RTX : target;
3266 insns = arm_gen_constant (AND, mode, cond,
3267 remainder | shift_mask,
3268 targ, source, subtargets, 0);
3274 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3275 rtx shift = GEN_INT (clear_sign_bit_copies);
3277 emit_insn (gen_ashlsi3 (new_src, source, shift));
3278 emit_insn (gen_lshrsi3 (target, new_src, shift));
3284 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3286 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3288 if ((remainder | shift_mask) != 0xffffffff)
3292 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3294 insns = arm_gen_constant (AND, mode, cond,
3295 remainder | shift_mask,
3296 new_src, source, subtargets, 1);
3301 rtx targ = subtargets ? NULL_RTX : target;
3303 insns = arm_gen_constant (AND, mode, cond,
3304 remainder | shift_mask,
3305 targ, source, subtargets, 0);
3311 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3312 rtx shift = GEN_INT (clear_zero_bit_copies);
3314 emit_insn (gen_lshrsi3 (new_src, source, shift));
3315 emit_insn (gen_ashlsi3 (target, new_src, shift));
3327 /* Calculate what the instruction sequences would be if we generated it
3328 normally, negated, or inverted. */
3330 /* AND cannot be split into multiple insns, so invert and use BIC. */
3333 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3336 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3341 if (can_invert || final_invert)
3342 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3347 immediates = &pos_immediates;
3349 /* Is the negated immediate sequence more efficient? */
3350 if (neg_insns < insns && neg_insns <= inv_insns)
3353 immediates = &neg_immediates;
3358 /* Is the inverted immediate sequence more efficient?
3359 We must allow for an extra NOT instruction for XOR operations, although
3360 there is some chance that the final 'mvn' will get optimized later. */
3361 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3364 immediates = &inv_immediates;
3372 /* Now output the chosen sequence as instructions. */
3375 for (i = 0; i < insns; i++)
3377 rtx new_src, temp1_rtx;
3379 temp1 = immediates->i[i];
3381 if (code == SET || code == MINUS)
3382 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3383 else if ((final_invert || i < (insns - 1)) && subtargets)
3384 new_src = gen_reg_rtx (mode);
3390 else if (can_negate)
3393 temp1 = trunc_int_for_mode (temp1, mode);
3394 temp1_rtx = GEN_INT (temp1);
3398 else if (code == MINUS)
3399 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3401 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3403 emit_constant_insn (cond,
3404 gen_rtx_SET (VOIDmode, new_src,
3410 can_negate = can_invert;
3414 else if (code == MINUS)
3422 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3423 gen_rtx_NOT (mode, source)));
3430 /* Canonicalize a comparison so that we are more likely to recognize it.
3431 This can be done for a few constant compares, where we can make the
3432 immediate value easier to load. */
3435 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3437 enum machine_mode mode;
3438 unsigned HOST_WIDE_INT i, maxval;
3440 mode = GET_MODE (*op0);
3441 if (mode == VOIDmode)
3442 mode = GET_MODE (*op1);
3444 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3446 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3447 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3448 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3449 for GTU/LEU in Thumb mode. */
3454 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3456 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3459 if (code == GT || code == LE
3460 || (!TARGET_ARM && (code == GTU || code == LEU)))
3462 /* Missing comparison. First try to use an available
3464 if (GET_CODE (*op1) == CONST_INT)
3472 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3474 *op1 = GEN_INT (i + 1);
3475 return code == GT ? GE : LT;
3480 if (i != ~((unsigned HOST_WIDE_INT) 0)
3481 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3483 *op1 = GEN_INT (i + 1);
3484 return code == GTU ? GEU : LTU;
3492 /* If that did not work, reverse the condition. */
3496 return swap_condition (code);
3502 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3503 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3504 to facilitate possible combining with a cmp into 'ands'. */
3506 && GET_CODE (*op0) == ZERO_EXTEND
3507 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3508 && GET_MODE (XEXP (*op0, 0)) == QImode
3509 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3510 && subreg_lowpart_p (XEXP (*op0, 0))
3511 && *op1 == const0_rtx)
3512 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3515 /* Comparisons smaller than DImode. Only adjust comparisons against
3516 an out-of-range constant. */
3517 if (GET_CODE (*op1) != CONST_INT
3518 || const_ok_for_arm (INTVAL (*op1))
3519 || const_ok_for_arm (- INTVAL (*op1)))
3533 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3535 *op1 = GEN_INT (i + 1);
3536 return code == GT ? GE : LT;
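/* E.g. (editor's note) "x > 0xfffff" cannot encode 0xfffff as an
   immediate, but the equivalent "x >= 0x100000" needs only the
   single-bit constant 0x100000, which is valid.  */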
3543 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3545 *op1 = GEN_INT (i - 1);
3546 return code == GE ? GT : LE;
3552 if (i != ~((unsigned HOST_WIDE_INT) 0)
3553 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3555 *op1 = GEN_INT (i + 1);
3556 return code == GTU ? GEU : LTU;
3563 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3565 *op1 = GEN_INT (i - 1);
3566 return code == GEU ? GTU : LEU;
3578 /* Define how to find the value returned by a function. */
3581 arm_function_value (const_tree type, const_tree func,
3582 bool outgoing ATTRIBUTE_UNUSED)
3584 enum machine_mode mode;
3585 int unsignedp ATTRIBUTE_UNUSED;
3586 rtx r ATTRIBUTE_UNUSED;
3588 mode = TYPE_MODE (type);
3590 if (TARGET_AAPCS_BASED)
3591 return aapcs_allocate_return_reg (mode, type, func);
3593 /* Promote integer types. */
3594 if (INTEGRAL_TYPE_P (type))
3595 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3597 /* Promote small structs returned in a register to full-word size
3598 for big-endian AAPCS. */
3599 if (arm_return_in_msb (type))
3601 HOST_WIDE_INT size = int_size_in_bytes (type);
3602 if (size % UNITS_PER_WORD != 0)
3604 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3605 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
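/* E.g. (editor's note) a 3-byte struct is widened here to a 4-byte
   integer mode, so on a big-endian AAPCS target its value ends up in
   the most significant bits of the return register.  */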
3609 return arm_libcall_value_1 (mode);
3613 libcall_eq (const void *p1, const void *p2)
3615 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3619 libcall_hash (const void *p1)
3621 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3625 add_libcall (htab_t htab, rtx libcall)
3627 *htab_find_slot (htab, libcall, INSERT) = libcall;
3631 arm_libcall_uses_aapcs_base (const_rtx libcall)
3633 static bool init_done = false;
3634 static htab_t libcall_htab;
3640 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3642 add_libcall (libcall_htab,
3643 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3644 add_libcall (libcall_htab,
3645 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3646 add_libcall (libcall_htab,
3647 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3648 add_libcall (libcall_htab,
3649 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3651 add_libcall (libcall_htab,
3652 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3653 add_libcall (libcall_htab,
3654 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3655 add_libcall (libcall_htab,
3656 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3657 add_libcall (libcall_htab,
3658 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3660 add_libcall (libcall_htab,
3661 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3662 add_libcall (libcall_htab,
3663 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3664 add_libcall (libcall_htab,
3665 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3666 add_libcall (libcall_htab,
3667 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3668 add_libcall (libcall_htab,
3669 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3670 add_libcall (libcall_htab,
3671 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3673 /* Values from double-precision helper functions are returned in core
3674 registers if the selected core only supports single-precision
3675 arithmetic, even if we are using the hard-float ABI. The same is
3676 true for single-precision helpers, but we will never be using the
3677 hard-float ABI on a CPU which doesn't support single-precision
3678 operations in hardware. */
3679 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3680 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3681 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3682 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3683 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3684 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3685 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3686 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3687 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3688 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3689 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3690 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3692 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3696 return libcall && htab_find (libcall_htab, libcall) != NULL;
3700 arm_libcall_value_1 (enum machine_mode mode)
3702 if (TARGET_AAPCS_BASED)
3703 return aapcs_libcall_value (mode);
3704 else if (TARGET_32BIT
3705 && TARGET_HARD_FLOAT_ABI
3707 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3708 return gen_rtx_REG (mode, FIRST_FPA_REGNUM);
3709 else if (TARGET_32BIT
3710 && TARGET_HARD_FLOAT_ABI
3712 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3713 return gen_rtx_REG (mode, FIRST_CIRRUS_FP_REGNUM);
3714 else if (TARGET_IWMMXT_ABI
3715 && arm_vector_mode_supported_p (mode))
3716 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3718 return gen_rtx_REG (mode, ARG_REGISTER (1));
3721 /* Define how to find the value returned by a library function
3722 assuming the value has mode MODE. */
3725 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3727 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3728 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3730 /* The following libcalls return their result in integer registers,
3731 even though they return a floating point value. */
3732 if (arm_libcall_uses_aapcs_base (libcall))
3733 return gen_rtx_REG (mode, ARG_REGISTER(1));
3737 return arm_libcall_value_1 (mode);
3740 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3743 arm_function_value_regno_p (const unsigned int regno)
3745 if (regno == ARG_REGISTER (1)
3747 && TARGET_AAPCS_BASED
3749 && TARGET_HARD_FLOAT
3750 && regno == FIRST_VFP_REGNUM)
3752 && TARGET_HARD_FLOAT_ABI
3754 && regno == FIRST_CIRRUS_FP_REGNUM)
3755 || (TARGET_IWMMXT_ABI
3756 && regno == FIRST_IWMMXT_REGNUM)
3758 && TARGET_HARD_FLOAT_ABI
3760 && regno == FIRST_FPA_REGNUM))
3766 /* Determine the amount of memory needed to store the possible return
3767 registers of an untyped call. */
3769 arm_apply_result_size (void)
3775 if (TARGET_HARD_FLOAT_ABI)
3781 if (TARGET_MAVERICK)
3784 if (TARGET_IWMMXT_ABI)
3791 /* Decide whether TYPE should be returned in memory (true)
3792 or in a register (false). FNTYPE is the type of the function making
3795 arm_return_in_memory (const_tree type, const_tree fntype)
3799 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3801 if (TARGET_AAPCS_BASED)
3803 /* Simple, non-aggregate types (i.e. not including vectors and
3804 complex) are always returned in a register (or registers).
3805 We don't care about which register here, so we can short-cut
3806 some of the detail. */
3807 if (!AGGREGATE_TYPE_P (type)
3808 && TREE_CODE (type) != VECTOR_TYPE
3809 && TREE_CODE (type) != COMPLEX_TYPE)
3812 /* Any return value that is no larger than one word can be
3814 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3817 /* Check any available co-processors to see if they accept the
3818 type as a register candidate (VFP, for example, can return
3819 some aggregates in consecutive registers). These aren't
3820 available if the call is variadic. */
3821 if (aapcs_select_return_coproc (type, fntype) >= 0)
3824 /* Vector values should be returned using ARM registers, not
3825 memory (unless they're over 16 bytes, which will break since
3826 we only have four call-clobbered registers to play with). */
3827 if (TREE_CODE (type) == VECTOR_TYPE)
3828 return (size < 0 || size > (4 * UNITS_PER_WORD));
3830 /* The rest go in memory. */
3834 if (TREE_CODE (type) == VECTOR_TYPE)
3835 return (size < 0 || size > (4 * UNITS_PER_WORD));
3837 if (!AGGREGATE_TYPE_P (type)
3838 && (TREE_CODE (type) != VECTOR_TYPE))
3839 /* All simple types are returned in registers. */
3842 if (arm_abi != ARM_ABI_APCS)
3844 /* ATPCS and later return aggregate types in memory only if they are
3845 larger than a word (or are variable size). */
3846 return (size < 0 || size > UNITS_PER_WORD);
3849 /* For the arm-wince targets we choose to be compatible with Microsoft's
3850 ARM and Thumb compilers, which always return aggregates in memory. */
3852 /* All structures/unions bigger than one word are returned in memory.
3853 Also catch the case where int_size_in_bytes returns -1. In this case
3854 the aggregate is either huge or of variable size, and in either case
3855 we will want to return it via memory and not in a register. */
3856 if (size < 0 || size > UNITS_PER_WORD)
3859 if (TREE_CODE (type) == RECORD_TYPE)
3863 /* For a struct the APCS says that we only return in a register
3864 if the type is 'integer like' and every addressable element
3865 has an offset of zero. For practical purposes this means
3866 that the structure can have at most one non bit-field element
3867 and that this element must be the first one in the structure. */
3869 /* Find the first field, ignoring non FIELD_DECL things which will
3870 have been created by C++. */
3871 for (field = TYPE_FIELDS (type);
3872 field && TREE_CODE (field) != FIELD_DECL;
3873 field = DECL_CHAIN (field))
3877 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3879 /* Check that the first field is valid for returning in a register. */
3881 /* ... Floats are not allowed */
3882 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3885 /* ... Aggregates that are not themselves valid for returning in
3886 a register are not allowed. */
3887 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3890 /* Now check the remaining fields, if any. Only bitfields are allowed,
3891 since they are not addressable. */
3892 for (field = DECL_CHAIN (field);
3894 field = DECL_CHAIN (field))
3896 if (TREE_CODE (field) != FIELD_DECL)
3899 if (!DECL_BIT_FIELD_TYPE (field))
3906 if (TREE_CODE (type) == UNION_TYPE)
3910 /* Unions can be returned in registers if every element is
3911 integral, or can be returned in an integer register. */
3912 for (field = TYPE_FIELDS (type);
3914 field = DECL_CHAIN (field))
3916 if (TREE_CODE (field) != FIELD_DECL)
3919 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3922 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3928 #endif /* not ARM_WINCE */
3930 /* Return all other types in memory. */
3934 /* Indicate whether or not words of a double are in big-endian order. */
3937 arm_float_words_big_endian (void)
3939 if (TARGET_MAVERICK)
3942 /* For FPA, float words are always big-endian. For VFP, float words
3943 follow the memory system mode. */
3951 return (TARGET_BIG_END ? 1 : 0);
3956 const struct pcs_attribute_arg
3960 } pcs_attribute_args[] =
3962 {"aapcs", ARM_PCS_AAPCS},
3963 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3965 /* We could recognize these, but changes would be needed elsewhere
3966 to implement them. */
3967 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3968 {"atpcs", ARM_PCS_ATPCS},
3969 {"apcs", ARM_PCS_APCS},
3971 {NULL, ARM_PCS_UNKNOWN}
3975 arm_pcs_from_attribute (tree attr)
3977 const struct pcs_attribute_arg *ptr;
3980 /* Get the value of the argument. */
3981 if (TREE_VALUE (attr) == NULL_TREE
3982 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3983 return ARM_PCS_UNKNOWN;
3985 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3987 /* Check it against the list of known arguments. */
3988 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3989 if (streq (arg, ptr->arg))
3992 /* An unrecognized PCS name. */
3993 return ARM_PCS_UNKNOWN;
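/* For reference (editor's sketch), the attribute this parses looks like:

     double dot (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   which requests the VFP variant of the AAPCS for calls to dot.  */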
3996 /* Get the PCS variant to use for this call. TYPE is the function's type
3997 specification, DECL is the specific declaration. DECL may be null if
3998 the call could be indirect or if this is a library call. */
4000 arm_get_pcs_model (const_tree type, const_tree decl)
4002 bool user_convention = false;
4003 enum arm_pcs user_pcs = arm_pcs_default;
4008 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4011 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4012 user_convention = true;
4015 if (TARGET_AAPCS_BASED)
4017 /* Detect varargs functions. These always use the base rules
4018 (no argument is ever a candidate for a co-processor
4020 bool base_rules = stdarg_p (type);
4022 if (user_convention)
4024 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4025 sorry ("non-AAPCS derived PCS variant");
4026 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4027 error ("variadic functions must use the base AAPCS variant");
4031 return ARM_PCS_AAPCS;
4032 else if (user_convention)
4034 else if (decl && flag_unit_at_a_time)
4036 /* Local functions never leak outside this compilation unit,
4037 so we are free to use whatever conventions are
4039 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4040 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4042 return ARM_PCS_AAPCS_LOCAL;
4045 else if (user_convention && user_pcs != arm_pcs_default)
4046 sorry ("PCS variant");
4048 /* For everything else we use the target's default. */
4049 return arm_pcs_default;
4054 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4055 const_tree fntype ATTRIBUTE_UNUSED,
4056 rtx libcall ATTRIBUTE_UNUSED,
4057 const_tree fndecl ATTRIBUTE_UNUSED)
4059 /* Record the unallocated VFP registers. */
4060 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4061 pcum->aapcs_vfp_reg_alloc = 0;
4064 /* Walk down the type tree of TYPE counting consecutive base elements.
4065 If *MODEP is VOIDmode, then set it to the first valid floating point
4066 type. If a non-floating point type is found, or if a floating point
4067 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4068 otherwise return the count in the sub-tree. */
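/* For example, given

     struct hfa { double x; double y; };

   the walk sets *MODEP to DFmode and returns 2; a struct mixing
   float and double members would return -1 instead.  */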
4070 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4072 enum machine_mode mode;
4075 switch (TREE_CODE (type))
4078 mode = TYPE_MODE (type);
4079 if (mode != DFmode && mode != SFmode)
4082 if (*modep == VOIDmode)
4091 mode = TYPE_MODE (TREE_TYPE (type));
4092 if (mode != DFmode && mode != SFmode)
4095 if (*modep == VOIDmode)
4104 /* Use V2SImode and V4SImode as representatives of all 64-bit
4105 and 128-bit vector types, whether or not those modes are
4106 supported with the present options. */
4107 size = int_size_in_bytes (type);
4120 if (*modep == VOIDmode)
4123 /* Vector modes are considered to be opaque: two vectors are
4124 equivalent for the purposes of being homogeneous aggregates
4125 if they are the same size. */
4134 tree index = TYPE_DOMAIN (type);
4136 /* Can't handle incomplete types. */
4137 if (!COMPLETE_TYPE_P(type))
4140 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4143 || !TYPE_MAX_VALUE (index)
4144 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4145 || !TYPE_MIN_VALUE (index)
4146 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4150 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4151 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4153 /* There must be no padding. */
4154 if (!host_integerp (TYPE_SIZE (type), 1)
4155 || (tree_low_cst (TYPE_SIZE (type), 1)
4156 != count * GET_MODE_BITSIZE (*modep)))
4168 /* Can't handle incomplete types. */
4169 if (!COMPLETE_TYPE_P(type))
4172 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4174 if (TREE_CODE (field) != FIELD_DECL)
4177 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4183 /* There must be no padding. */
4184 if (!host_integerp (TYPE_SIZE (type), 1)
4185 || (tree_low_cst (TYPE_SIZE (type), 1)
4186 != count * GET_MODE_BITSIZE (*modep)))
4193 case QUAL_UNION_TYPE:
4195 /* These aren't very interesting except in a degenerate case. */
4200 /* Can't handle incomplete types. */
4201 if (!COMPLETE_TYPE_P(type))
4204 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4206 if (TREE_CODE (field) != FIELD_DECL)
4209 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4212 count = count > sub_count ? count : sub_count;
4215 /* There must be no padding. */
4216 if (!host_integerp (TYPE_SIZE (type), 1)
4217 || (tree_low_cst (TYPE_SIZE (type), 1)
4218 != count * GET_MODE_BITSIZE (*modep)))
4231 /* Return true if PCS_VARIANT should use VFP registers. */
4233 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4235 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4237 static bool seen_thumb1_vfp = false;
4239 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4241 sorry ("Thumb-1 hard-float VFP ABI");
4242 /* sorry() is not immediately fatal, so only display this once. */
4243 seen_thumb1_vfp = true;
4249 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4252 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4253 (TARGET_VFP_DOUBLE || !is_double));
4257 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4258 enum machine_mode mode, const_tree type,
4259 enum machine_mode *base_mode, int *count)
4261 enum machine_mode new_mode = VOIDmode;
4263 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4264 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4265 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4270 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4273 new_mode = (mode == DCmode ? DFmode : SFmode);
4275 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4277 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4279 if (ag_count > 0 && ag_count <= 4)
4288 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4291 *base_mode = new_mode;
4296 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4297 enum machine_mode mode, const_tree type)
4299 int count ATTRIBUTE_UNUSED;
4300 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4302 if (!use_vfp_abi (pcs_variant, false))
4304 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4309 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4312 if (!use_vfp_abi (pcum->pcs_variant, false))
4315 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4316 &pcum->aapcs_vfp_rmode,
4317 &pcum->aapcs_vfp_rcount);
4321 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4322 const_tree type ATTRIBUTE_UNUSED)
4324 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4325 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
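/* For instance, an aggregate of two doubles has shift == 2 (each
   double spans two S registers) and mask == 0xf, so the loop below
   looks for four consecutive free S registers starting at an even
   register number.  */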
4328 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4329 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4331 pcum->aapcs_vfp_reg_alloc = mask << regno;
4332 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4335 int rcount = pcum->aapcs_vfp_rcount;
4337 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4341 /* Avoid using unsupported vector modes. */
4342 if (rmode == V2SImode)
4344 else if (rmode == V4SImode)
4351 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4352 for (i = 0; i < rcount; i++)
4354 rtx tmp = gen_rtx_REG (rmode,
4355 FIRST_VFP_REGNUM + regno + i * rshift);
4356 tmp = gen_rtx_EXPR_LIST
4358 GEN_INT (i * GET_MODE_SIZE (rmode)));
4359 XVECEXP (par, 0, i) = tmp;
4362 pcum->aapcs_reg = par;
4365 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4372 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4373 enum machine_mode mode,
4374 const_tree type ATTRIBUTE_UNUSED)
4376 if (!use_vfp_abi (pcs_variant, false))
4379 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4382 enum machine_mode ag_mode;
4387 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4392 if (ag_mode == V2SImode)
4394 else if (ag_mode == V4SImode)
4400 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4401 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4402 for (i = 0; i < count; i++)
4404 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4405 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4406 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4407 XVECEXP (par, 0, i) = tmp;
4413 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4417 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4418 enum machine_mode mode ATTRIBUTE_UNUSED,
4419 const_tree type ATTRIBUTE_UNUSED)
4421 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4422 pcum->aapcs_vfp_reg_alloc = 0;
4426 #define AAPCS_CP(X) \
4428 aapcs_ ## X ## _cum_init, \
4429 aapcs_ ## X ## _is_call_candidate, \
4430 aapcs_ ## X ## _allocate, \
4431 aapcs_ ## X ## _is_return_candidate, \
4432 aapcs_ ## X ## _allocate_return_reg, \
4433 aapcs_ ## X ## _advance \
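/* So AAPCS_CP(vfp) expands to the six aapcs_vfp_* hooks defined
   above, in the order the structure below declares them.  */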
4436 /* Table of co-processors that can be used to pass arguments in
4437 registers. Ideally no argument should be a candidate for more than
4438 one co-processor table entry, but the table is processed in order
4439 and stops after the first match. If that entry then fails to put
4440 the argument into a co-processor register, the argument will go on
4444 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4445 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4447 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4448 BLKmode) is a candidate for this co-processor's registers; this
4449 function should ignore any position-dependent state in
4450 CUMULATIVE_ARGS and only use call-type dependent information. */
4451 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4453 /* Return true if the argument does get a co-processor register; it
4454 should set aapcs_reg to an RTX of the register allocated as is
4455 required for a return from FUNCTION_ARG. */
4456 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4458 /* Return true if a result of mode MODE (or type TYPE if MODE is
4459 BLKmode) can be returned in this co-processor's registers. */
4460 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4462 /* Allocate and return an RTX element to hold the return type of a
4463 call; this routine must not fail and will only be called if
4464 is_return_candidate returned true with the same parameters. */
4465 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4467 /* Finish processing this argument and prepare to start processing
4469 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4470 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4478 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4483 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4484 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4491 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4493 /* We aren't passed a decl, so we can't check that a call is local.
4494 However, it isn't clear that that would be a win anyway, since it
4495 might limit some tail-calling opportunities. */
4496 enum arm_pcs pcs_variant;
4500 const_tree fndecl = NULL_TREE;
4502 if (TREE_CODE (fntype) == FUNCTION_DECL)
4505 fntype = TREE_TYPE (fntype);
4508 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4511 pcs_variant = arm_pcs_default;
4513 if (pcs_variant != ARM_PCS_AAPCS)
4517 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4518 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4527 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4530 /* We aren't passed a decl, so we can't check that a call is local.
4531 However, it isn't clear that that would be a win anyway, since it
4532 might limit some tail-calling opportunities. */
4533 enum arm_pcs pcs_variant;
4534 int unsignedp ATTRIBUTE_UNUSED;
4538 const_tree fndecl = NULL_TREE;
4540 if (TREE_CODE (fntype) == FUNCTION_DECL)
4543 fntype = TREE_TYPE (fntype);
4546 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4549 pcs_variant = arm_pcs_default;
4551 /* Promote integer types. */
4552 if (type && INTEGRAL_TYPE_P (type))
4553 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4555 if (pcs_variant != ARM_PCS_AAPCS)
4559 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4560 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4562 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4566 /* Promote small structs returned in a register to full-word size
4567 for big-endian AAPCS. */
4568 if (type && arm_return_in_msb (type))
4570 HOST_WIDE_INT size = int_size_in_bytes (type);
4571 if (size % UNITS_PER_WORD != 0)
4573 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4574 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4578 return gen_rtx_REG (mode, R0_REGNUM);
4582 aapcs_libcall_value (enum machine_mode mode)
4584 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4585 && GET_MODE_SIZE (mode) <= 4)
4588 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4591 /* Lay out a function argument using the AAPCS rules. The rule
4592 numbers referred to here are those in the AAPCS. */
4594 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4595 const_tree type, bool named)
4600 /* We only need to do this once per argument. */
4601 if (pcum->aapcs_arg_processed)
4604 pcum->aapcs_arg_processed = true;
4606 /* Special case: if named is false then we are handling an incoming
4607 anonymous argument which is on the stack. */
4611 /* Is this a potential co-processor register candidate? */
4612 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4614 int slot = aapcs_select_call_coproc (pcum, mode, type);
4615 pcum->aapcs_cprc_slot = slot;
4617 /* We don't have to apply any of the rules from part B of the
4618 preparation phase, these are handled elsewhere in the
4623 /* A Co-processor register candidate goes either in its own
4624 class of registers or on the stack. */
4625 if (!pcum->aapcs_cprc_failed[slot])
4627 /* C1.cp - Try to allocate the argument to co-processor
4629 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4632 /* C2.cp - Put the argument on the stack and note that we
4633 can't assign any more candidates in this slot. We also
4634 need to note that we have allocated stack space, so that
4635 we won't later try to split a non-cprc candidate between
4636 core registers and the stack. */
4637 pcum->aapcs_cprc_failed[slot] = true;
4638 pcum->can_split = false;
4641 /* We didn't get a register, so this argument goes on the
4643 gcc_assert (pcum->can_split == false);
4648 /* C3 - For double-word aligned arguments, round the NCRN up to the
4649 next even number. */
4650 ncrn = pcum->aapcs_ncrn;
4651 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4654 nregs = ARM_NUM_REGS2(mode, type);
4656 /* Sigh, this test should really assert that nregs > 0, but a GCC
4657 extension allows empty structs and then gives them empty size; it
4658 then allows such a structure to be passed by value. For some of
4659 the code below we have to pretend that such an argument has
4660 non-zero size so that we 'locate' it correctly either in
4661 registers or on the stack. */
4662 gcc_assert (nregs >= 0);
4664 nregs2 = nregs ? nregs : 1;
4666 /* C4 - Argument fits entirely in core registers. */
4667 if (ncrn + nregs2 <= NUM_ARG_REGS)
4669 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4670 pcum->aapcs_next_ncrn = ncrn + nregs;
4674 /* C5 - Some core registers left and there are no arguments already
4675 on the stack: split this argument between the remaining core
4676 registers and the stack. */
4677 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4679 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4680 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4681 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4685 /* C6 - NCRN is set to 4. */
4686 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4688 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
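/* As a worked example, take f (int a, double b) under the base AAPCS
   with b passed in core registers: a is allocated to r0; rule C3 then
   rounds the NCRN up from 1 to 2, so the doubleword-aligned b is
   passed in the pair r2/r3.  */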
4692 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4693 for a call to a function whose data type is FNTYPE.
4694 For a library call, FNTYPE is NULL. */
4696 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4698 tree fndecl ATTRIBUTE_UNUSED)
4700 /* Long call handling. */
4702 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4704 pcum->pcs_variant = arm_pcs_default;
4706 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4708 if (arm_libcall_uses_aapcs_base (libname))
4709 pcum->pcs_variant = ARM_PCS_AAPCS;
4711 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4712 pcum->aapcs_reg = NULL_RTX;
4713 pcum->aapcs_partial = 0;
4714 pcum->aapcs_arg_processed = false;
4715 pcum->aapcs_cprc_slot = -1;
4716 pcum->can_split = true;
4718 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4722 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4724 pcum->aapcs_cprc_failed[i] = false;
4725 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4733 /* On the ARM, the offset starts at 0. */
4735 pcum->iwmmxt_nregs = 0;
4736 pcum->can_split = true;
4738 /* Varargs vectors are treated the same as long long.
4739 named_count avoids having to change the way arm handles 'named'. */
4740 pcum->named_count = 0;
4743 if (TARGET_REALLY_IWMMXT && fntype)
4747 for (fn_arg = TYPE_ARG_TYPES (fntype);
4749 fn_arg = TREE_CHAIN (fn_arg))
4750 pcum->named_count += 1;
4752 if (! pcum->named_count)
4753 pcum->named_count = INT_MAX;
4758 /* Return true if mode/type need doubleword alignment. */
4760 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4762 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4763 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
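/* E.g. on AAPCS targets DImode and DFmode have 64-bit alignment,
   which exceeds the 32-bit PARM_BOUNDARY, so such arguments must
   start in an even-numbered core register.  */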
4767 /* Determine where to put an argument to a function.
4768 Value is zero to push the argument on the stack,
4769 or a hard register in which to store the argument.
4771 MODE is the argument's machine mode.
4772 TYPE is the data type of the argument (as a tree).
4773 This is null for libcalls where that information may
4775 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4776 the preceding args and about the function being called.
4777 NAMED is nonzero if this argument is a named parameter
4778 (otherwise it is an extra parameter matching an ellipsis).
4780 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4781 other arguments are passed on the stack. If (NAMED == 0) (which happens
4782 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4783 defined), say it is passed on the stack (function_prologue will
4784 indeed pass it on the stack if necessary). */
4787 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4788 const_tree type, bool named)
4790 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4793 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4794 a call insn (op3 of a call_value insn). */
4795 if (mode == VOIDmode)
4798 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4800 aapcs_layout_arg (pcum, mode, type, named);
4801 return pcum->aapcs_reg;
4804 /* Varargs vectors are treated the same as long long.
4805 named_count avoids having to change the way arm handles 'named'. */
4806 if (TARGET_IWMMXT_ABI
4807 && arm_vector_mode_supported_p (mode)
4808 && pcum->named_count > pcum->nargs + 1)
4810 if (pcum->iwmmxt_nregs <= 9)
4811 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4814 pcum->can_split = false;
4819 /* Put doubleword aligned quantities in even register pairs. */
4821 && ARM_DOUBLEWORD_ALIGN
4822 && arm_needs_doubleword_align (mode, type))
4825 /* Only allow splitting an arg between regs and memory if all preceding
4826 args were allocated to regs. For args passed by reference we only count
4827 the reference pointer. */
4828 if (pcum->can_split)
4831 nregs = ARM_NUM_REGS2 (mode, type);
4833 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4836 return gen_rtx_REG (mode, pcum->nregs);
4840 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4842 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4843 ? DOUBLEWORD_ALIGNMENT
4848 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4849 tree type, bool named)
4851 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4852 int nregs = pcum->nregs;
4854 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4856 aapcs_layout_arg (pcum, mode, type, named);
4857 return pcum->aapcs_partial;
4860 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4863 if (NUM_ARG_REGS > nregs
4864 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4866 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4871 /* Update the data in PCUM to advance over an argument
4872 of mode MODE and data type TYPE.
4873 (TYPE is null for libcalls where that information may not be available.) */
4876 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4877 const_tree type, bool named)
4879 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4881 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4883 aapcs_layout_arg (pcum, mode, type, named);
4885 if (pcum->aapcs_cprc_slot >= 0)
4887 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4889 pcum->aapcs_cprc_slot = -1;
4892 /* Generic stuff. */
4893 pcum->aapcs_arg_processed = false;
4894 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4895 pcum->aapcs_reg = NULL_RTX;
4896 pcum->aapcs_partial = 0;
4901 if (arm_vector_mode_supported_p (mode)
4902 && pcum->named_count > pcum->nargs
4903 && TARGET_IWMMXT_ABI)
4904 pcum->iwmmxt_nregs += 1;
4906 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4910 /* Variable sized types are passed by reference. This is a GCC
4911 extension to the ARM ABI. */
4914 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4915 enum machine_mode mode ATTRIBUTE_UNUSED,
4916 const_tree type, bool named ATTRIBUTE_UNUSED)
4918 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
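/* E.g. a parameter whose type is a C99 variable-length array has a
   non-constant TYPE_SIZE and is therefore passed by reference.  */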
4921 /* Encode the current state of the #pragma [no_]long_calls. */
4924 OFF, /* No #pragma [no_]long_calls is in effect. */
4925 LONG, /* #pragma long_calls is in effect. */
4926 SHORT /* #pragma no_long_calls is in effect. */
4929 static arm_pragma_enum arm_pragma_long_calls = OFF;
4932 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4934 arm_pragma_long_calls = LONG;
4938 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4940 arm_pragma_long_calls = SHORT;
4944 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4946 arm_pragma_long_calls = OFF;
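/* For example, wrapping declarations as

     #pragma long_calls
     void far_away (void);
     #pragma long_calls_off

   marks the type of far_away so that calls to it use the long-call
   sequence, just as __attribute__ ((long_call)) would.  */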
4949 /* Handle an attribute requiring a FUNCTION_DECL;
4950 arguments as in struct attribute_spec.handler. */
4952 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4953 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4955 if (TREE_CODE (*node) != FUNCTION_DECL)
4957 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4959 *no_add_attrs = true;
4965 /* Handle an "interrupt" or "isr" attribute;
4966 arguments as in struct attribute_spec.handler. */
4968 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4973 if (TREE_CODE (*node) != FUNCTION_DECL)
4975 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4977 *no_add_attrs = true;
4979 /* FIXME: the argument, if any, is checked for type attributes;
4980 should it be checked for decl ones? */
4984 if (TREE_CODE (*node) == FUNCTION_TYPE
4985 || TREE_CODE (*node) == METHOD_TYPE)
4987 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4989 warning (OPT_Wattributes, "%qE attribute ignored",
4991 *no_add_attrs = true;
4994 else if (TREE_CODE (*node) == POINTER_TYPE
4995 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4996 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4997 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4999 *node = build_variant_type_copy (*node);
5000 TREE_TYPE (*node) = build_type_attribute_variant
5002 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5003 *no_add_attrs = true;
5007 /* Possibly pass this attribute on from the type to a decl. */
5008 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5009 | (int) ATTR_FLAG_FUNCTION_NEXT
5010 | (int) ATTR_FLAG_ARRAY_NEXT))
5012 *no_add_attrs = true;
5013 return tree_cons (name, args, NULL_TREE);
5017 warning (OPT_Wattributes, "%qE attribute ignored",
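/* A typical use of the attribute handled above is

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   which gives handler the special ISR prologue and epilogue; the
   strings accepted for the argument are those known to
   arm_isr_value.  */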
5026 /* Handle a "pcs" attribute; arguments as in struct
5027 attribute_spec.handler. */
5029 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5030 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5032 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5034 warning (OPT_Wattributes, "%qE attribute ignored", name);
5035 *no_add_attrs = true;
5040 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5041 /* Handle the "notshared" attribute. This attribute is another way of
5042 requesting hidden visibility. ARM's compiler supports
5043 "__declspec(notshared)"; we support the same thing via an
5047 arm_handle_notshared_attribute (tree *node,
5048 tree name ATTRIBUTE_UNUSED,
5049 tree args ATTRIBUTE_UNUSED,
5050 int flags ATTRIBUTE_UNUSED,
5053 tree decl = TYPE_NAME (*node);
5057 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5058 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5059 *no_add_attrs = false;
5065 /* Return 0 if the attributes for two types are incompatible, 1 if they
5066 are compatible, and 2 if they are nearly compatible (which causes a
5067 warning to be generated). */
5069 arm_comp_type_attributes (const_tree type1, const_tree type2)
5073 /* Check for mismatch of non-default calling convention. */
5074 if (TREE_CODE (type1) != FUNCTION_TYPE)
5077 /* Check for mismatched call attributes. */
5078 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5079 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5080 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5081 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5083 /* Only bother to check if an attribute is defined. */
5084 if (l1 | l2 | s1 | s2)
5086 /* If one type has an attribute, the other must have the same attribute. */
5087 if ((l1 != l2) || (s1 != s2))
5090 /* Disallow mixed attributes. */
5091 if ((l1 & s2) || (l2 & s1))
5095 /* Check for mismatched ISR attribute. */
5096 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5098 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5099 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5101 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5108 /* Assign default attributes to a newly defined type. This is used to
5109 set short_call/long_call attributes for function types of
5110 functions defined inside corresponding #pragma scopes. */
5112 arm_set_default_type_attributes (tree type)
5114 /* Add __attribute__ ((long_call)) to all functions when inside
5115 #pragma long_calls, or __attribute__ ((short_call)) when inside
5116 #pragma no_long_calls. */
5117 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5119 tree type_attr_list, attr_name;
5120 type_attr_list = TYPE_ATTRIBUTES (type);
5122 if (arm_pragma_long_calls == LONG)
5123 attr_name = get_identifier ("long_call");
5124 else if (arm_pragma_long_calls == SHORT)
5125 attr_name = get_identifier ("short_call");
5129 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5130 TYPE_ATTRIBUTES (type) = type_attr_list;
5134 /* Return true if DECL is known to be linked into section SECTION. */
5137 arm_function_in_section_p (tree decl, section *section)
5139 /* We can only be certain about functions defined in the same
5140 compilation unit. */
5141 if (!TREE_STATIC (decl))
5144 /* Make sure that SYMBOL always binds to the definition in this
5145 compilation unit. */
5146 if (!targetm.binds_local_p (decl))
5149 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5150 if (!DECL_SECTION_NAME (decl))
5152 /* Make sure that we will not create a unique section for DECL. */
5153 if (flag_function_sections || DECL_ONE_ONLY (decl))
5157 return function_section (decl) == section;
5160 /* Return nonzero if a 32-bit "long_call" should be generated for
5161 a call from the current function to DECL. We generate a long_call
5164 a. has an __attribute__ ((long_call))
5165 or b. is within the scope of a #pragma long_calls
5166 or c. the -mlong-calls command line switch has been specified
5168 However we do not generate a long call if the function:
5170 d. has an __attribute__ ((short_call))
5171 or e. is inside the scope of a #pragma no_long_calls
5172 or f. is defined in the same section as the current function. */
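/* So, for instance, a declaration such as

     extern void lib_fn (void) __attribute__ ((long_call));

   satisfies rule (a) and forces the 32-bit call sequence, unless one
   of rules (d) to (f) applies first.  */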
5175 arm_is_long_call_p (tree decl)
5180 return TARGET_LONG_CALLS;
5182 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5183 if (lookup_attribute ("short_call", attrs))
5186 /* For "f", be conservative, and only cater for cases in which the
5187 whole of the current function is placed in the same section. */
5188 if (!flag_reorder_blocks_and_partition
5189 && TREE_CODE (decl) == FUNCTION_DECL
5190 && arm_function_in_section_p (decl, current_function_section ()))
5193 if (lookup_attribute ("long_call", attrs))
5196 return TARGET_LONG_CALLS;
5199 /* Return nonzero if it is ok to make a tail-call to DECL. */
5201 arm_function_ok_for_sibcall (tree decl, tree exp)
5203 unsigned long func_type;
5205 if (cfun->machine->sibcall_blocked)
5208 /* Never tailcall something for which we have no decl, or if we
5209 are generating code for Thumb-1. */
5210 if (decl == NULL || TARGET_THUMB1)
5213 /* The PIC register is live on entry to VxWorks PLT entries, so we
5214 must make the call before restoring the PIC register. */
5215 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5218 /* Cannot tail-call to long calls, since these are out of range of
5219 a branch instruction. */
5220 if (arm_is_long_call_p (decl))
5223 /* If we are interworking and the function is not declared static
5224 then we can't tail-call it unless we know that it exists in this
5225 compilation unit (since it might be a Thumb routine). */
5226 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5229 func_type = arm_current_func_type ();
5230 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5231 if (IS_INTERRUPT (func_type))
5234 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5236 /* Check that the return value locations are the same. For
5237 example that we aren't returning a value from the sibling in
5238 a VFP register but then need to transfer it to a core
5242 a = arm_function_value (TREE_TYPE (exp), decl, false);
5243 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5245 if (!rtx_equal_p (a, b))
5249 /* Never tailcall if function may be called with a misaligned SP. */
5250 if (IS_STACKALIGN (func_type))
5253 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5254 references should become a NOP. Don't convert such calls into
5256 if (TARGET_AAPCS_BASED
5257 && arm_abi == ARM_ABI_AAPCS
5258 && DECL_WEAK (decl))
5261 /* Everything else is ok. */
5266 /* Addressing mode support functions. */
5268 /* Return nonzero if X is a legitimate immediate operand when compiling
5269 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5271 legitimate_pic_operand_p (rtx x)
5273 if (GET_CODE (x) == SYMBOL_REF
5274 || (GET_CODE (x) == CONST
5275 && GET_CODE (XEXP (x, 0)) == PLUS
5276 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5282 /* Record that the current function needs a PIC register. Initialize
5283 cfun->machine->pic_reg if we have not already done so. */
5286 require_pic_register (void)
5288 /* A lot of the logic here is made obscure by the fact that this
5289 routine gets called as part of the rtx cost estimation process.
5290 We don't want those calls to affect any assumptions about the real
5291 function; and further, we can't call entry_of_function() until we
5292 start the real expansion process. */
5293 if (!crtl->uses_pic_offset_table)
5295 gcc_assert (can_create_pseudo_p ());
5296 if (arm_pic_register != INVALID_REGNUM)
5298 if (!cfun->machine->pic_reg)
5299 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5301 /* Play games to avoid marking the function as needing pic
5302 if we are being called as part of the cost-estimation
5304 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5305 crtl->uses_pic_offset_table = 1;
5311 if (!cfun->machine->pic_reg)
5312 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5314 /* Play games to avoid marking the function as needing pic
5315 if we are being called as part of the cost-estimation
5317 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5319 crtl->uses_pic_offset_table = 1;
5322 arm_load_pic_register (0UL);
5327 for (insn = seq; insn; insn = NEXT_INSN (insn))
5329 INSN_LOCATOR (insn) = prologue_locator;
5331 /* We can be called during expansion of PHI nodes, where
5332 we can't yet emit instructions directly in the final
5333 insn stream. Queue the insns on the entry edge, they will
5334 be committed after everything else is expanded. */
5335 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5342 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5344 if (GET_CODE (orig) == SYMBOL_REF
5345 || GET_CODE (orig) == LABEL_REF)
5351 gcc_assert (can_create_pseudo_p ());
5352 reg = gen_reg_rtx (Pmode);
5355 /* VxWorks does not impose a fixed gap between segments; the run-time
5356 gap can be different from the object-file gap. We therefore can't
5357 use GOTOFF unless we are absolutely sure that the symbol is in the
5358 same segment as the GOT. Unfortunately, the flexibility of linker
5359 scripts means that we can't be sure of that in general, so assume
5360 that GOTOFF is never valid on VxWorks. */
5361 if ((GET_CODE (orig) == LABEL_REF
5362 || (GET_CODE (orig) == SYMBOL_REF &&
5363 SYMBOL_REF_LOCAL_P (orig)))
5365 && !TARGET_VXWORKS_RTP)
5366 insn = arm_pic_static_addr (orig, reg);
5372 /* If this function doesn't have a pic register, create one now. */
5373 require_pic_register ();
5375 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5377 /* Make the MEM as close to a constant as possible. */
5378 mem = SET_SRC (pat);
5379 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5380 MEM_READONLY_P (mem) = 1;
5381 MEM_NOTRAP_P (mem) = 1;
5383 insn = emit_insn (pat);
5386 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5388 set_unique_reg_note (insn, REG_EQUAL, orig);
5392 else if (GET_CODE (orig) == CONST)
5396 if (GET_CODE (XEXP (orig, 0)) == PLUS
5397 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5400 /* Handle the case where we have: const (UNSPEC_TLS). */
5401 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5402 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5405 /* Handle the case where we have:
5406 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5408 if (GET_CODE (XEXP (orig, 0)) == PLUS
5409 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5410 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5412 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5418 gcc_assert (can_create_pseudo_p ());
5419 reg = gen_reg_rtx (Pmode);
5422 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5424 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5425 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5426 base == reg ? 0 : reg);
5428 if (GET_CODE (offset) == CONST_INT)
5430 /* The base register doesn't really matter, we only want to
5431 test the index for the appropriate mode. */
5432 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5434 gcc_assert (can_create_pseudo_p ());
5435 offset = force_reg (Pmode, offset);
5438 if (GET_CODE (offset) == CONST_INT)
5439 return plus_constant (base, INTVAL (offset));
5442 if (GET_MODE_SIZE (mode) > 4
5443 && (GET_MODE_CLASS (mode) == MODE_INT
5444 || TARGET_SOFT_FLOAT))
5446 emit_insn (gen_addsi3 (reg, base, offset));
5450 return gen_rtx_PLUS (Pmode, base, offset);
5457 /* Find a spare register to use during the prolog of a function. */
5460 thumb_find_work_register (unsigned long pushed_regs_mask)
5464 /* Check the argument registers first as these are call-used. The
5465 register allocation order means that sometimes r3 might be used
5466 but earlier argument registers might not, so check them all. */
5467 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5468 if (!df_regs_ever_live_p (reg))
5471 /* Before going on to check the call-saved registers we can try a couple
5472 more ways of deducing that r3 is available. The first is when we are
5473 pushing anonymous arguments onto the stack and we have less than 4
5474 registers worth of fixed arguments(*). In this case r3 will be part of
5475 the variable argument list and so we can be sure that it will be
5476 pushed right at the start of the function. Hence it will be available
5477 for the rest of the prologue.
5478 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5479 if (cfun->machine->uses_anonymous_args
5480 && crtl->args.pretend_args_size > 0)
5481 return LAST_ARG_REGNUM;
5483 /* The other case is when we have fixed arguments but less than 4 registers
5484 worth. In this case r3 might be used in the body of the function, but
5485 it is not being used to convey an argument into the function. In theory
5486 we could just check crtl->args.size to see how many bytes are
5487 being passed in argument registers, but it seems that it is unreliable.
5488 Sometimes it will have the value 0 when in fact arguments are being
5489 passed. (See testcase execute/20021111-1.c for an example). So we also
5490 check the args_info.nregs field as well. The problem with this field is
5491 that it makes no allowances for arguments that are passed to the
5492 function but which are not used. Hence we could miss an opportunity
5493 when a function has an unused argument in r3. But it is better to be
5494 safe than to be sorry. */
5495 if (! cfun->machine->uses_anonymous_args
5496 && crtl->args.size >= 0
5497 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5498 && crtl->args.info.nregs < 4)
5499 return LAST_ARG_REGNUM;
5501 /* Otherwise look for a call-saved register that is going to be pushed. */
5502 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5503 if (pushed_regs_mask & (1 << reg))
5508 /* Thumb-2 can use high regs. */
5509 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5510 if (pushed_regs_mask & (1 << reg))
5513 /* Something went wrong - thumb_compute_save_reg_mask()
5514 should have arranged for a suitable register to be pushed. */
5518 static GTY(()) int pic_labelno;
5520 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5524 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5526 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5528 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5531 gcc_assert (flag_pic);
5533 pic_reg = cfun->machine->pic_reg;
5534 if (TARGET_VXWORKS_RTP)
5536 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5537 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5538 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5540 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5542 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5543 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5547 /* We use an UNSPEC rather than a LABEL_REF because this label
5548 never appears in the code stream. */
5550 labelno = GEN_INT (pic_labelno++);
5551 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5552 l1 = gen_rtx_CONST (VOIDmode, l1);
5554 /* On the ARM the PC register contains 'dot + 8' at the time of the
5555 addition, on the Thumb it is 'dot + 4'. */
5556 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5557 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5559 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5563 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5565 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5567 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5569 else /* TARGET_THUMB1 */
5571 if (arm_pic_register != INVALID_REGNUM
5572 && REGNO (pic_reg) > LAST_LO_REGNUM)
5574 /* We will have pushed the pic register, so we should always be
5575 able to find a work register. */
5576 pic_tmp = gen_rtx_REG (SImode,
5577 thumb_find_work_register (saved_regs));
5578 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5579 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5582 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5583 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5587 /* Need to emit this whether or not we obey regdecls,
5588 since setjmp/longjmp can cause life info to screw up. */
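/* In ARM state the sequence emitted above looks roughly like this
   (label names illustrative only):

       ldr     rPIC, .LCP
   .LPIC0:
       add     rPIC, pc, rPIC   @ pc reads as .LPIC0 + 8 here
       ...
   .LCP:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   leaving the GOT address in the PIC register.  */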
5592 /* Generate code to load the address of a static var when flag_pic is set. */
5594 arm_pic_static_addr (rtx orig, rtx reg)
5596 rtx l1, labelno, offset_rtx, insn;
5598 gcc_assert (flag_pic);
5600 /* We use an UNSPEC rather than a LABEL_REF because this label
5601 never appears in the code stream. */
5602 labelno = GEN_INT (pic_labelno++);
5603 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5604 l1 = gen_rtx_CONST (VOIDmode, l1);
5606 /* On the ARM the PC register contains 'dot + 8' at the time of the
5607 addition, on the Thumb it is 'dot + 4'. */
5608 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5609 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5610 UNSPEC_SYMBOL_OFFSET);
5611 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5615 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5617 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5619 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5621 else /* TARGET_THUMB1 */
5623 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5624 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5630 /* Return nonzero if X is valid as an ARM state addressing register. */
5632 arm_address_register_rtx_p (rtx x, int strict_p)
5636 if (GET_CODE (x) != REG)
5642 return ARM_REGNO_OK_FOR_BASE_P (regno);
5644 return (regno <= LAST_ARM_REGNUM
5645 || regno >= FIRST_PSEUDO_REGISTER
5646 || regno == FRAME_POINTER_REGNUM
5647 || regno == ARG_POINTER_REGNUM);
5650 /* Return TRUE if this rtx is the difference of a symbol and a label,
5651 and will reduce to a PC-relative relocation in the object file.
5652 Expressions like this can be left alone when generating PIC, rather
5653 than forced through the GOT. */
5655 pcrel_constant_p (rtx x)
5657 if (GET_CODE (x) == MINUS)
5658 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
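/* E.g. (minus (symbol_ref ("x")) (label_ref L)) assembles to the
   PC-relative constant x - .L and needs no GOT entry.  */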
5663 /* Return true if X will surely end up in an index register after next
5666 will_be_in_index_register (const_rtx x)
5668 /* arm.md: calculate_pic_address will split this into a register. */
5669 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5672 /* Return nonzero if X is a valid ARM state address operand. */
5674 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5678 enum rtx_code code = GET_CODE (x);
5680 if (arm_address_register_rtx_p (x, strict_p))
5683 use_ldrd = (TARGET_LDRD
5685 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5687 if (code == POST_INC || code == PRE_DEC
5688 || ((code == PRE_INC || code == POST_DEC)
5689 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5690 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5692 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5693 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5694 && GET_CODE (XEXP (x, 1)) == PLUS
5695 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5697 rtx addend = XEXP (XEXP (x, 1), 1);
5699 /* Don't allow ldrd post-increment by register because it's hard
5700 to fix up invalid register choices. */
5702 && GET_CODE (x) == POST_MODIFY
5703 && GET_CODE (addend) == REG)
5706 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5707 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5710 /* After reload constants split into minipools will have addresses
5711 from a LABEL_REF. */
5712 else if (reload_completed
5713 && (code == LABEL_REF
5715 && GET_CODE (XEXP (x, 0)) == PLUS
5716 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5717 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5720 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5723 else if (code == PLUS)
5725 rtx xop0 = XEXP (x, 0);
5726 rtx xop1 = XEXP (x, 1);
5728 return ((arm_address_register_rtx_p (xop0, strict_p)
5729 && ((GET_CODE(xop1) == CONST_INT
5730 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5731 || (!strict_p && will_be_in_index_register (xop1))))
5732 || (arm_address_register_rtx_p (xop1, strict_p)
5733 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5737 /* Reload currently can't handle MINUS, so disable this for now. */
5738 else if (GET_CODE (x) == MINUS)
5740 rtx xop0 = XEXP (x, 0);
5741 rtx xop1 = XEXP (x, 1);
5743 return (arm_address_register_rtx_p (xop0, strict_p)
5744 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5748 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5749 && code == SYMBOL_REF
5750 && CONSTANT_POOL_ADDRESS_P (x)
5752 && symbol_mentioned_p (get_pool_constant (x))
5753 && ! pcrel_constant_p (get_pool_constant (x))))
5759 /* Return nonzero if X is a valid Thumb-2 address operand. */
5761 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5764 enum rtx_code code = GET_CODE (x);
5766 if (arm_address_register_rtx_p (x, strict_p))
5769 use_ldrd = (TARGET_LDRD
5771 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5773 if (code == POST_INC || code == PRE_DEC
5774 || ((code == PRE_INC || code == POST_DEC)
5775 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5776 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5778 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5779 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5780 && GET_CODE (XEXP (x, 1)) == PLUS
5781 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5783 /* Thumb-2 only has autoincrement by constant. */
5784 rtx addend = XEXP (XEXP (x, 1), 1);
5785 HOST_WIDE_INT offset;
5787 if (GET_CODE (addend) != CONST_INT)
5790 offset = INTVAL(addend);
5791 if (GET_MODE_SIZE (mode) <= 4)
5792 return (offset > -256 && offset < 256);
5794 return (use_ldrd && offset > -1024 && offset < 1024
5795 && (offset & 3) == 0);
5798 /* After reload constants split into minipools will have addresses
5799 from a LABEL_REF. */
5800 else if (reload_completed
5801 && (code == LABEL_REF
5803 && GET_CODE (XEXP (x, 0)) == PLUS
5804 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5805 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5808 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5811 else if (code == PLUS)
5813 rtx xop0 = XEXP (x, 0);
5814 rtx xop1 = XEXP (x, 1);
5816 return ((arm_address_register_rtx_p (xop0, strict_p)
5817 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5818 || (!strict_p && will_be_in_index_register (xop1))))
5819 || (arm_address_register_rtx_p (xop1, strict_p)
5820 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5823 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5824 && code == SYMBOL_REF
5825 && CONSTANT_POOL_ADDRESS_P (x)
5827 && symbol_mentioned_p (get_pool_constant (x))
5828 && ! pcrel_constant_p (get_pool_constant (x))))
5834 /* Return nonzero if INDEX is valid for an address index operand in
5837 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5840 HOST_WIDE_INT range;
5841 enum rtx_code code = GET_CODE (index);
5843 /* Standard coprocessor addressing modes. */
5844 if (TARGET_HARD_FLOAT
5845 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5846 && (mode == SFmode || mode == DFmode
5847 || (TARGET_MAVERICK && mode == DImode)))
5848 return (code == CONST_INT && INTVAL (index) < 1024
5849 && INTVAL (index) > -1024
5850 && (INTVAL (index) & 3) == 0);
5852 /* For quad modes, we restrict the constant offset to be slightly less
5853 than what the instruction format permits. We do this because for
5854 quad mode moves, we will actually decompose them into two separate
5855 double-mode reads or writes. INDEX must therefore be a valid
5856 (double-mode) offset and so should INDEX+8. */
5857 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5858 return (code == CONST_INT
5859 && INTVAL (index) < 1016
5860 && INTVAL (index) > -1024
5861 && (INTVAL (index) & 3) == 0);
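/* E.g. a quad-word access at offset 1016 would need its second
   double-word half at 1024, past the 1020 byte limit of VLDR/VSTR;
   capping the offset below 1016 keeps both halves addressable.  */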
5863 /* We have no such constraint on double mode offsets, so we permit the
5864 full range of the instruction format. */
5865 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5866 return (code == CONST_INT
5867 && INTVAL (index) < 1024
5868 && INTVAL (index) > -1024
5869 && (INTVAL (index) & 3) == 0);
5871 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5872 return (code == CONST_INT
5873 && INTVAL (index) < 1024
5874 && INTVAL (index) > -1024
5875 && (INTVAL (index) & 3) == 0);
5877 if (arm_address_register_rtx_p (index, strict_p)
5878 && (GET_MODE_SIZE (mode) <= 4))
5881 if (mode == DImode || mode == DFmode)
5883 if (code == CONST_INT)
5885 HOST_WIDE_INT val = INTVAL (index);
5888 return val > -256 && val < 256;
5890 return val > -4096 && val < 4092;
5893 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5896 if (GET_MODE_SIZE (mode) <= 4
5900 || (mode == QImode && outer == SIGN_EXTEND))))
5904 rtx xiop0 = XEXP (index, 0);
5905 rtx xiop1 = XEXP (index, 1);
5907 return ((arm_address_register_rtx_p (xiop0, strict_p)
5908 && power_of_two_operand (xiop1, SImode))
5909 || (arm_address_register_rtx_p (xiop1, strict_p)
5910 && power_of_two_operand (xiop0, SImode)));
5912 else if (code == LSHIFTRT || code == ASHIFTRT
5913 || code == ASHIFT || code == ROTATERT)
5915 rtx op = XEXP (index, 1);
5917 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5918 && GET_CODE (op) == CONST_INT
5920 && INTVAL (op) <= 31);
5924 /* For ARM v4 we may be doing a sign-extend operation during the
5930 || (outer == SIGN_EXTEND && mode == QImode))
5936 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5938 return (code == CONST_INT
5939 && INTVAL (index) < range
5940 && INTVAL (index) > -range);
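/* So in ARM state a word access such as

     ldr r0, [r1, #4095]

   is accepted, the 12-bit immediate giving the open interval
   (-4096, 4096) computed above for word accesses.  */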
5943 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5944 index operand, i.e. 1, 2, 4 or 8. */
5946 thumb2_index_mul_operand (rtx op)
5950 if (GET_CODE(op) != CONST_INT)
5954 return (val == 1 || val == 2 || val == 4 || val == 8);
5957 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5959 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5961 enum rtx_code code = GET_CODE (index);
5963 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5964 /* Standard coprocessor addressing modes. */
5965 if (TARGET_HARD_FLOAT
5966 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5967 && (mode == SFmode || mode == DFmode
5968 || (TARGET_MAVERICK && mode == DImode)))
5969 return (code == CONST_INT && INTVAL (index) < 1024
5970 /* Thumb-2 allows only > -256 index range for its core register
5971 load/stores. Since we allow SF/DF in core registers, we have
5972 to use the intersection between -256~4096 (core) and -1024~1024
5974 && INTVAL (index) > -256
5975 && (INTVAL (index) & 3) == 0);
5977 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5979 /* For DImode assume values will usually live in core regs
5980 and only allow LDRD addressing modes. */
5981 if (!TARGET_LDRD || mode != DImode)
5982 return (code == CONST_INT
5983 && INTVAL (index) < 1024
5984 && INTVAL (index) > -1024
5985 && (INTVAL (index) & 3) == 0);
5988 /* For quad modes, we restrict the constant offset to be slightly less
5989 than what the instruction format permits. We do this because for
5990 quad mode moves, we will actually decompose them into two separate
5991 double-mode reads or writes. INDEX must therefore be a valid
5992 (double-mode) offset and so should INDEX+8. */
5993 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5994 return (code == CONST_INT
5995 && INTVAL (index) < 1016
5996 && INTVAL (index) > -1024
5997 && (INTVAL (index) & 3) == 0);
5999 /* We have no such constraint on double mode offsets, so we permit the
6000 full range of the instruction format. */
6001 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6002 return (code == CONST_INT
6003 && INTVAL (index) < 1024
6004 && INTVAL (index) > -1024
6005 && (INTVAL (index) & 3) == 0);
6007 if (arm_address_register_rtx_p (index, strict_p)
6008 && (GET_MODE_SIZE (mode) <= 4))
6011 if (mode == DImode || mode == DFmode)
6013 if (code == CONST_INT)
6015 HOST_WIDE_INT val = INTVAL (index);
6016 /* ??? Can we assume ldrd for thumb2? */
6017 /* Thumb-2 ldrd only has reg+const addressing modes. */
6018 /* ldrd supports offsets of +-1020.
6019 However the ldr fallback does not. */
6020 return val > -256 && val < 256 && (val & 3) == 0;
6028 rtx xiop0 = XEXP (index, 0);
6029 rtx xiop1 = XEXP (index, 1);
6031 return ((arm_address_register_rtx_p (xiop0, strict_p)
6032 && thumb2_index_mul_operand (xiop1))
6033 || (arm_address_register_rtx_p (xiop1, strict_p)
6034 && thumb2_index_mul_operand (xiop0)));
6036 else if (code == ASHIFT)
6038 rtx op = XEXP (index, 1);
6040 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6041 && GET_CODE (op) == CONST_INT
6043 && INTVAL (op) <= 3);
6046 return (code == CONST_INT
6047 && INTVAL (index) < 4096
6048 && INTVAL (index) > -256);
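/* This mirrors the Thumb-2 encodings: a 12-bit positive immediate,
   e.g. "ldr r0, [r1, #4095]", or an 8-bit negative one,
   e.g. "ldr r0, [r1, #-255]".  */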
6051 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6053 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6057 if (GET_CODE (x) != REG)
6063 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6065 return (regno <= LAST_LO_REGNUM
6066 || regno > LAST_VIRTUAL_REGISTER
6067 || regno == FRAME_POINTER_REGNUM
6068 || (GET_MODE_SIZE (mode) >= 4
6069 && (regno == STACK_POINTER_REGNUM
6070 || regno >= FIRST_PSEUDO_REGISTER
6071 || x == hard_frame_pointer_rtx
6072 || x == arg_pointer_rtx)));
6075 /* Return nonzero if x is a legitimate index register. This is the case
6076 for any base register that can access a QImode object. */
6078 thumb1_index_register_rtx_p (rtx x, int strict_p)
6080 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6083 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6085 The AP may be eliminated to either the SP or the FP, so we use the
6086 least common denominator, e.g. SImode, and offsets from 0 to 64.
6088 ??? Verify whether the above is the right approach.
6090 ??? Also, the FP may be eliminated to the SP, so perhaps that
6091 needs special handling also.
6093 ??? Look at how the mips16 port solves this problem. It probably uses
6094 better ways to solve some of these problems.
6096 Although it is not incorrect, we don't accept QImode and HImode
6097 addresses based on the frame pointer or arg pointer until the
6098 reload pass starts. This is so that eliminating such addresses
6099 into stack based ones won't produce impossible code. */
6101 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6103 /* ??? Not clear if this is right. Experiment. */
6104 if (GET_MODE_SIZE (mode) < 4
6105 && !(reload_in_progress || reload_completed)
6106 && (reg_mentioned_p (frame_pointer_rtx, x)
6107 || reg_mentioned_p (arg_pointer_rtx, x)
6108 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6109 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6110 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6111 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6114 /* Accept any base register. SP only in SImode or larger. */
6115 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6118 /* This is PC relative data before arm_reorg runs. */
6119 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6120 && GET_CODE (x) == SYMBOL_REF
6121 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6124 /* This is PC relative data after arm_reorg runs. */
6125 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6127 && (GET_CODE (x) == LABEL_REF
6128 || (GET_CODE (x) == CONST
6129 && GET_CODE (XEXP (x, 0)) == PLUS
6130 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6131 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6134 /* Post-inc indexing only supported for SImode and larger. */
6135 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6136 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6139 else if (GET_CODE (x) == PLUS)
6141 /* REG+REG address can be any two index registers. */
6142 /* We disallow FRAME+REG addressing since we know that FRAME
6143 will be replaced with STACK, and SP relative addressing only
6144 permits SP+OFFSET. */
6145 if (GET_MODE_SIZE (mode) <= 4
6146 && XEXP (x, 0) != frame_pointer_rtx
6147 && XEXP (x, 1) != frame_pointer_rtx
6148 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6149 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6150 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6153 /* REG+const has 5-7 bit offset for non-SP registers. */
6154 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6155 || XEXP (x, 0) == arg_pointer_rtx)
6156 && GET_CODE (XEXP (x, 1)) == CONST_INT
6157 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6160 /* REG+const has 10-bit offset for SP, but only SImode and
6161 larger is supported. */
6162 /* ??? Should probably check for DI/DFmode overflow here
6163 just like GO_IF_LEGITIMATE_OFFSET does. */
6164 else if (GET_CODE (XEXP (x, 0)) == REG
6165 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6166 && GET_MODE_SIZE (mode) >= 4
6167 && GET_CODE (XEXP (x, 1)) == CONST_INT
6168 && INTVAL (XEXP (x, 1)) >= 0
6169 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6170 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6173 else if (GET_CODE (XEXP (x, 0)) == REG
6174 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6175 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6176 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6177 && REGNO (XEXP (x, 0))
6178 <= LAST_VIRTUAL_POINTER_REGISTER))
6179 && GET_MODE_SIZE (mode) >= 4
6180 && GET_CODE (XEXP (x, 1)) == CONST_INT
6181 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6185 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6186 && GET_MODE_SIZE (mode) == 4
6187 && GET_CODE (x) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x)
&& ! (flag_pic
6190 && symbol_mentioned_p (get_pool_constant (x))
6191 && ! pcrel_constant_p (get_pool_constant (x))))
6197 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6198 instruction of mode MODE. */
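/* Illustrative note: these limits mirror the Thumb-1 load/store
encodings, whose 5-bit immediate field is scaled by the access size,
giving byte offsets 0-31, halfword offsets 0-62 and word offsets
0-124; multi-word accesses must keep every word within that range. */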
6200 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6202 switch (GET_MODE_SIZE (mode))
6205 return val >= 0 && val < 32;
6208 return val >= 0 && val < 64 && (val & 1) == 0;
6212 && (val + GET_MODE_SIZE (mode)) <= 128
6218 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6221 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6222 else if (TARGET_THUMB2)
6223 return thumb2_legitimate_address_p (mode, x, strict_p);
6224 else /* if (TARGET_THUMB1) */
6225 return thumb1_legitimate_address_p (mode, x, strict_p);
6228 /* Build the SYMBOL_REF for __tls_get_addr. */
6230 static GTY(()) rtx tls_get_addr_libfunc;
6233 get_tls_get_addr (void)
6235 if (!tls_get_addr_libfunc)
6236 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6237 return tls_get_addr_libfunc;
6241 arm_load_tp (rtx target)
6244 target = gen_reg_rtx (SImode);
6248 /* Can return in any reg. */
6249 emit_insn (gen_load_tp_hard (target));
6253 /* Always returned in r0. Immediately copy the result into a pseudo,
6254 otherwise other uses of r0 (e.g. setting up function arguments) may
6255 clobber the value. */
6259 emit_insn (gen_load_tp_soft ());
6261 tmp = gen_rtx_REG (SImode, 0);
6262 emit_move_insn (target, tmp);
6268 load_tls_operand (rtx x, rtx reg)
6272 if (reg == NULL_RTX)
6273 reg = gen_reg_rtx (SImode);
6275 tmp = gen_rtx_CONST (SImode, x);
6277 emit_move_insn (reg, tmp);
6283 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6285 rtx insns, label, labelno, sum;
6287 gcc_assert (reloc != TLS_DESCSEQ);
6290 labelno = GEN_INT (pic_labelno++);
6291 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6292 label = gen_rtx_CONST (VOIDmode, label);
6294 sum = gen_rtx_UNSPEC (Pmode,
6295 gen_rtvec (4, x, GEN_INT (reloc), label,
GEN_INT (TARGET_ARM ? 8 : 4)),
UNSPEC_TLS);
6298 reg = load_tls_operand (sum, reg);
6301 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6303 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6305 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6306 LCT_PURE, /* LCT_CONST? */
6307 Pmode, 1, reg, Pmode);
6309 insns = get_insns ();
6316 arm_tls_descseq_addr (rtx x, rtx reg)
6318 rtx labelno = GEN_INT (pic_labelno++);
6319 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6320 rtx sum = gen_rtx_UNSPEC (Pmode,
6321 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6322 gen_rtx_CONST (VOIDmode, label),
GEN_INT (!TARGET_ARM)),
UNSPEC_TLS);
6325 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6327 emit_insn (gen_tlscall (x, labelno));
6329 reg = gen_reg_rtx (SImode);
6331 gcc_assert (REGNO (reg) != 0);
6333 emit_move_insn (reg, reg0);
6339 legitimize_tls_address (rtx x, rtx reg)
6341 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6342 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6346 case TLS_MODEL_GLOBAL_DYNAMIC:
6347 if (TARGET_GNU2_TLS)
6349 reg = arm_tls_descseq_addr (x, reg);
6351 tp = arm_load_tp (NULL_RTX);
6353 dest = gen_rtx_PLUS (Pmode, tp, reg);
6357 /* Original scheme */
6358 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6359 dest = gen_reg_rtx (Pmode);
6360 emit_libcall_block (insns, dest, ret, x);
6364 case TLS_MODEL_LOCAL_DYNAMIC:
6365 if (TARGET_GNU2_TLS)
6367 reg = arm_tls_descseq_addr (x, reg);
6369 tp = arm_load_tp (NULL_RTX);
6371 dest = gen_rtx_PLUS (Pmode, tp, reg);
6375 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6377 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6378 share the LDM result with other LD model accesses. */
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
UNSPEC_TLS);
6381 dest = gen_reg_rtx (Pmode);
6382 emit_libcall_block (insns, dest, ret, eqv);
6384 /* Load the addend. */
6385 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
GEN_INT (TLS_LDO32)),
UNSPEC_TLS);
6388 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6389 dest = gen_rtx_PLUS (Pmode, dest, addend);
6393 case TLS_MODEL_INITIAL_EXEC:
6394 labelno = GEN_INT (pic_labelno++);
6395 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6396 label = gen_rtx_CONST (VOIDmode, label);
6397 sum = gen_rtx_UNSPEC (Pmode,
6398 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
GEN_INT (TARGET_ARM ? 8 : 4)),
UNSPEC_TLS);
6401 reg = load_tls_operand (sum, reg);
6404 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6405 else if (TARGET_THUMB2)
6406 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6409 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6410 emit_move_insn (reg, gen_const_mem (SImode, reg));
6413 tp = arm_load_tp (NULL_RTX);
6415 return gen_rtx_PLUS (Pmode, tp, reg);
6417 case TLS_MODEL_LOCAL_EXEC:
6418 tp = arm_load_tp (NULL_RTX);
6420 reg = gen_rtx_UNSPEC (Pmode,
gen_rtvec (2, x, GEN_INT (TLS_LE32)),
UNSPEC_TLS);
6423 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6425 return gen_rtx_PLUS (Pmode, tp, reg);
6432 /* Try machine-dependent ways of modifying an illegitimate address
6433 to be legitimate. If we find one, return the new, valid address. */
6435 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6439 /* TODO: legitimize_address for Thumb2. */
6442 return thumb_legitimize_address (x, orig_x, mode);
6445 if (arm_tls_symbol_p (x))
6446 return legitimize_tls_address (x, NULL_RTX);
6448 if (GET_CODE (x) == PLUS)
6450 rtx xop0 = XEXP (x, 0);
6451 rtx xop1 = XEXP (x, 1);
6453 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6454 xop0 = force_reg (SImode, xop0);
6456 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6457 xop1 = force_reg (SImode, xop1);
6459 if (ARM_BASE_REGISTER_RTX_P (xop0)
6460 && GET_CODE (xop1) == CONST_INT)
6462 HOST_WIDE_INT n, low_n;
6466 /* VFP addressing modes actually allow greater offsets, but for
6467 now we just stick with the lowest common denominator. */
6469 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6481 low_n = ((mode) == TImode ? 0
6482 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6486 base_reg = gen_reg_rtx (SImode);
6487 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6488 emit_move_insn (base_reg, val);
6489 x = plus_constant (base_reg, low_n);
6491 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6492 x = gen_rtx_PLUS (SImode, xop0, xop1);
6495 /* XXX We don't allow MINUS any more -- see comment in
6496 arm_legitimate_address_outer_p (). */
6497 else if (GET_CODE (x) == MINUS)
6499 rtx xop0 = XEXP (x, 0);
6500 rtx xop1 = XEXP (x, 1);
6502 if (CONSTANT_P (xop0))
6503 xop0 = force_reg (SImode, xop0);
6505 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6506 xop1 = force_reg (SImode, xop1);
6508 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6509 x = gen_rtx_MINUS (SImode, xop0, xop1);
/* Make sure to take full advantage of the pre-indexed addressing mode
with absolute addresses, which often allows the base register to be
factorized across multiple adjacent memory references and might
even allow the minipool to be avoided entirely. */
6516 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6519 HOST_WIDE_INT mask, base, index;
6522 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
use an 8-bit index. So let's use a 12-bit index for SImode only and
6524 hope that arm_gen_constant will enable ldrb to use more bits. */
6525 bits = (mode == SImode) ? 12 : 8;
6526 mask = (1 << bits) - 1;
6527 base = INTVAL (x) & ~mask;
6528 index = INTVAL (x) & mask;
6529 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6531 /* It'll most probably be more efficient to generate the base
6532 with more bits set and use a negative index instead. */
6536 base_reg = force_reg (SImode, GEN_INT (base));
6537 x = plus_constant (base_reg, index);
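/* Illustrative example: for mode == SImode the constant address
0x12345 splits into base = 0x12000 and index = 0x345, so once the
base is in a register, neighbouring accesses can share it. */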
6542 /* We need to find and carefully transform any SYMBOL and LABEL
6543 references; so go back to the original address expression. */
6544 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6546 if (new_x != orig_x)
6554 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6555 to be legitimate. If we find one, return the new, valid address. */
6557 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6559 if (arm_tls_symbol_p (x))
6560 return legitimize_tls_address (x, NULL_RTX);
6562 if (GET_CODE (x) == PLUS
6563 && GET_CODE (XEXP (x, 1)) == CONST_INT
6564 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6565 || INTVAL (XEXP (x, 1)) < 0))
6567 rtx xop0 = XEXP (x, 0);
6568 rtx xop1 = XEXP (x, 1);
6569 HOST_WIDE_INT offset = INTVAL (xop1);
6571 /* Try and fold the offset into a biasing of the base register and
6572 then offsetting that. Don't do this when optimizing for space
6573 since it can cause too many CSEs. */
6574 if (optimize_size && offset >= 0
6575 && offset < 256 + 31 * GET_MODE_SIZE (mode))
HOST_WIDE_INT delta;

if (offset >= 256)
delta = offset - (256 - GET_MODE_SIZE (mode));
6581 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6582 delta = 31 * GET_MODE_SIZE (mode);
else
delta = offset & (~31 * GET_MODE_SIZE (mode));
xop0 = force_operand (plus_constant (xop0, offset - delta),
NULL_RTX);
6588 x = plus_constant (xop0, delta);
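/* Illustrative example: for SImode and offset == 260, delta becomes
260 - (256 - 4) = 8, so the base register is biased by 252 (a legal
add immediate) and the access itself uses the small offset #8. */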
6590 else if (offset < 0 && offset > -256)
/* Small negative offsets are best done with a subtract before the
dereference; forcing these into a register normally takes two
instructions. */
6594 x = force_operand (x, NULL_RTX);
6597 /* For the remaining cases, force the constant into a register. */
6598 xop1 = force_reg (SImode, xop1);
6599 x = gen_rtx_PLUS (SImode, xop0, xop1);
6602 else if (GET_CODE (x) == PLUS
6603 && s_register_operand (XEXP (x, 1), SImode)
6604 && !s_register_operand (XEXP (x, 0), SImode))
6606 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6608 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6613 /* We need to find and carefully transform any SYMBOL and LABEL
6614 references; so go back to the original address expression. */
6615 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6617 if (new_x != orig_x)
6625 arm_legitimize_reload_address (rtx *p,
6626 enum machine_mode mode,
6627 int opnum, int type,
6628 int ind_levels ATTRIBUTE_UNUSED)
6630 /* We must recognize output that we have already generated ourselves. */
6631 if (GET_CODE (*p) == PLUS
6632 && GET_CODE (XEXP (*p, 0)) == PLUS
6633 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6634 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6635 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6637 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6638 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6639 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6643 if (GET_CODE (*p) == PLUS
6644 && GET_CODE (XEXP (*p, 0)) == REG
6645 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6646 /* If the base register is equivalent to a constant, let the generic
6647 code handle it. Otherwise we will run into problems if a future
6648 reload pass decides to rematerialize the constant. */
6649 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6650 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6652 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6653 HOST_WIDE_INT low, high;
6655 /* Detect coprocessor load/stores. */
6656 bool coproc_p = ((TARGET_HARD_FLOAT
6657 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6658 && (mode == SFmode || mode == DFmode
6659 || (mode == DImode && TARGET_MAVERICK)))
6660 || (TARGET_REALLY_IWMMXT
6661 && VALID_IWMMXT_REG_MODE (mode))
6663 && (VALID_NEON_DREG_MODE (mode)
6664 || VALID_NEON_QREG_MODE (mode))));
/* For some conditions, bail out when the lower two bits of the offset are set. */
6667 if ((val & 0x3) != 0
6668 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6670 /* For DI, and DF under soft-float: */
6671 || ((mode == DImode || mode == DFmode)
6672 /* Without ldrd, we use stm/ldm, which does not
fare well with unaligned offsets. */
6675 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6676 || TARGET_THUMB2))))
/* When breaking down a [reg+index] reload address into [(reg+high)+low],
where the (reg+high) part gets turned into a reload add insn,
we try to decompose the index into high/low values that can often
also lead to better reload CSE.
For example:
ldr r0, [r2, #4100] // Offset too large
ldr r1, [r2, #4104] // Offset too large

is best reloaded as:
add t1, r2, #4096
ldr r0, [t1, #4]
add t2, r2, #4096
ldr r1, [t2, #8]

which post-reload CSE can simplify in most cases to eliminate the
second add instruction:
add t1, r2, #4096
ldr r0, [t1, #4]
ldr r1, [t1, #8]
6699 The idea here is that we want to split out the bits of the constant
6700 as a mask, rather than as subtracting the maximum offset that the
6701 respective type of load/store used can handle.
When encountering negative offsets, we can still utilize them even if
the overall offset is positive; sometimes this may lead to an immediate
that can be constructed with fewer instructions.
For example:
6707 ldr r0, [r2, #0x3FFFFC]
6709 This is best reloaded as:
add t1, r2, #0x400000
ldr r0, [t1, #-4]
6713 The trick for spotting this for a load insn with N bits of offset
(i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6715 negative offset that is going to make bit N and all the bits below
6716 it become zero in the remainder part.
6718 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6719 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6720 used in most cases of ARM load/store instructions. */
6722 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6723 (((VAL) & ((1 << (N)) - 1)) \
? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
: 0)
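/* Worked example (illustrative): for the #0x3FFFFC case above,
SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
= ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
= 0xFFC - 0x1000
= -4,
giving low = -4 and high = val - low = 0x400000. */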
6729 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6731 /* NEON quad-word load/stores are made of two double-word accesses,
so the valid index range is reduced by 8. Treat as a 9-bit range if
necessary. */
6734 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6735 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6737 else if (GET_MODE_SIZE (mode) == 8)
6740 low = (TARGET_THUMB2
6741 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6742 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6744 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6745 to access doublewords. The supported load/store offsets are
6746 -8, -4, and 4, which we try to produce here. */
6747 low = ((val & 0xf) ^ 0x8) - 0x8;
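/* E.g. (illustrative) val = 12 gives low = (0xc ^ 0x8) - 0x8 = -4,
so the access becomes [(reg + 16) - 4]. */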
6749 else if (GET_MODE_SIZE (mode) < 8)
6751 /* NEON element load/stores do not have an offset. */
6752 if (TARGET_NEON_FP16 && mode == HFmode)
6757 /* Thumb-2 has an asymmetrical index range of (-256,4096).
Try the wider 12-bit range first, and re-try if the result
is out of range. */
6760 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6762 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6766 if (mode == HImode || mode == HFmode)
6769 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6772 /* The storehi/movhi_bytes fallbacks can use only
6773 [-4094,+4094] of the full ldrb/strb index range. */
6774 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6775 if (low == 4095 || low == -4095)
6780 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6786 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6787 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6788 - (unsigned HOST_WIDE_INT) 0x80000000);
6789 /* Check for overflow or zero */
6790 if (low == 0 || high == 0 || (high + low != val))
/* Reload the high part into a base reg; leave the low part
in the mem. */
6795 *p = gen_rtx_PLUS (GET_MODE (*p),
6796 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6799 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6800 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6801 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6809 thumb_legitimize_reload_address (rtx *x_p,
6810 enum machine_mode mode,
6811 int opnum, int type,
6812 int ind_levels ATTRIBUTE_UNUSED)
6816 if (GET_CODE (x) == PLUS
6817 && GET_MODE_SIZE (mode) < 4
6818 && REG_P (XEXP (x, 0))
6819 && XEXP (x, 0) == stack_pointer_rtx
6820 && GET_CODE (XEXP (x, 1)) == CONST_INT
6821 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6826 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6827 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6831 /* If both registers are hi-regs, then it's better to reload the
6832 entire expression rather than each register individually. That
6833 only requires one reload register rather than two. */
6834 if (GET_CODE (x) == PLUS
6835 && REG_P (XEXP (x, 0))
6836 && REG_P (XEXP (x, 1))
6837 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6838 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6843 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6844 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6851 /* Test for various thread-local symbols. */
6853 /* Return TRUE if X is a thread-local symbol. */
6856 arm_tls_symbol_p (rtx x)
6858 if (! TARGET_HAVE_TLS)
6861 if (GET_CODE (x) != SYMBOL_REF)
6864 return SYMBOL_REF_TLS_MODEL (x) != 0;
6867 /* Helper for arm_tls_referenced_p. */
6870 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6872 if (GET_CODE (*x) == SYMBOL_REF)
6873 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6875 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6876 TLS offsets, not real symbol references. */
6877 if (GET_CODE (*x) == UNSPEC
6878 && XINT (*x, 1) == UNSPEC_TLS)
6884 /* Return TRUE if X contains any TLS symbol references. */
6887 arm_tls_referenced_p (rtx x)
6889 if (! TARGET_HAVE_TLS)
6892 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6895 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6897 On the ARM, allow any integer (invalid ones are removed later by insn
patterns), nice doubles and symbol_refs which refer to the function's
constant pool.
6901 When generating pic allow anything. */
6904 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6906 /* At present, we have no support for Neon structure constants, so forbid
them here. It might be possible to handle simple cases like 0 and -1
in the future. */
6909 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6912 return flag_pic || !label_mentioned_p (x);
6916 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6918 return (GET_CODE (x) == CONST_INT
6919 || GET_CODE (x) == CONST_DOUBLE
6920 || CONSTANT_ADDRESS_P (x)
6925 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6927 return (!arm_cannot_force_const_mem (mode, x)
6929 ? arm_legitimate_constant_p_1 (mode, x)
6930 : thumb_legitimate_constant_p (mode, x)));
6933 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6936 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6940 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6942 split_const (x, &base, &offset);
6943 if (GET_CODE (base) == SYMBOL_REF
6944 && !offset_within_block_p (base, INTVAL (offset)))
6947 return arm_tls_referenced_p (x);
6950 #define REG_OR_SUBREG_REG(X) \
6951 (GET_CODE (X) == REG \
6952 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6954 #define REG_OR_SUBREG_RTX(X) \
6955 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6958 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6960 enum machine_mode mode = GET_MODE (x);
6974 return COSTS_N_INSNS (1);
6977 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6980 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6987 return COSTS_N_INSNS (2) + cycles;
6989 return COSTS_N_INSNS (1) + 16;
6992 return (COSTS_N_INSNS (1)
6993 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ (GET_CODE (SET_DEST (x)) == MEM)));
6999 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7001 if (thumb_shiftable_const (INTVAL (x)))
7002 return COSTS_N_INSNS (2);
7003 return COSTS_N_INSNS (3);
7005 else if ((outer == PLUS || outer == COMPARE)
7006 && INTVAL (x) < 256 && INTVAL (x) > -256)
7008 else if ((outer == IOR || outer == XOR || outer == AND)
7009 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7010 return COSTS_N_INSNS (1);
7011 else if (outer == AND)
7014 /* This duplicates the tests in the andsi3 expander. */
7015 for (i = 9; i <= 31; i++)
7016 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7017 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7018 return COSTS_N_INSNS (2);
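/* E.g. (illustrative) AND with 0x1ff (i == 9) can be synthesised as
two shifts: lsls rD, rS, #23 then lsrs rD, rD, #23. */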
7020 else if (outer == ASHIFT || outer == ASHIFTRT
7021 || outer == LSHIFTRT)
7023 return COSTS_N_INSNS (2);
7029 return COSTS_N_INSNS (3);
7047 /* XXX another guess. */
7048 /* Memory costs quite a lot for the first word, but subsequent words
7049 load at the equivalent of a single insn each. */
7050 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7051 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7056 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7062 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7063 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7069 return total + COSTS_N_INSNS (1);
7071 /* Assume a two-shift sequence. Increase the cost slightly so
7072 we prefer actual shifts over an extend operation. */
7073 return total + 1 + COSTS_N_INSNS (2);
7081 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7083 enum machine_mode mode = GET_MODE (x);
7084 enum rtx_code subcode;
7086 enum rtx_code code = GET_CODE (x);
7092 /* Memory costs quite a lot for the first word, but subsequent words
7093 load at the equivalent of a single insn each. */
7094 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7101 if (TARGET_HARD_FLOAT && mode == SFmode)
7102 *total = COSTS_N_INSNS (2);
7103 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7104 *total = COSTS_N_INSNS (4);
7106 *total = COSTS_N_INSNS (20);
7110 if (GET_CODE (XEXP (x, 1)) == REG)
7111 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7112 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7113 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7119 *total += COSTS_N_INSNS (4);
7124 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7125 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7128 *total += COSTS_N_INSNS (3);
7132 *total += COSTS_N_INSNS (1);
7133 /* Increase the cost of complex shifts because they aren't any faster,
7134 and reduce dual issue opportunities. */
7135 if (arm_tune_cortex_a9
7136 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7144 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7145 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7146 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7148 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7152 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7153 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7155 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7162 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7164 if (TARGET_HARD_FLOAT
7166 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7168 *total = COSTS_N_INSNS (1);
7169 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7170 && arm_const_double_rtx (XEXP (x, 0)))
7172 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7176 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7177 && arm_const_double_rtx (XEXP (x, 1)))
7179 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7185 *total = COSTS_N_INSNS (20);
7189 *total = COSTS_N_INSNS (1);
7190 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7191 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7193 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7197 subcode = GET_CODE (XEXP (x, 1));
7198 if (subcode == ASHIFT || subcode == ASHIFTRT
7199 || subcode == LSHIFTRT
7200 || subcode == ROTATE || subcode == ROTATERT)
7202 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7203 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7207 /* A shift as a part of RSB costs no more than RSB itself. */
7208 if (GET_CODE (XEXP (x, 0)) == MULT
7209 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7211 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7212 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7217 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7219 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7220 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7224 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7225 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7227 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7228 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7229 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7230 *total += COSTS_N_INSNS (1);
7238 if (code == PLUS && arm_arch6 && mode == SImode
7239 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7240 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7242 *total = COSTS_N_INSNS (1);
7243 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7245 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7249 /* MLA: All arguments must be registers. We filter out
multiplication by a power of two, so that we fall down into
the code below. */
7252 if (GET_CODE (XEXP (x, 0)) == MULT
7253 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7255 /* The cost comes from the cost of the multiply. */
7259 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7261 if (TARGET_HARD_FLOAT
7263 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7265 *total = COSTS_N_INSNS (1);
7266 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7267 && arm_const_double_rtx (XEXP (x, 1)))
7269 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7276 *total = COSTS_N_INSNS (20);
7280 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7281 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7283 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7284 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7285 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7286 *total += COSTS_N_INSNS (1);
7292 case AND: case XOR: case IOR:
/* Normally the frame registers will be split into reg+const during
reload, so it is a bad idea to combine them with other instructions,
since then they might not be moved outside of loops. As a compromise
we allow integration with ops that have a constant as their second
operand. */
7299 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7300 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7301 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7302 *total = COSTS_N_INSNS (1);
7306 *total += COSTS_N_INSNS (2);
7307 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7308 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7310 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7317 *total += COSTS_N_INSNS (1);
7318 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7319 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7321 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7324 subcode = GET_CODE (XEXP (x, 0));
7325 if (subcode == ASHIFT || subcode == ASHIFTRT
7326 || subcode == LSHIFTRT
7327 || subcode == ROTATE || subcode == ROTATERT)
7329 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7330 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7335 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7337 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7338 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7342 if (subcode == UMIN || subcode == UMAX
7343 || subcode == SMIN || subcode == SMAX)
7345 *total = COSTS_N_INSNS (3);
7352 /* This should have been handled by the CPU specific routines. */
7356 if (arm_arch3m && mode == SImode
7357 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7358 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7359 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7360 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7361 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7362 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7364 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7367 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7371 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7373 if (TARGET_HARD_FLOAT
7375 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7377 *total = COSTS_N_INSNS (1);
7380 *total = COSTS_N_INSNS (2);
7386 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7387 if (mode == SImode && code == NOT)
7389 subcode = GET_CODE (XEXP (x, 0));
7390 if (subcode == ASHIFT || subcode == ASHIFTRT
7391 || subcode == LSHIFTRT
7392 || subcode == ROTATE || subcode == ROTATERT
7394 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7396 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7397 /* Register shifts cost an extra cycle. */
7398 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7399 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7408 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7410 *total = COSTS_N_INSNS (4);
7414 operand = XEXP (x, 0);
7416 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7417 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7418 && GET_CODE (XEXP (operand, 0)) == REG
7419 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7420 *total += COSTS_N_INSNS (1);
7421 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7422 + rtx_cost (XEXP (x, 2), code, 2, speed));
7426 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7428 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7434 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7435 && mode == SImode && XEXP (x, 1) == const0_rtx)
7437 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7443 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7444 && mode == SImode && XEXP (x, 1) == const0_rtx)
7446 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7466 /* SCC insns. In the case where the comparison has already been
performed, they cost 2 instructions. Otherwise they need
7468 an additional comparison before them. */
7469 *total = COSTS_N_INSNS (2);
7470 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7477 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7483 *total += COSTS_N_INSNS (1);
7484 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7485 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7487 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7491 subcode = GET_CODE (XEXP (x, 0));
7492 if (subcode == ASHIFT || subcode == ASHIFTRT
7493 || subcode == LSHIFTRT
7494 || subcode == ROTATE || subcode == ROTATERT)
7496 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7497 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7502 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7504 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7505 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7515 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7516 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7517 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7518 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7522 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7524 if (TARGET_HARD_FLOAT
7526 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7528 *total = COSTS_N_INSNS (1);
7531 *total = COSTS_N_INSNS (20);
7534 *total = COSTS_N_INSNS (1);
7536 *total += COSTS_N_INSNS (3);
7542 if (GET_MODE_CLASS (mode) == MODE_INT)
7544 rtx op = XEXP (x, 0);
7545 enum machine_mode opmode = GET_MODE (op);
7548 *total += COSTS_N_INSNS (1);
7550 if (opmode != SImode)
7554 /* If !arm_arch4, we use one of the extendhisi2_mem
7555 or movhi_bytes patterns for HImode. For a QImode
7556 sign extension, we first zero-extend from memory
7557 and then perform a shift sequence. */
7558 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7559 *total += COSTS_N_INSNS (2);
7562 *total += COSTS_N_INSNS (1);
/* We don't have the necessary insn, so we need to perform some
other operation. */
7566 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7567 /* An and with constant 255. */
7568 *total += COSTS_N_INSNS (1);
7570 /* A shift sequence. Increase costs slightly to avoid
7571 combining two shifts into an extend operation. */
7572 *total += COSTS_N_INSNS (2) + 1;
7578 switch (GET_MODE (XEXP (x, 0)))
7585 *total = COSTS_N_INSNS (1);
7595 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7599 if (const_ok_for_arm (INTVAL (x))
7600 || const_ok_for_arm (~INTVAL (x)))
7601 *total = COSTS_N_INSNS (1);
7603 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7604 INTVAL (x), NULL_RTX,
7611 *total = COSTS_N_INSNS (3);
7615 *total = COSTS_N_INSNS (1);
7619 *total = COSTS_N_INSNS (1);
7620 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7624 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7625 && (mode == SFmode || !TARGET_VFP_SINGLE))
7626 *total = COSTS_N_INSNS (1);
7628 *total = COSTS_N_INSNS (4);
7635 *total = COSTS_N_INSNS (4);
7640 /* Estimates the size cost of thumb1 instructions.
7641 For now most of the code is copied from thumb1_rtx_costs. We need more
fine-grained tuning when we have more related test cases. */
7644 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7646 enum machine_mode mode = GET_MODE (x);
7659 return COSTS_N_INSNS (1);
7662 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
/* Thumb1 mul instruction can't operate on const. We must load it
7665 into a register first. */
7666 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7667 return COSTS_N_INSNS (1) + const_size;
7669 return COSTS_N_INSNS (1);
7672 return (COSTS_N_INSNS (1)
7673 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ (GET_CODE (SET_DEST (x)) == MEM)));
7679 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7680 return COSTS_N_INSNS (1);
7681 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7682 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7683 return COSTS_N_INSNS (2);
7684 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7685 if (thumb_shiftable_const (INTVAL (x)))
7686 return COSTS_N_INSNS (2);
7687 return COSTS_N_INSNS (3);
7689 else if ((outer == PLUS || outer == COMPARE)
7690 && INTVAL (x) < 256 && INTVAL (x) > -256)
7692 else if ((outer == IOR || outer == XOR || outer == AND)
7693 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7694 return COSTS_N_INSNS (1);
7695 else if (outer == AND)
7698 /* This duplicates the tests in the andsi3 expander. */
7699 for (i = 9; i <= 31; i++)
7700 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7701 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7702 return COSTS_N_INSNS (2);
7704 else if (outer == ASHIFT || outer == ASHIFTRT
7705 || outer == LSHIFTRT)
7707 return COSTS_N_INSNS (2);
7713 return COSTS_N_INSNS (3);
7731 /* XXX another guess. */
7732 /* Memory costs quite a lot for the first word, but subsequent words
7733 load at the equivalent of a single insn each. */
7734 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7735 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7740 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7745 /* XXX still guessing. */
7746 switch (GET_MODE (XEXP (x, 0)))
7749 return (1 + (mode == DImode ? 4 : 0)
7750 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7753 return (4 + (mode == DImode ? 4 : 0)
7754 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7757 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7768 /* RTX costs when optimizing for size. */
7770 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7773 enum machine_mode mode = GET_MODE (x);
7776 *total = thumb1_size_rtx_costs (x, code, outer_code);
7780 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7784 /* A memory access costs 1 insn if the mode is small, or the address is
7785 a single register, otherwise it costs one insn per word. */
7786 if (REG_P (XEXP (x, 0)))
7787 *total = COSTS_N_INSNS (1);
7789 && GET_CODE (XEXP (x, 0)) == PLUS
7790 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7791 /* This will be split into two instructions.
7792 See arm.md:calculate_pic_address. */
7793 *total = COSTS_N_INSNS (2);
7795 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7802 /* Needs a libcall, so it costs about this. */
7803 *total = COSTS_N_INSNS (2);
7807 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7809 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7817 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7819 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7822 else if (mode == SImode)
7824 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7825 /* Slightly disparage register shifts, but not by much. */
7826 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7827 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7831 /* Needs a libcall. */
7832 *total = COSTS_N_INSNS (2);
7836 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7837 && (mode == SFmode || !TARGET_VFP_SINGLE))
7839 *total = COSTS_N_INSNS (1);
7845 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7846 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7848 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7849 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7850 || subcode1 == ROTATE || subcode1 == ROTATERT
7851 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7852 || subcode1 == ASHIFTRT)
7854 /* It's just the cost of the two operands. */
7859 *total = COSTS_N_INSNS (1);
7863 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7867 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7868 && (mode == SFmode || !TARGET_VFP_SINGLE))
7870 *total = COSTS_N_INSNS (1);
7874 /* A shift as a part of ADD costs nothing. */
7875 if (GET_CODE (XEXP (x, 0)) == MULT
7876 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7878 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7879 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7880 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7885 case AND: case XOR: case IOR:
7888 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7890 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7891 || subcode == LSHIFTRT || subcode == ASHIFTRT
7892 || (code == AND && subcode == NOT))
7894 /* It's just the cost of the two operands. */
7900 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7904 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7908 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7909 && (mode == SFmode || !TARGET_VFP_SINGLE))
7911 *total = COSTS_N_INSNS (1);
7917 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7926 if (cc_register (XEXP (x, 0), VOIDmode))
7929 *total = COSTS_N_INSNS (1);
7933 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7934 && (mode == SFmode || !TARGET_VFP_SINGLE))
7935 *total = COSTS_N_INSNS (1);
7937 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7942 return arm_rtx_costs_1 (x, outer_code, total, 0);
7945 if (const_ok_for_arm (INTVAL (x)))
7946 /* A multiplication by a constant requires another instruction
7947 to load the constant to a register. */
7948 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7950 else if (const_ok_for_arm (~INTVAL (x)))
7951 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7952 else if (const_ok_for_arm (-INTVAL (x)))
7954 if (outer_code == COMPARE || outer_code == PLUS
7955 || outer_code == MINUS)
7958 *total = COSTS_N_INSNS (1);
7961 *total = COSTS_N_INSNS (2);
7967 *total = COSTS_N_INSNS (2);
7971 *total = COSTS_N_INSNS (4);
7976 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7977 cost of these slightly. */
7978 *total = COSTS_N_INSNS (1) + 1;
7985 if (mode != VOIDmode)
7986 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
*total = COSTS_N_INSNS (4); /* Who knows? */
/* Top-level RTX cost function: dispatch to the size costs when
optimizing for size, otherwise to the current tuning's costs. */
7995 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
7996 int *total, bool speed)
7999 return arm_size_rtx_costs (x, (enum rtx_code) code,
8000 (enum rtx_code) outer_code, total);
8002 return current_tune->rtx_costs (x, (enum rtx_code) code,
8003 (enum rtx_code) outer_code,
8007 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8008 supported on any "slowmul" cores, so it can be ignored. */
8011 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8012 int *total, bool speed)
8014 enum machine_mode mode = GET_MODE (x);
8018 *total = thumb1_rtx_costs (x, code, outer_code);
8025 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8028 *total = COSTS_N_INSNS (20);
8032 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8034 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8035 & (unsigned HOST_WIDE_INT) 0xffffffff);
8036 int cost, const_ok = const_ok_for_arm (i);
8037 int j, booth_unit_size;
8039 /* Tune as appropriate. */
8040 cost = const_ok ? 4 : 8;
8041 booth_unit_size = 2;
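/* Illustrative: each loop iteration below retires booth_unit_size bits
of the multiplier, so a constant with 16 significant bits adds 8 to
the base cost. */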
8042 for (j = 0; i && j < 32; j += booth_unit_size)
i >>= booth_unit_size;
cost++;
8048 *total = COSTS_N_INSNS (cost);
8049 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8053 *total = COSTS_N_INSNS (20);
return arm_rtx_costs_1 (x, outer_code, total, speed);
8062 /* RTX cost for cores with a fast multiply unit (M variants). */
8065 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8066 int *total, bool speed)
8068 enum machine_mode mode = GET_MODE (x);
8072 *total = thumb1_rtx_costs (x, code, outer_code);
8076 /* ??? should thumb2 use different costs? */
8080 /* There is no point basing this on the tuning, since it is always the
8081 fast variant if it exists at all. */
8083 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8084 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8085 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
*total = COSTS_N_INSNS (2);
8094 *total = COSTS_N_INSNS (5);
8098 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8100 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8101 & (unsigned HOST_WIDE_INT) 0xffffffff);
8102 int cost, const_ok = const_ok_for_arm (i);
8103 int j, booth_unit_size;
8105 /* Tune as appropriate. */
8106 cost = const_ok ? 4 : 8;
8107 booth_unit_size = 8;
8108 for (j = 0; i && j < 32; j += booth_unit_size)
i >>= booth_unit_size;
cost++;
*total = COSTS_N_INSNS (cost);
8120 *total = COSTS_N_INSNS (4);
8124 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8126 if (TARGET_HARD_FLOAT
8128 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8130 *total = COSTS_N_INSNS (1);
8135 /* Requires a lib call */
8136 *total = COSTS_N_INSNS (20);
8140 return arm_rtx_costs_1 (x, outer_code, total, speed);
8145 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8146 so it can be ignored. */
8149 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8150 int *total, bool speed)
8152 enum machine_mode mode = GET_MODE (x);
8156 *total = thumb1_rtx_costs (x, code, outer_code);
8163 if (GET_CODE (XEXP (x, 0)) != MULT)
8164 return arm_rtx_costs_1 (x, outer_code, total, speed);
8166 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8167 will stall until the multiplication is complete. */
8168 *total = COSTS_N_INSNS (3);
8172 /* There is no point basing this on the tuning, since it is always the
8173 fast variant if it exists at all. */
8175 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8176 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8177 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8179 *total = COSTS_N_INSNS (2);
8186 *total = COSTS_N_INSNS (5);
8190 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8192 /* If operand 1 is a constant we can more accurately
8193 calculate the cost of the multiply. The multiplier can
8194 retire 15 bits on the first cycle and a further 12 on the
8195 second. We do, of course, have to load the constant into
8196 a register first. */
8197 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8198 /* There's a general overhead of one cycle. */
8200 unsigned HOST_WIDE_INT masked_const;
8205 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8207 masked_const = i & 0xffff8000;
8208 if (masked_const != 0)
8211 masked_const = i & 0xf8000000;
8212 if (masked_const != 0)
8215 *total = COSTS_N_INSNS (cost);
8221 *total = COSTS_N_INSNS (3);
8225 /* Requires a lib call */
8226 *total = COSTS_N_INSNS (20);
8230 return arm_rtx_costs_1 (x, outer_code, total, speed);
8235 /* RTX costs for 9e (and later) cores. */
8238 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8239 int *total, bool speed)
8241 enum machine_mode mode = GET_MODE (x);
8248 *total = COSTS_N_INSNS (3);
8252 *total = thumb1_rtx_costs (x, code, outer_code);
8260 /* There is no point basing this on the tuning, since it is always the
8261 fast variant if it exists at all. */
8263 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8264 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8265 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8267 *total = COSTS_N_INSNS (2);
8274 *total = COSTS_N_INSNS (5);
8280 *total = COSTS_N_INSNS (2);
8284 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8286 if (TARGET_HARD_FLOAT
8288 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8290 *total = COSTS_N_INSNS (1);
8295 *total = COSTS_N_INSNS (20);
8299 return arm_rtx_costs_1 (x, outer_code, total, speed);
8302 /* All address computations that can be done are free, but rtx cost returns
8303 the same for practically all of them. So we weight the different types
8304 of address here in the order (most pref first):
8305 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8307 arm_arm_address_cost (rtx x)
8309 enum rtx_code c = GET_CODE (x);
8311 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8313 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8318 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8321 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8331 arm_thumb_address_cost (rtx x)
8333 enum rtx_code c = GET_CODE (x);
8338 && GET_CODE (XEXP (x, 0)) == REG
8339 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8346 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8348 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8351 /* Adjust cost hook for XScale. */
8353 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8355 /* Some true dependencies can have a higher cost depending
8356 on precisely how certain input operands are used. */
if (REG_NOTE_KIND (link) == 0
8358 && recog_memoized (insn) >= 0
8359 && recog_memoized (dep) >= 0)
8361 int shift_opnum = get_attr_shift (insn);
8362 enum attr_type attr_type = get_attr_type (dep);
8364 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8365 operand for INSN. If we have a shifted input operand and the
8366 instruction we depend on is another ALU instruction, then we may
8367 have to account for an additional stall. */
8368 if (shift_opnum != 0
8369 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8371 rtx shifted_operand;
8374 /* Get the shifted operand. */
8375 extract_insn (insn);
8376 shifted_operand = recog_data.operand[shift_opnum];
8378 /* Iterate over all the operands in DEP. If we write an operand
that overlaps with SHIFTED_OPERAND, then we have to increase the
8380 cost of this dependency. */
8382 preprocess_constraints ();
8383 for (opno = 0; opno < recog_data.n_operands; opno++)
8385 /* We can ignore strict inputs. */
8386 if (recog_data.operand_type[opno] == OP_IN)
8389 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8401 /* Adjust cost hook for Cortex A9. */
8403 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8405 switch (REG_NOTE_KIND (link))
8412 case REG_DEP_OUTPUT:
8413 if (recog_memoized (insn) >= 0
8414 && recog_memoized (dep) >= 0)
8416 if (GET_CODE (PATTERN (insn)) == SET)
8419 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8421 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8423 enum attr_type attr_type_insn = get_attr_type (insn);
8424 enum attr_type attr_type_dep = get_attr_type (dep);
/* By default all dependencies of the form
s0 = s0 <op> s1
s0 = s0 <op> s2
8429 have an extra latency of 1 cycle because
8430 of the input and output dependency in this
case. However, this gets modeled as a true
8432 dependency and hence all these checks. */
8433 if (REG_P (SET_DEST (PATTERN (insn)))
8434 && REG_P (SET_DEST (PATTERN (dep)))
8435 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8436 SET_DEST (PATTERN (dep))))
/* FMACS is a special case where the dependent
8439 instruction can be issued 3 cycles before
the normal latency in case of an output
dependency. */
8442 if ((attr_type_insn == TYPE_FMACS
8443 || attr_type_insn == TYPE_FMACD)
8444 && (attr_type_dep == TYPE_FMACS
8445 || attr_type_dep == TYPE_FMACD))
8447 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8448 *cost = insn_default_latency (dep) - 3;
8450 *cost = insn_default_latency (dep);
8455 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8456 *cost = insn_default_latency (dep) + 1;
8458 *cost = insn_default_latency (dep);
8474 /* Adjust cost hook for FA726TE. */
8476 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
/* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
has a penalty of 3. */
8480 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8481 && recog_memoized (insn) >= 0
8482 && recog_memoized (dep) >= 0
8483 && get_attr_conds (dep) == CONDS_SET)
8485 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8486 if (get_attr_conds (insn) == CONDS_USE
8487 && get_attr_type (insn) != TYPE_BRANCH)
8493 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8494 || get_attr_conds (insn) == CONDS_USE)
8504 /* Implement TARGET_REGISTER_MOVE_COST.
8506 Moves between FPA_REGS and GENERAL_REGS are two memory insns.
8507 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8508 it is typically more expensive than a single memory access. We set
8509 the cost to less than two memory accesses so that floating
8510 point to integer conversion does not go through memory. */
8513 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8514 reg_class_t from, reg_class_t to)
8518 if ((from == FPA_REGS && to != FPA_REGS)
8519 || (from != FPA_REGS && to == FPA_REGS))
8521 else if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8522 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8524 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8525 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8527 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8529 else if ((from == CIRRUS_REGS && to != CIRRUS_REGS)
8530 || (from != CIRRUS_REGS && to == CIRRUS_REGS))
8537 if (from == HI_REGS || to == HI_REGS)
8544 /* Implement TARGET_MEMORY_MOVE_COST. */
8547 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8548 bool in ATTRIBUTE_UNUSED)
8554 if (GET_MODE_SIZE (mode) < 4)
8557 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
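/* E.g. (illustrative) under this formula an SImode access costs 8 via
LO_REGS but 16 via any other class. */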
8561 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8562 It corrects the value of COST based on the relationship between
8563 INSN and DEP through the dependence LINK. It returns the new
8564 value. There is a per-core adjust_cost hook to adjust scheduler costs
8565 and the per-core hook can choose to completely override the generic
8566 adjust_cost function. Only put bits of code into arm_adjust_cost that
8567 are common across all cores. */
8569 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8573 /* When generating Thumb-1 code, we want to place flag-setting operations
8574 close to a conditional branch which depends on them, so that we can
8575 omit the comparison. */
8577 && REG_NOTE_KIND (link) == 0
8578 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8579 && recog_memoized (dep) >= 0
8580 && get_attr_conds (dep) == CONDS_SET)
8583 if (current_tune->sched_adjust_cost != NULL)
8585 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8589 /* XXX This is not strictly true for the FPA. */
8590 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8591 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8594 /* Call insns don't incur a stall, even if they follow a load. */
8595 if (REG_NOTE_KIND (link) == 0
8596 && GET_CODE (insn) == CALL_INSN)
8599 if ((i_pat = single_set (insn)) != NULL
8600 && GET_CODE (SET_SRC (i_pat)) == MEM
8601 && (d_pat = single_set (dep)) != NULL
8602 && GET_CODE (SET_DEST (d_pat)) == MEM)
8604 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
/* This is a load after a store; there is no conflict if the load reads
from a cached area. Assume that loads from the stack and from the
constant pool are cached, and that others will miss. This is a
rough approximation. */
8610 if ((GET_CODE (src_mem) == SYMBOL_REF
8611 && CONSTANT_POOL_ADDRESS_P (src_mem))
8612 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8613 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8614 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8622 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8625 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8627 return (optimize > 0) ? 2 : 0;
8631 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8633 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8636 static int fp_consts_inited = 0;
8638 /* Only zero is valid for VFP. Other values are also valid for FPA. */
static const char * const strings_fp[8] =
{
"0", "1", "2", "3",
"4", "5", "0.5", "10"
};
8645 static REAL_VALUE_TYPE values_fp[8];
8648 init_fp_table (void)
8654 fp_consts_inited = 1;
8656 fp_consts_inited = 8;
8658 for (i = 0; i < fp_consts_inited; i++)
8660 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8665 /* Return TRUE if rtx X is a valid immediate FP constant. */
8667 arm_const_double_rtx (rtx x)
8672 if (!fp_consts_inited)
8675 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8676 if (REAL_VALUE_MINUS_ZERO (r))
8679 for (i = 0; i < fp_consts_inited; i++)
8680 if (REAL_VALUES_EQUAL (r, values_fp[i]))
/* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
8688 neg_const_double_rtx_ok_for_fpa (rtx x)
8693 if (!fp_consts_inited)
8696 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8697 r = real_value_negate (&r);
8698 if (REAL_VALUE_MINUS_ZERO (r))
8701 for (i = 0; i < 8; i++)
8702 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8709 /* VFPv3 has a fairly wide range of representable immediates, formed from
8710 "quarter-precision" floating-point values. These can be evaluated using this
formula (with ^ for exponentiation):

(-1)^s * (n/16) * 2^(4 - r)
8715 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8716 16 <= n <= 31 and 0 <= r <= 7.
8718 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8720 - A (most-significant) is the sign bit.
8721 - BCD are the exponent (encoded as r XOR 3).
8722 - EFGH are the mantissa (encoded as n - 16).
8725 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8726 fconst[sd] instruction, or -1 if X isn't suitable. */
8728 vfp3_const_double_index (rtx x)
8730 REAL_VALUE_TYPE r, m;
8732 unsigned HOST_WIDE_INT mantissa, mant_hi;
8733 unsigned HOST_WIDE_INT mask;
8734 HOST_WIDE_INT m1, m2;
8735 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8737 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8740 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8742 /* We can't represent these things, so detect them first. */
8743 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8746 /* Extract sign, exponent and mantissa. */
8747 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8748 r = real_value_abs (&r);
8749 exponent = REAL_EXP (&r);
8750 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8751 highest (sign) bit, with a fixed binary point at bit point_pos.
8752 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8753 bits for the mantissa, this may fail (low bits would be lost). */
8754 real_ldexp (&m, &r, point_pos - exponent);
8755 REAL_VALUE_TO_INT (&m1, &m2, m);
8759 /* If there are bits set in the low part of the mantissa, we can't
8760 represent this value. */
8764 /* Now make it so that mantissa contains the most-significant bits, and move
the point_pos to indicate that the least-significant bits have been
discarded. */
8767 point_pos -= HOST_BITS_PER_WIDE_INT;
8770 /* We can permit four significant bits of mantissa only, plus a high bit
8771 which is always 1. */
8772 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8773 if ((mantissa & mask) != 0)
8776 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8777 mantissa >>= point_pos - 5;
8779 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8780 floating-point immediate zero with Neon using an integer-zero load, but
8781 that case is handled elsewhere.) */
8785 gcc_assert (mantissa >= 16 && mantissa <= 31);
8787 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8788 normalized significands are in the range [1, 2). (Our mantissa is shifted
8789 left 4 places at this point relative to normalized IEEE754 values). GCC
8790 internally uses [0.5, 1) (see real.c), so the exponent returned from
8791 REAL_EXP must be altered. */
8792 exponent = 5 - exponent;
8794 if (exponent < 0 || exponent > 7)
8797 /* Sign, mantissa and exponent are now in the correct form to plug into the
8798 formula described in the comment above. */
8799 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8802 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8804 vfp3_const_double_rtx (rtx x)
8809 return vfp3_const_double_index (x) != -1;
8812 /* Recognize immediates which can be used in various Neon instructions. Legal
8813 immediates are described by the following table (for VMVN variants, the
8814 bitwise inverse of the constant shown is recognized. In either case, VMOV
8815 is output and the correct instruction to use for a given constant is chosen
8816 by the assembler). The constant shown is replicated across all elements of
8817 the destination vector.
8819 insn elems variant constant (binary)
8820 ---- ----- ------- -----------------
8821 vmov i32 0 00000000 00000000 00000000 abcdefgh
8822 vmov i32 1 00000000 00000000 abcdefgh 00000000
8823 vmov i32 2 00000000 abcdefgh 00000000 00000000
8824 vmov i32 3 abcdefgh 00000000 00000000 00000000
8825 vmov i16 4 00000000 abcdefgh
8826 vmov i16 5 abcdefgh 00000000
8827 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8828 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8829 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8830 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8831 vmvn i16 10 00000000 abcdefgh
8832 vmvn i16 11 abcdefgh 00000000
8833 vmov i32 12 00000000 00000000 abcdefgh 11111111
8834 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8835 vmov i32 14 00000000 abcdefgh 11111111 11111111
8836 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8837 vmov i8 16 abcdefgh
8838 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8839 eeeeeeee ffffffff gggggggg hhhhhhhh
8840 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8842 For case 18, B = !b. Representable values are exactly those accepted by
8843 vfp3_const_double_index, but are output as floating-point numbers rather than integers.
8846 Variants 0-5 (inclusive) may also be used as immediates for the second
8847 operand of VORR/VBIC instructions.
8849 The INVERSE argument causes the bitwise inverse of the given operand to be
8850 recognized instead (used for recognizing legal immediates for the VAND/VORN
8851 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8852 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8853 output, rather than the real insns vbic/vorr).
8855 INVERSE makes no difference to the recognition of float vectors.
8857 The return value is the variant of immediate as shown in the above table, or
8858 -1 if the given value doesn't match any of the listed patterns.
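/* For instance, a V4SI constant whose elements are all 0x0000ab00 matches
   variant 1 above (abcdefgh = 0xab) and is loadable with a single
   "vmov.i32 qN, #0x0000ab00", while a vector of identical bytes such as
   0x25 matches variant 16.  */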
8861 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8862 rtx *modconst, int *elementwidth)
8864 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8866 for (i = 0; i < idx; i += (STRIDE)) \
8871 immtype = (CLASS); \
8872 elsize = (ELSIZE); \
8876 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8877 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8878 unsigned char bytes[16];
8879 int immtype = -1, matches;
8880 unsigned int invmask = inverse ? 0xff : 0;
8882 /* Vectors of float constants. */
8883 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8885 rtx el0 = CONST_VECTOR_ELT (op, 0);
8888 if (!vfp3_const_double_rtx (el0))
8891 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8893 for (i = 1; i < n_elts; i++)
8895 rtx elt = CONST_VECTOR_ELT (op, i);
8898 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8900 if (!REAL_VALUES_EQUAL (r0, re))
8905 *modconst = CONST_VECTOR_ELT (op, 0);
8913 /* Splat vector constant out into a byte vector. */
8914 for (i = 0; i < n_elts; i++)
8916 rtx el = CONST_VECTOR_ELT (op, i);
8917 unsigned HOST_WIDE_INT elpart;
8918 unsigned int part, parts;
8920 if (GET_CODE (el) == CONST_INT)
8922 elpart = INTVAL (el);
8925 else if (GET_CODE (el) == CONST_DOUBLE)
8927 elpart = CONST_DOUBLE_LOW (el);
8933 for (part = 0; part < parts; part++)
8936 for (byte = 0; byte < innersize; byte++)
8938 bytes[idx++] = (elpart & 0xff) ^ invmask;
8939 elpart >>= BITS_PER_UNIT;
8941 if (GET_CODE (el) == CONST_DOUBLE)
8942 elpart = CONST_DOUBLE_HIGH (el);
8947 gcc_assert (idx == GET_MODE_SIZE (mode));
8951 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8952 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8954 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8955 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8957 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8958 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8960 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8961 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8963 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8965 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8967 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8968 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8970 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8971 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8973 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8974 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8976 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8977 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8979 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8981 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8983 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8984 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8986 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8987 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8989 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8990 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8992 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8993 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8995 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8997 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8998 && bytes[i] == bytes[(i + 8) % idx]);
9006 *elementwidth = elsize;
9010 unsigned HOST_WIDE_INT imm = 0;
9012 /* Un-invert bytes of recognized vector, if necessary. */
9014 for (i = 0; i < idx; i++)
9015 bytes[i] ^= invmask;
9019 /* FIXME: Broken on 32-bit H_W_I hosts. */
9020 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9022 for (i = 0; i < 8; i++)
9023 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9024 << (i * BITS_PER_UNIT);
9026 *modconst = GEN_INT (imm);
9030 unsigned HOST_WIDE_INT imm = 0;
9032 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9033 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9035 *modconst = GEN_INT (imm);
9043 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9044 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9045 float elements), and a modified constant (whatever should be output for a
9046 VMOV) in *MODCONST. */
9049 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9050 rtx *modconst, int *elementwidth)
9054 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9060 *modconst = tmpconst;
9063 *elementwidth = tmpwidth;
9068 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9069 the immediate is valid, write a constant suitable for using as an operand
9070 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9071 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9074 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9075 rtx *modconst, int *elementwidth)
9079 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9081 if (retval < 0 || retval > 5)
9085 *modconst = tmpconst;
9088 *elementwidth = tmpwidth;
9093 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9094 the immediate is valid, write a constant suitable for using as an operand
9095 to VSHR/VSHL to *MODCONST and the corresponding element width to
9096 *ELEMENTWIDTH. ISLEFTSHIFT is true for a left shift and false for a
9097 right shift; the two kinds have different immediate limits. */
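/* For 32-bit elements, say, a left-shift immediate must lie in 0..31
   whereas a right-shift immediate must lie in 1..32, which is why the
   shift direction matters here.  */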
9100 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9101 rtx *modconst, int *elementwidth,
9104 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9105 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9106 unsigned HOST_WIDE_INT last_elt = 0;
9107 unsigned HOST_WIDE_INT maxshift;
9109 /* Split vector constant out into a byte vector. */
9110 for (i = 0; i < n_elts; i++)
9112 rtx el = CONST_VECTOR_ELT (op, i);
9113 unsigned HOST_WIDE_INT elpart;
9115 if (GET_CODE (el) == CONST_INT)
9116 elpart = INTVAL (el);
9117 else if (GET_CODE (el) == CONST_DOUBLE)
9122 if (i != 0 && elpart != last_elt)
9128 /* Shift less than element size. */
9129 maxshift = innersize * 8;
9133 /* Left shift immediate value can be from 0 to <size>-1. */
9134 if (last_elt >= maxshift)
9139 /* Right shift immediate value can be from 1 to <size>. */
9140 if (last_elt == 0 || last_elt > maxshift)
9145 *elementwidth = innersize * 8;
9148 *modconst = CONST_VECTOR_ELT (op, 0);
9153 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
9157 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9158 int inverse, int quad)
9160 int width, is_valid;
9161 static char templ[40];
9163 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9165 gcc_assert (is_valid != 0);
9168 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9170 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9175 /* Return a string suitable for output of Neon immediate shift operation
9176 (VSHR or VSHL) MNEM. */
9179 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9180 enum machine_mode mode, int quad,
9183 int width, is_valid;
9184 static char templ[40];
9186 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9187 gcc_assert (is_valid != 0);
9190 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9192 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9197 /* Output a sequence of pairwise operations to implement a reduction.
9198 NOTE: We do "too much work" here, because pairwise operations work on two
9199 registers-worth of operands in one go. Unfortunately we can't exploit those
9200 extra calculations to do the full operation in fewer steps, as far as we can tell.
9201 Although all vector elements of the result but the first are ignored, we
9202 actually calculate the same result in each of the elements. An alternative
9203 such as initially loading a vector with zero to use as each of the second
9204 operands would use up an additional register and take an extra instruction,
9205 for no particular gain. */
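/* Illustration: reducing a four-element vector takes two pairwise steps;
   the first combines neighbouring elements, the second combines the
   results of the first, so after log2(nelts) steps element 0 (and, as
   noted above, every other element too) holds the full reduction.  */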
9208 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9209 rtx (*reduc) (rtx, rtx, rtx))
9211 enum machine_mode inner = GET_MODE_INNER (mode);
9212 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9215 for (i = parts / 2; i >= 1; i /= 2)
9217 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9218 emit_insn (reduc (dest, tmpsum, tmpsum));
9223 /* If VALS is a vector constant that can be loaded into a register
9224 using VDUP, generate instructions to do so and return an RTX to
9225 assign to the register. Otherwise return NULL_RTX. */
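/* E.g. the V4SI constant {5, 5, 5, 5} is handled by moving 5 into a core
   register and returning a VEC_DUPLICATE of it, which subsequently matches
   a "vdup.32 qN, rM" instruction pattern.  */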
9228 neon_vdup_constant (rtx vals)
9230 enum machine_mode mode = GET_MODE (vals);
9231 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9232 int n_elts = GET_MODE_NUNITS (mode);
9233 bool all_same = true;
9237 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9240 for (i = 0; i < n_elts; ++i)
9242 x = XVECEXP (vals, 0, i);
9243 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9248 /* The elements are not all the same. We could handle repeating
9249 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9250 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
9254 /* We can load this constant by using VDUP and a constant in a
9255 single ARM register. This will be cheaper than a vector load. */
9258 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9259 return gen_rtx_VEC_DUPLICATE (mode, x);
9262 /* Generate code to load VALS, which is a PARALLEL containing only
9263 constants (for vec_init) or CONST_VECTOR, efficiently into a
9264 register. Returns an RTX to copy into the register, or NULL_RTX
9265 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9268 neon_make_constant (rtx vals)
9270 enum machine_mode mode = GET_MODE (vals);
9272 rtx const_vec = NULL_RTX;
9273 int n_elts = GET_MODE_NUNITS (mode);
9277 if (GET_CODE (vals) == CONST_VECTOR)
9279 else if (GET_CODE (vals) == PARALLEL)
9281 /* A CONST_VECTOR must contain only CONST_INTs and
9282 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9283 Only store valid constants in a CONST_VECTOR. */
9284 for (i = 0; i < n_elts; ++i)
9286 rtx x = XVECEXP (vals, 0, i);
9287 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9290 if (n_const == n_elts)
9291 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9296 if (const_vec != NULL
9297 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9298 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9300 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9301 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9302 pipeline cycle; creating the constant takes one or two ARM pipeline cycles. */
9305 else if (const_vec != NULL_RTX)
9306 /* Load from constant pool. On Cortex-A8 this takes two cycles
9307 (for either double or quad vectors). We cannot take advantage
9308 of single-cycle VLD1 because we need a PC-relative addressing mode. */
9312 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9313 We cannot construct an initializer. */
9317 /* Initialize vector TARGET to VALS. */
9320 neon_expand_vector_init (rtx target, rtx vals)
9322 enum machine_mode mode = GET_MODE (target);
9323 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9324 int n_elts = GET_MODE_NUNITS (mode);
9325 int n_var = 0, one_var = -1;
9326 bool all_same = true;
9330 for (i = 0; i < n_elts; ++i)
9332 x = XVECEXP (vals, 0, i);
9333 if (!CONSTANT_P (x))
9334 ++n_var, one_var = i;
9336 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9342 rtx constant = neon_make_constant (vals);
9343 if (constant != NULL_RTX)
9345 emit_move_insn (target, constant);
9350 /* Splat a single non-constant element if we can. */
9351 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9353 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9354 emit_insn (gen_rtx_SET (VOIDmode, target,
9355 gen_rtx_VEC_DUPLICATE (mode, x)));
9359 /* One field is non-constant. Load constant then overwrite varying
9360 field. This is more efficient than using the stack. */
9363 rtx copy = copy_rtx (vals);
9364 rtx index = GEN_INT (one_var);
9366 /* Load constant part of vector, substitute neighboring value for varying element. */
9368 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9369 neon_expand_vector_init (target, copy);
9371 /* Insert variable. */
9372 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9376 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9379 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9382 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9385 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9388 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9391 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9394 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9397 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9400 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9408 /* Construct the vector in memory one field at a time
9409 and load the whole vector. */
9410 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9411 for (i = 0; i < n_elts; i++)
9412 emit_move_insn (adjust_address_nv (mem, inner_mode,
9413 i * GET_MODE_SIZE (inner_mode)),
9414 XVECEXP (vals, 0, i));
9415 emit_move_insn (target, mem);
9418 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9419 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9420 reported source locations are bogus. */
9423 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9428 gcc_assert (GET_CODE (operand) == CONST_INT);
9430 lane = INTVAL (operand);
9432 if (lane < low || lane >= high)
9436 /* Bounds-check lanes. */
9439 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9441 bounds_check (operand, low, high, "lane out of range");
9444 /* Bounds-check constants. */
9447 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9449 bounds_check (operand, low, high, "constant out of range");
9453 neon_element_bits (enum machine_mode mode)
9456 return GET_MODE_BITSIZE (mode);
9458 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9462 /* Predicates for `match_operand' and `match_operator'. */
9464 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9466 cirrus_memory_offset (rtx op)
9468 /* Reject eliminable registers. */
9469 if (! (reload_in_progress || reload_completed)
9470 && ( reg_mentioned_p (frame_pointer_rtx, op)
9471 || reg_mentioned_p (arg_pointer_rtx, op)
9472 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9473 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9474 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9475 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9478 if (GET_CODE (op) == MEM)
9484 /* Match: (mem (reg)). */
9485 if (GET_CODE (ind) == REG)
9491 if (GET_CODE (ind) == PLUS
9492 && GET_CODE (XEXP (ind, 0)) == REG
9493 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9494 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9501 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9502 WB is true if full writeback address modes are allowed and is false
9503 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
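/* Concretely, the accepted forms are [Rn], [Rn, #+/-imm] with imm a
   multiple of 4 of magnitude below 1024, and the auto-increment modes
   subject to the WB restrictions described above.  */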
9507 arm_coproc_mem_operand (rtx op, bool wb)
9511 /* Reject eliminable registers. */
9512 if (! (reload_in_progress || reload_completed)
9513 && ( reg_mentioned_p (frame_pointer_rtx, op)
9514 || reg_mentioned_p (arg_pointer_rtx, op)
9515 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9516 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9517 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9518 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9521 /* Constants are converted into offsets from labels. */
9522 if (GET_CODE (op) != MEM)
9527 if (reload_completed
9528 && (GET_CODE (ind) == LABEL_REF
9529 || (GET_CODE (ind) == CONST
9530 && GET_CODE (XEXP (ind, 0)) == PLUS
9531 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9532 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9535 /* Match: (mem (reg)). */
9536 if (GET_CODE (ind) == REG)
9537 return arm_address_register_rtx_p (ind, 0);
9539 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
9540 acceptable in any case (subject to verification by
9541 arm_address_register_rtx_p). We need WB to be true to accept
9542 PRE_INC and POST_DEC. */
9543 if (GET_CODE (ind) == POST_INC
9544 || GET_CODE (ind) == PRE_DEC
9546 && (GET_CODE (ind) == PRE_INC
9547 || GET_CODE (ind) == POST_DEC)))
9548 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9551 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9552 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9553 && GET_CODE (XEXP (ind, 1)) == PLUS
9554 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9555 ind = XEXP (ind, 1);
9560 if (GET_CODE (ind) == PLUS
9561 && GET_CODE (XEXP (ind, 0)) == REG
9562 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9563 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9564 && INTVAL (XEXP (ind, 1)) > -1024
9565 && INTVAL (XEXP (ind, 1)) < 1024
9566 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9572 /* Return TRUE if OP is a memory operand which we can load or store a vector
9573 to/from. TYPE is one of the following values:
9574 0 - Vector load/store (vldr)
9575 1 - Core registers (ldm)
9576 2 - Element/structure loads (vld1)
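/* For example, type 0 (vldr) additionally accepts [Rn, #imm] with imm a
   multiple of 4 above -1024 and below 1016, whereas type 2 (vld1) is
   restricted to [Rn] and post-increment forms.  */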
9579 neon_vector_mem_operand (rtx op, int type)
9583 /* Reject eliminable registers. */
9584 if (! (reload_in_progress || reload_completed)
9585 && ( reg_mentioned_p (frame_pointer_rtx, op)
9586 || reg_mentioned_p (arg_pointer_rtx, op)
9587 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9588 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9589 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9590 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9593 /* Constants are converted into offsets from labels. */
9594 if (GET_CODE (op) != MEM)
9599 if (reload_completed
9600 && (GET_CODE (ind) == LABEL_REF
9601 || (GET_CODE (ind) == CONST
9602 && GET_CODE (XEXP (ind, 0)) == PLUS
9603 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9604 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9607 /* Match: (mem (reg)). */
9608 if (GET_CODE (ind) == REG)
9609 return arm_address_register_rtx_p (ind, 0);
9611 /* Allow post-increment (and, for vldr/vstr, pre-decrement) with Neon registers. */
9612 if ((type != 1 && GET_CODE (ind) == POST_INC)
9613 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9614 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9616 /* FIXME: vld1 allows register post-modify. */
9622 && GET_CODE (ind) == PLUS
9623 && GET_CODE (XEXP (ind, 0)) == REG
9624 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9625 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9626 && INTVAL (XEXP (ind, 1)) > -1024
9627 && INTVAL (XEXP (ind, 1)) < 1016
9628 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9634 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9637 neon_struct_mem_operand (rtx op)
9641 /* Reject eliminable registers. */
9642 if (! (reload_in_progress || reload_completed)
9643 && ( reg_mentioned_p (frame_pointer_rtx, op)
9644 || reg_mentioned_p (arg_pointer_rtx, op)
9645 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9646 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9647 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9648 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9651 /* Constants are converted into offsets from labels. */
9652 if (GET_CODE (op) != MEM)
9657 if (reload_completed
9658 && (GET_CODE (ind) == LABEL_REF
9659 || (GET_CODE (ind) == CONST
9660 && GET_CODE (XEXP (ind, 0)) == PLUS
9661 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9662 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9665 /* Match: (mem (reg)). */
9666 if (GET_CODE (ind) == REG)
9667 return arm_address_register_rtx_p (ind, 0);
9669 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9670 if (GET_CODE (ind) == POST_INC
9671 || GET_CODE (ind) == PRE_DEC)
9672 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9677 /* Return true if X is a register that will be eliminated later on. */
9679 arm_eliminable_register (rtx x)
9681 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9682 || REGNO (x) == ARG_POINTER_REGNUM
9683 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9684 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9687 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9688 coprocessor registers. Otherwise return NO_REGS. */
9691 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9695 if (!TARGET_NEON_FP16)
9696 return GENERAL_REGS;
9697 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9699 return GENERAL_REGS;
9702 /* The neon move patterns handle all legitimate vector and struct
9705 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9706 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9707 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9708 || VALID_NEON_STRUCT_MODE (mode)))
9711 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9714 return GENERAL_REGS;
9717 /* Values which must be returned in the most-significant end of the return
9721 arm_return_in_msb (const_tree valtype)
9723 return (TARGET_AAPCS_BASED
9725 && (AGGREGATE_TYPE_P (valtype)
9726 || TREE_CODE (valtype) == COMPLEX_TYPE
9727 || FIXED_POINT_TYPE_P (valtype)));
9730 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9731 Used by the Cirrus Maverick code, which has to work around
9732 a hardware bug triggered by such instructions. */
9734 arm_memory_load_p (rtx insn)
9736 rtx body, lhs, rhs;
9738 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9741 body = PATTERN (insn);
9743 if (GET_CODE (body) != SET)
9746 lhs = XEXP (body, 0);
9747 rhs = XEXP (body, 1);
9749 lhs = REG_OR_SUBREG_RTX (lhs);
9751 /* If the destination is not a general purpose
9752 register we do not have to worry. */
9753 if (GET_CODE (lhs) != REG
9754 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9757 /* As well as loads from memory we also have to react
9758 to loads of invalid constants which will be turned
9759 into loads from the minipool. */
9760 return (GET_CODE (rhs) == MEM
9761 || GET_CODE (rhs) == SYMBOL_REF
9762 || note_invalid_constants (insn, -1, false));
9765 /* Return TRUE if INSN is a Cirrus instruction. */
9767 arm_cirrus_insn_p (rtx insn)
9769 enum attr_cirrus attr;
9771 /* get_attr cannot accept USE or CLOBBER. */
9773 || GET_CODE (insn) != INSN
9774 || GET_CODE (PATTERN (insn)) == USE
9775 || GET_CODE (PATTERN (insn)) == CLOBBER)
9778 attr = get_attr_cirrus (insn);
9780 return attr != CIRRUS_NOT;
9783 /* Cirrus reorg for invalid instruction combinations. */
9785 cirrus_reorg (rtx first)
9787 enum attr_cirrus attr;
9788 rtx body = PATTERN (first);
9792 /* Any branch must be followed by 2 non Cirrus instructions. */
9793 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9796 t = next_nonnote_insn (first);
9798 if (arm_cirrus_insn_p (t))
9801 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9805 emit_insn_after (gen_nop (), first);
9810 /* (float (blah)) is in parallel with a clobber. */
9811 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9812 body = XVECEXP (body, 0, 0);
9814 if (GET_CODE (body) == SET)
9816 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9818 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9819 be followed by a non Cirrus insn. */
9820 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9822 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9823 emit_insn_after (gen_nop (), first);
9827 else if (arm_memory_load_p (first))
9829 unsigned int arm_regno;
9831 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9832 ldr/cfmv64hr combination where the Rd field is the same
9833 in both instructions must be split with a non Cirrus insn.
9840 /* Get Arm register number for ldr insn. */
9841 if (GET_CODE (lhs) == REG)
9842 arm_regno = REGNO (lhs);
9845 gcc_assert (GET_CODE (rhs) == REG);
9846 arm_regno = REGNO (rhs);
9850 first = next_nonnote_insn (first);
9852 if (! arm_cirrus_insn_p (first))
9855 body = PATTERN (first);
9857 /* (float (blah)) is in parallel with a clobber. */
9858 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9859 body = XVECEXP (body, 0, 0);
9861 if (GET_CODE (body) == FLOAT)
9862 body = XEXP (body, 0);
9864 if (get_attr_cirrus (first) == CIRRUS_MOVE
9865 && GET_CODE (XEXP (body, 1)) == REG
9866 && arm_regno == REGNO (XEXP (body, 1)))
9867 emit_insn_after (gen_nop (), first);
9873 /* get_attr cannot accept USE or CLOBBER. */
9875 || GET_CODE (first) != INSN
9876 || GET_CODE (PATTERN (first)) == USE
9877 || GET_CODE (PATTERN (first)) == CLOBBER)
9880 attr = get_attr_cirrus (first);
9882 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9883 must be followed by a non-coprocessor instruction. */
9884 if (attr == CIRRUS_COMPARE)
9888 t = next_nonnote_insn (first);
9890 if (arm_cirrus_insn_p (t))
9893 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9897 emit_insn_after (gen_nop (), first);
9903 /* Return TRUE if X references a SYMBOL_REF. */
9905 symbol_mentioned_p (rtx x)
9910 if (GET_CODE (x) == SYMBOL_REF)
9913 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9914 are constant offsets, not symbols. */
9915 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9918 fmt = GET_RTX_FORMAT (GET_CODE (x));
9920 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9926 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9927 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9930 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9937 /* Return TRUE if X references a LABEL_REF. */
9939 label_mentioned_p (rtx x)
9944 if (GET_CODE (x) == LABEL_REF)
9947 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9948 instruction, but they are constant offsets, not symbols. */
9949 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9952 fmt = GET_RTX_FORMAT (GET_CODE (x));
9953 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9959 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9960 if (label_mentioned_p (XVECEXP (x, i, j)))
9963 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9971 tls_mentioned_p (rtx x)
9973 switch (GET_CODE (x))
9976 return tls_mentioned_p (XEXP (x, 0));
9979 if (XINT (x, 1) == UNSPEC_TLS)
9987 /* Must not copy any rtx that uses a pc-relative address. */
9990 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9992 if (GET_CODE (*x) == UNSPEC
9993 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9999 arm_cannot_copy_insn_p (rtx insn)
10001 /* The tls call insn cannot be copied, as it is paired with a data word. */
10003 if (recog_memoized (insn) == CODE_FOR_tlscall)
10006 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10010 minmax_code (rtx x)
10012 enum rtx_code code = GET_CODE (x);
10025 gcc_unreachable ();
10029 /* Return 1 if memory locations are adjacent. */
10031 adjacent_mem_locations (rtx a, rtx b)
10033 /* We don't guarantee to preserve the order of these memory refs. */
10034 if (volatile_refs_p (a) || volatile_refs_p (b))
10037 if ((GET_CODE (XEXP (a, 0)) == REG
10038 || (GET_CODE (XEXP (a, 0)) == PLUS
10039 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
10040 && (GET_CODE (XEXP (b, 0)) == REG
10041 || (GET_CODE (XEXP (b, 0)) == PLUS
10042 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
10044 HOST_WIDE_INT val0 = 0, val1 = 0;
10048 if (GET_CODE (XEXP (a, 0)) == PLUS)
10050 reg0 = XEXP (XEXP (a, 0), 0);
10051 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10054 reg0 = XEXP (a, 0);
10056 if (GET_CODE (XEXP (b, 0)) == PLUS)
10058 reg1 = XEXP (XEXP (b, 0), 0);
10059 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10062 reg1 = XEXP (b, 0);
10064 /* Don't accept any offset that will require multiple
10065 instructions to handle, since this would cause the
10066 arith_adjacentmem pattern to output an overlong sequence. */
10067 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10070 /* Don't allow an eliminable register: register elimination can make
10071 the offset too large. */
10072 if (arm_eliminable_register (reg0))
10075 val_diff = val1 - val0;
10079 /* If the target has load delay slots, then there's no benefit
10080 to using an ldm instruction unless the offset is zero and
10081 we are optimizing for size. */
10082 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10083 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10084 && (val_diff == 4 || val_diff == -4));
10087 return ((REGNO (reg0) == REGNO (reg1))
10088 && (val_diff == 4 || val_diff == -4));
10094 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10095 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10096 instruction. ADD_OFFSET is nonzero if the base address register needs
10097 to be modified with an add instruction before we can use it. */
10100 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10101 int nops, HOST_WIDE_INT add_offset)
10103 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10104 if the offset isn't small enough. The reason 2 ldrs are faster
10105 is because these ARMs are able to do more than one cache access
10106 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10107 whilst the ARM8 has a double bandwidth cache. This means that
10108 these cores can do both an instruction fetch and a data fetch in
10109 a single cycle, so the trick of calculating the address into a
10110 scratch register (one of the result regs) and then doing a load
10111 multiple actually becomes slower (and no smaller in code size).
10112 That is the transformation
10114 ldr rd1, [rbase + offset]
10115 ldr rd2, [rbase + offset + 4]
10119 add rd1, rbase, offset
10120 ldmia rd1, {rd1, rd2}
10122 produces worse code -- '3 cycles + any stalls on rd2' instead of
10123 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10124 access per cycle, the first sequence could never complete in less
10125 than 6 cycles, whereas the ldm sequence would only take 5 and
10126 would make better use of sequential accesses if not hitting the
10129 We cheat here and test 'arm_ld_sched' which we currently know to
10130 only be true for the ARM8, ARM9 and StrongARM. If this ever
10131 changes, then the test below needs to be reworked. */
10132 if (nops == 2 && arm_ld_sched && add_offset != 0)
10135 /* XScale has load-store double instructions, but they have stricter
10136 alignment requirements than load-store multiple, so we cannot use them.
10139 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10140 the pipeline until completion.
10148 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
10157 Best case ldr will always win. However, the more ldr instructions
10158 we issue, the less likely we are to be able to schedule them well.
10159 Using ldr instructions also increases code size.
10161 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10162 for counts of 3 or 4 regs. */
10163 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10168 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10169 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10170 an array ORDER which describes the sequence to use when accessing the
10171 offsets that produces an ascending order. In this sequence, each
10172 offset must be larger by exactly 4 than the previous one. ORDER[0]
10173 must have been filled in with the lowest offset by the caller.
10174 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10175 we use to verify that ORDER produces an ascending order of registers.
10176 Return true if it was possible to construct such an order, false if
10180 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10181 int *unsorted_regs)
10184 for (i = 1; i < nops; i++)
10188 order[i] = order[i - 1];
10189 for (j = 0; j < nops; j++)
10190 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10192 /* We must find exactly one offset that is higher than the
10193 previous one by 4. */
10194 if (order[i] != order[i - 1])
10198 if (order[i] == order[i - 1])
10200 /* The register numbers must be ascending. */
10201 if (unsorted_regs != NULL
10202 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10208 /* Used to determine in a peephole whether a sequence of load
10209 instructions can be changed into a load-multiple instruction.
10210 NOPS is the number of separate load instructions we are examining. The
10211 first NOPS entries in OPERANDS are the destination registers, the
10212 next NOPS entries are memory operands. If this function is
10213 successful, *BASE is set to the common base register of the memory
10214 accesses; *LOAD_OFFSET is set to the first memory location's offset
10215 from that base register.
10216 REGS is an array filled in with the destination register numbers.
10217 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10218 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10219 the sequence of registers in REGS matches the loads from ascending memory
10220 locations, and the function verifies that the register numbers are
10221 themselves ascending. If CHECK_REGS is false, the register numbers
10222 are stored in the order they are found in the operands. */
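/* For instance (register numbers illustrative), given
     ldr r0, [r4]
     ldr r1, [r4, #4]
   this function reports ldm_case 1 (ldmia) with *BASE = 4 and
   *LOAD_OFFSET = 0, allowing the peephole to emit "ldmia r4, {r0, r1}".  */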
10224 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10225 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10227 int unsorted_regs[MAX_LDM_STM_OPS];
10228 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10229 int order[MAX_LDM_STM_OPS];
10230 rtx base_reg_rtx = NULL;
10234 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10235 easily extended if required. */
10236 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10238 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10240 /* Loop over the operands and check that the memory references are
10241 suitable (i.e. immediate offsets from the same base register). At
10242 the same time, extract the target register, and the memory
10244 for (i = 0; i < nops; i++)
10249 /* Convert a subreg of a mem into the mem itself. */
10250 if (GET_CODE (operands[nops + i]) == SUBREG)
10251 operands[nops + i] = alter_subreg (operands + (nops + i));
10253 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10255 /* Don't reorder volatile memory references; it doesn't seem worth
10256 looking for the case where the order is ok anyway. */
10257 if (MEM_VOLATILE_P (operands[nops + i]))
10260 offset = const0_rtx;
10262 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10263 || (GET_CODE (reg) == SUBREG
10264 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10265 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10266 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10268 || (GET_CODE (reg) == SUBREG
10269 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10270 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10275 base_reg = REGNO (reg);
10276 base_reg_rtx = reg;
10277 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10280 else if (base_reg != (int) REGNO (reg))
10281 /* Not addressed from the same base register. */
10284 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10285 ? REGNO (operands[i])
10286 : REGNO (SUBREG_REG (operands[i])));
10288 /* If it isn't an integer register, or if it overwrites the
10289 base register but isn't the last insn in the list, then
10290 we can't do this. */
10291 if (unsorted_regs[i] < 0
10292 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10293 || unsorted_regs[i] > 14
10294 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10297 unsorted_offsets[i] = INTVAL (offset);
10298 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10302 /* Not a suitable memory address. */
10306 /* All the useful information has now been extracted from the
10307 operands into unsorted_regs and unsorted_offsets; additionally,
10308 order[0] has been set to the lowest offset in the list. Sort
10309 the offsets into order, verifying that they are adjacent, and
10310 check that the register numbers are ascending. */
10311 if (!compute_offset_order (nops, unsorted_offsets, order,
10312 check_regs ? unsorted_regs : NULL))
10316 memcpy (saved_order, order, sizeof order);
10322 for (i = 0; i < nops; i++)
10323 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10325 *load_offset = unsorted_offsets[order[0]];
10329 && !peep2_reg_dead_p (nops, base_reg_rtx))
10332 if (unsorted_offsets[order[0]] == 0)
10333 ldm_case = 1; /* ldmia */
10334 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10335 ldm_case = 2; /* ldmib */
10336 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10337 ldm_case = 3; /* ldmda */
10338 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10339 ldm_case = 4; /* ldmdb */
10340 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10341 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10346 if (!multiple_operation_profitable_p (false, nops,
10348 ? unsorted_offsets[order[0]] : 0))
10354 /* Used to determine in a peephole whether a sequence of store instructions can
10355 be changed into a store-multiple instruction.
10356 NOPS is the number of separate store instructions we are examining.
10357 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
10359 The first NOPS entries in OPERANDS are the source registers, the next
10360 NOPS entries are memory operands. If this function is successful, *BASE is
10361 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10362 to the first memory location's offset from that base register. REGS is an
10363 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10364 likewise filled with the corresponding rtx's.
10365 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10366 numbers to an ascending order of stores.
10367 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10368 from ascending memory locations, and the function verifies that the register
10369 numbers are themselves ascending. If CHECK_REGS is false, the register
10370 numbers are stored in the order they are found in the operands. */
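/* Analogously to load_multiple_sequence: e.g. "str r0, [r4]" followed by
   "str r1, [r4, #4]" (registers illustrative) yields stm_case 1 (stmia)
   with *BASE = 4 and *LOAD_OFFSET = 0.  */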
10372 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10373 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10374 HOST_WIDE_INT *load_offset, bool check_regs)
10376 int unsorted_regs[MAX_LDM_STM_OPS];
10377 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10378 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10379 int order[MAX_LDM_STM_OPS];
10381 rtx base_reg_rtx = NULL;
10384 /* Write back of base register is currently only supported for Thumb 1. */
10385 int base_writeback = TARGET_THUMB1;
10387 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10388 easily extended if required. */
10389 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10391 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10393 /* Loop over the operands and check that the memory references are
10394 suitable (i.e. immediate offsets from the same base register). At
10395 the same time, extract the target register, and the memory
10397 for (i = 0; i < nops; i++)
10402 /* Convert a subreg of a mem into the mem itself. */
10403 if (GET_CODE (operands[nops + i]) == SUBREG)
10404 operands[nops + i] = alter_subreg (operands + (nops + i));
10406 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10408 /* Don't reorder volatile memory references; it doesn't seem worth
10409 looking for the case where the order is ok anyway. */
10410 if (MEM_VOLATILE_P (operands[nops + i]))
10413 offset = const0_rtx;
10415 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10416 || (GET_CODE (reg) == SUBREG
10417 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10418 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10419 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10421 || (GET_CODE (reg) == SUBREG
10422 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10423 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10426 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10427 ? operands[i] : SUBREG_REG (operands[i]));
10428 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10432 base_reg = REGNO (reg);
10433 base_reg_rtx = reg;
10434 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10437 else if (base_reg != (int) REGNO (reg))
10438 /* Not addressed from the same base register. */
10441 /* If it isn't an integer register, then we can't do this. */
10442 if (unsorted_regs[i] < 0
10443 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10444 /* The effects are unpredictable if the base register is
10445 both updated and stored. */
10446 || (base_writeback && unsorted_regs[i] == base_reg)
10447 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10448 || unsorted_regs[i] > 14)
10451 unsorted_offsets[i] = INTVAL (offset);
10452 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10456 /* Not a suitable memory address. */
10460 /* All the useful information has now been extracted from the
10461 operands into unsorted_regs and unsorted_offsets; additionally,
10462 order[0] has been set to the lowest offset in the list. Sort
10463 the offsets into order, verifying that they are adjacent, and
10464 check that the register numbers are ascending. */
10465 if (!compute_offset_order (nops, unsorted_offsets, order,
10466 check_regs ? unsorted_regs : NULL))
10470 memcpy (saved_order, order, sizeof order);
10476 for (i = 0; i < nops; i++)
10478 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10480 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10483 *load_offset = unsorted_offsets[order[0]];
10487 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10490 if (unsorted_offsets[order[0]] == 0)
10491 stm_case = 1; /* stmia */
10492 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10493 stm_case = 2; /* stmib */
10494 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10495 stm_case = 3; /* stmda */
10496 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10497 stm_case = 4; /* stmdb */
10501 if (!multiple_operation_profitable_p (false, nops, 0))
10507 /* Routines for use in generating RTL. */
10509 /* Generate a load-multiple instruction. COUNT is the number of loads in
10510 the instruction; REGS and MEMS are arrays containing the operands.
10511 BASEREG is the base register to be used in addressing the memory operands.
10512 WBACK_OFFSET is nonzero if the instruction should update the base
10516 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10517 HOST_WIDE_INT wback_offset)
10522 if (!multiple_operation_profitable_p (false, count, 0))
10528 for (i = 0; i < count; i++)
10529 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10531 if (wback_offset != 0)
10532 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10534 seq = get_insns ();
10540 result = gen_rtx_PARALLEL (VOIDmode,
10541 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10542 if (wback_offset != 0)
10544 XVECEXP (result, 0, 0)
10545 = gen_rtx_SET (VOIDmode, basereg,
10546 plus_constant (basereg, wback_offset));
10551 for (j = 0; i < count; i++, j++)
10552 XVECEXP (result, 0, i)
10553 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10558 /* Generate a store-multiple instruction. COUNT is the number of stores in
10559 the instruction; REGS and MEMS are arrays containing the operands.
10560 BASEREG is the base register to be used in addressing the memory operands.
10561 WBACK_OFFSET is nonzero if the instruction should update the base register. */
10565 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10566 HOST_WIDE_INT wback_offset)
10571 if (GET_CODE (basereg) == PLUS)
10572 basereg = XEXP (basereg, 0);
10574 if (!multiple_operation_profitable_p (false, count, 0))
10580 for (i = 0; i < count; i++)
10581 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10583 if (wback_offset != 0)
10584 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10586 seq = get_insns ();
10592 result = gen_rtx_PARALLEL (VOIDmode,
10593 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10594 if (wback_offset != 0)
10596 XVECEXP (result, 0, 0)
10597 = gen_rtx_SET (VOIDmode, basereg,
10598 plus_constant (basereg, wback_offset));
10603 for (j = 0; i < count; i++, j++)
10604 XVECEXP (result, 0, i)
10605 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10610 /* Generate either a load-multiple or a store-multiple instruction. This
10611 function can be used in situations where we can start with a single MEM
10612 rtx and adjust its address upwards.
10613 COUNT is the number of operations in the instruction, not counting a
10614 possible update of the base register. REGS is an array containing the register numbers to be used.
10616 BASEREG is the base register to be used in addressing the memory operands,
10617 which are constructed from BASEMEM.
10618 WRITE_BACK specifies whether the generated instruction should include an
10619 update of the base register.
10620 OFFSETP is used to pass an offset to and from this function; this offset
10621 is not used when constructing the address (instead BASEMEM should have an
10622 appropriate offset in its address), it is used only for setting
10623 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10626 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10627 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10629 rtx mems[MAX_LDM_STM_OPS];
10630 HOST_WIDE_INT offset = *offsetp;
10633 gcc_assert (count <= MAX_LDM_STM_OPS);
10635 if (GET_CODE (basereg) == PLUS)
10636 basereg = XEXP (basereg, 0);
10638 for (i = 0; i < count; i++)
10640 rtx addr = plus_constant (basereg, i * 4);
10641 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10649 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10650 write_back ? 4 * count : 0);
10652 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10653 write_back ? 4 * count : 0);
10657 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10658 rtx basemem, HOST_WIDE_INT *offsetp)
10660 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10665 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10666 rtx basemem, HOST_WIDE_INT *offsetp)
10668 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10672 /* Called from a peephole2 expander to turn a sequence of loads into an
10673 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10674 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10675 is true if we can reorder the registers because their subsequent uses are commutative.
10677 Returns true iff we could generate a new instruction. */
10680 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10682 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10683 rtx mems[MAX_LDM_STM_OPS];
10684 int i, j, base_reg;
10686 HOST_WIDE_INT offset;
10687 int write_back = FALSE;
10691 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10692 &base_reg, &offset, !sort_regs);
10698 for (i = 0; i < nops - 1; i++)
10699 for (j = i + 1; j < nops; j++)
10700 if (regs[i] > regs[j])
10706 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10710 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10711 gcc_assert (ldm_case == 1 || ldm_case == 5);
10717 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10718 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10720 if (!TARGET_THUMB1)
10722 base_reg = regs[0];
10723 base_reg_rtx = newbase;
10727 for (i = 0; i < nops; i++)
10729 addr = plus_constant (base_reg_rtx, offset + i * 4);
10730 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10733 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10734 write_back ? offset + i * 4 : 0));
10738 /* Called from a peephole2 expander to turn a sequence of stores into an
10739 STM instruction. OPERANDS are the operands found by the peephole matcher;
10740 NOPS indicates how many separate stores we are trying to combine.
10741 Returns true iff we could generate a new instruction. */
10744 gen_stm_seq (rtx *operands, int nops)
10747 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10748 rtx mems[MAX_LDM_STM_OPS];
10751 HOST_WIDE_INT offset;
10752 int write_back = FALSE;
10755 bool base_reg_dies;
10757 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10758 mem_order, &base_reg, &offset, true);
10763 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10765 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10768 gcc_assert (base_reg_dies);
10774 gcc_assert (base_reg_dies);
10775 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10779 addr = plus_constant (base_reg_rtx, offset);
10781 for (i = 0; i < nops; i++)
10783 addr = plus_constant (base_reg_rtx, offset + i * 4);
10784 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10787 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10788 write_back ? offset + i * 4 : 0));
10792 /* Called from a peephole2 expander to turn a sequence of stores that are
10793 preceded by constant loads into an STM instruction. OPERANDS are the
10794 operands found by the peephole matcher; NOPS indicates how many
10795 separate stores we are trying to combine; there are 2 * NOPS
10796 instructions in the peephole.
10797 Returns true iff we could generate a new instruction. */
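/* E.g. (registers illustrative) the sequence "mov r0, #10; str r0, [r4];
   mov r1, #20; str r1, [r4, #4]" can become the two constant loads
   followed by a single "stmia r4, {r0, r1}", provided an ascending
   register assignment can be found or created below.  */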
10800 gen_const_stm_seq (rtx *operands, int nops)
10802 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10803 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10804 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10805 rtx mems[MAX_LDM_STM_OPS];
10808 HOST_WIDE_INT offset;
10809 int write_back = FALSE;
10812 bool base_reg_dies;
10814 HARD_REG_SET allocated;
10816 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10817 mem_order, &base_reg, &offset, false);
10822 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10824 /* If the same register is used more than once, try to find a free register. */
10826 CLEAR_HARD_REG_SET (allocated);
10827 for (i = 0; i < nops; i++)
10829 for (j = i + 1; j < nops; j++)
10830 if (regs[i] == regs[j])
10832 rtx t = peep2_find_free_register (0, nops * 2,
10833 TARGET_THUMB1 ? "l" : "r",
10834 SImode, &allocated);
10838 regs[i] = REGNO (t);
10842 /* Compute an ordering that maps the register numbers to an ascending sequence. */
10845 for (i = 0; i < nops; i++)
10846 if (regs[i] < regs[reg_order[0]])
10849 for (i = 1; i < nops; i++)
10851 int this_order = reg_order[i - 1];
10852 for (j = 0; j < nops; j++)
10853 if (regs[j] > regs[reg_order[i - 1]]
10854 && (this_order == reg_order[i - 1]
10855 || regs[j] < regs[this_order]))
10857 reg_order[i] = this_order;
10860 /* Ensure that registers that must be live after the instruction end
10861 up with the correct value. */
10862 for (i = 0; i < nops; i++)
10864 int this_order = reg_order[i];
10865 if ((this_order != mem_order[i]
10866 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10867 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10871 /* Load the constants. */
10872 for (i = 0; i < nops; i++)
10874 rtx op = operands[2 * nops + mem_order[i]];
10875 sorted_regs[i] = regs[reg_order[i]];
10876 emit_move_insn (reg_rtxs[reg_order[i]], op);
10879 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10881 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10884 gcc_assert (base_reg_dies);
10890 gcc_assert (base_reg_dies);
10891 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10895 addr = plus_constant (base_reg_rtx, offset);
10897 for (i = 0; i < nops; i++)
10899 addr = plus_constant (base_reg_rtx, offset + i * 4);
10900 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10903 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10904 write_back ? offset + i * 4 : 0));
10908 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10909 unaligned copies on processors which support unaligned semantics for those
10910 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10911 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10912 An interleave factor of 1 (the minimum) will perform no interleaving.
10913 Load/store multiple are used for aligned addresses where possible. */
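/* With INTERLEAVE_FACTOR == 2, each 8-byte chunk of an unaligned copy is
   emitted as (registers illustrative)
     ldr rA, [src]
     ldr rB, [src, #4]
     str rA, [dst]
     str rB, [dst, #4]
   so the second load can issue while the first is still in flight; when
   the source or destination is word-aligned, ldmia/stmia are used
   instead.  */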
10916 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10917 HOST_WIDE_INT length,
10918 unsigned int interleave_factor)
10920 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10921 int *regnos = XALLOCAVEC (int, interleave_factor);
10922 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10923 HOST_WIDE_INT i, j;
10924 HOST_WIDE_INT remaining = length, words;
10925 rtx halfword_tmp = NULL, byte_tmp = NULL;
10927 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10928 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10929 HOST_WIDE_INT srcoffset, dstoffset;
10930 HOST_WIDE_INT src_autoinc, dst_autoinc;
10933 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10935 /* Use hard registers if we have aligned source or destination so we can use
10936 load/store multiple with contiguous registers. */
10937 if (dst_aligned || src_aligned)
10938 for (i = 0; i < interleave_factor; i++)
10939 regs[i] = gen_rtx_REG (SImode, i);
10941 for (i = 0; i < interleave_factor; i++)
10942 regs[i] = gen_reg_rtx (SImode);
10944 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10945 src = copy_addr_to_reg (XEXP (srcbase, 0));
10947 srcoffset = dstoffset = 0;
10949 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10950 For copying the last bytes we want to subtract this offset again. */
10951 src_autoinc = dst_autoinc = 0;
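/* Bookkeeping example: after one auto-incrementing ldmia of four
   words, SRC has advanced by 16 bytes and SRC_AUTOINC == 16, while
   SRCOFFSET == 16 as well; the tail code below then addresses byte
   16 of the block as [src, #0], i.e. srcoffset - src_autoinc. */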
10953 for (i = 0; i < interleave_factor; i++)
10956 /* Copy BLOCK_SIZE_BYTES chunks. */
10958 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
10961 if (src_aligned && interleave_factor > 1)
10963 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
10964 TRUE, srcbase, &srcoffset));
10965 src_autoinc += UNITS_PER_WORD * interleave_factor;
10969 for (j = 0; j < interleave_factor; j++)
10971 addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
10973 mem = adjust_automodify_address (srcbase, SImode, addr,
10974 srcoffset + j * UNITS_PER_WORD);
10975 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10977 srcoffset += block_size_bytes;
10981 if (dst_aligned && interleave_factor > 1)
10983 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
10984 TRUE, dstbase, &dstoffset));
10985 dst_autoinc += UNITS_PER_WORD * interleave_factor;
10989 for (j = 0; j < interleave_factor; j++)
10991 addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
10993 mem = adjust_automodify_address (dstbase, SImode, addr,
10994 dstoffset + j * UNITS_PER_WORD);
10995 emit_insn (gen_unaligned_storesi (mem, regs[j]));
10997 dstoffset += block_size_bytes;
11000 remaining -= block_size_bytes;
11003 /* Copy any whole words left (note these aren't interleaved with any
11004 subsequent halfword/byte load/stores in the interests of simplicity). */
11006 words = remaining / UNITS_PER_WORD;
11008 gcc_assert (words < interleave_factor);
11010 if (src_aligned && words > 1)
11012 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11014 src_autoinc += UNITS_PER_WORD * words;
11018 for (j = 0; j < words; j++)
11020 addr = plus_constant (src,
11021 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11022 mem = adjust_automodify_address (srcbase, SImode, addr,
11023 srcoffset + j * UNITS_PER_WORD);
11024 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11026 srcoffset += words * UNITS_PER_WORD;
11029 if (dst_aligned && words > 1)
11031 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11033 dst_autoinc += words * UNITS_PER_WORD;
11037 for (j = 0; j < words; j++)
11039 addr = plus_constant (dst,
11040 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11041 mem = adjust_automodify_address (dstbase, SImode, addr,
11042 dstoffset + j * UNITS_PER_WORD);
11043 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11045 dstoffset += words * UNITS_PER_WORD;
11048 remaining -= words * UNITS_PER_WORD;
11050 gcc_assert (remaining < 4);
11052 /* Copy a halfword if necessary. */
11054 if (remaining >= 2)
11056 halfword_tmp = gen_reg_rtx (SImode);
11058 addr = plus_constant (src, srcoffset - src_autoinc);
11059 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11060 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11062 /* Either write out immediately, or delay until we've loaded the last
11063 byte, depending on the interleave factor. */
11064 if (interleave_factor == 1)
11066 addr = plus_constant (dst, dstoffset - dst_autoinc);
11067 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11068 emit_insn (gen_unaligned_storehi (mem,
11069 gen_lowpart (HImode, halfword_tmp)));
11070 halfword_tmp = NULL;
11078 gcc_assert (remaining < 2);
11080 /* Copy last byte. */
11082 if ((remaining & 1) != 0)
11084 byte_tmp = gen_reg_rtx (SImode);
11086 addr = plus_constant (src, srcoffset - src_autoinc);
11087 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11088 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11090 if (interleave_factor == 1)
11092 addr = plus_constant (dst, dstoffset - dst_autoinc);
11093 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11094 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11103 /* Store last halfword if we haven't done so already. */
11107 addr = plus_constant (dst, dstoffset - dst_autoinc);
11108 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11109 emit_insn (gen_unaligned_storehi (mem,
11110 gen_lowpart (HImode, halfword_tmp)));
11114 /* Likewise for last byte. */
11118 addr = plus_constant (dst, dstoffset - dst_autoinc);
11119 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11120 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11124 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11127 /* From mips_adjust_block_mem:
11129 Helper function for doing a loop-based block operation on memory
11130 reference MEM. Each iteration of the loop will operate on LENGTH bytes of MEM.
11133 Create a new base register for use within the loop and point it to
11134 the start of MEM. Create a new memory reference that uses this
11135 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11138 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11141 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11143 /* Although the new mem does not refer to a known location,
11144 it does keep up to LENGTH bytes of alignment. */
11145 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11146 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11149 /* From mips_block_move_loop:
11151 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11152 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11153 the memory regions do not overlap. */
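/* Illustrative shape of the emitted code for LENGTH == 100 and
   BYTES_PER_ITER == 16 (so LEFTOVER == 4):

	final_src = src + 96
   1:	<straight copy of 16 bytes>
	src += 16; dest += 16
	if (src != final_src) goto 1b
	<straight copy of the remaining 4 bytes>  */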
11156 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11157 unsigned int interleave_factor,
11158 HOST_WIDE_INT bytes_per_iter)
11160 rtx label, src_reg, dest_reg, final_src, test;
11161 HOST_WIDE_INT leftover;
11163 leftover = length % bytes_per_iter;
11164 length -= leftover;
11166 /* Create registers and memory references for use within the loop. */
11167 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11168 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11170 /* Calculate the value that SRC_REG should have after the last iteration of the loop. */
11172 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11173 0, 0, OPTAB_WIDEN);
11175 /* Emit the start of the loop. */
11176 label = gen_label_rtx ();
11177 emit_label (label);
11179 /* Emit the loop body. */
11180 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11181 interleave_factor);
11183 /* Move on to the next block. */
11184 emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
11185 emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
11187 /* Emit the loop condition. */
11188 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11189 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11191 /* Mop up any left-over bytes. */
11193 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11196 /* Emit a block move when either the source or destination is unaligned (not
11197 aligned to a four-byte boundary). This may need further tuning depending on
11198 core type, optimize_size setting, etc. */
11201 arm_movmemqi_unaligned (rtx *operands)
11203 HOST_WIDE_INT length = INTVAL (operands[2]);
11207 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11208 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11209 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11210 code size when optimizing for size. We'll use ldm/stm if src_aligned
11211 or dst_aligned though: allow more interleaving in those cases since the
11212 resulting code can be smaller. */
11213 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11214 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11217 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11218 interleave_factor, bytes_per_iter);
11220 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11221 interleave_factor);
11225 /* Note that the loop created by arm_block_move_unaligned_loop may be
11226 subject to loop unrolling, which makes tuning this condition a little redundant. */
11229 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11231 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11238 arm_gen_movmemqi (rtx *operands)
11240 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11241 HOST_WIDE_INT srcoffset, dstoffset;
11243 rtx src, dst, srcbase, dstbase;
11244 rtx part_bytes_reg = NULL;
11247 if (GET_CODE (operands[2]) != CONST_INT
11248 || GET_CODE (operands[3]) != CONST_INT
11249 || INTVAL (operands[2]) > 64)
11252 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11253 return arm_movmemqi_unaligned (operands);
11255 if (INTVAL (operands[3]) & 3)
11258 dstbase = operands[0];
11259 srcbase = operands[1];
11261 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11262 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11264 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11265 out_words_to_go = INTVAL (operands[2]) / 4;
11266 last_bytes = INTVAL (operands[2]) & 3;
11267 dstoffset = srcoffset = 0;
11269 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11270 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11272 for (i = 0; in_words_to_go >= 2; i+=4)
11274 if (in_words_to_go > 4)
11275 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11276 TRUE, srcbase, &srcoffset));
11278 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11279 src, FALSE, srcbase,
11282 if (out_words_to_go)
11284 if (out_words_to_go > 4)
11285 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11286 TRUE, dstbase, &dstoffset));
11287 else if (out_words_to_go != 1)
11288 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11289 out_words_to_go, dst,
11292 dstbase, &dstoffset));
11295 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11296 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11297 if (last_bytes != 0)
11299 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11305 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11306 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11309 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11310 if (out_words_to_go)
11314 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11315 sreg = copy_to_reg (mem);
11317 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11318 emit_move_insn (mem, sreg);
11321 gcc_assert (!in_words_to_go); /* Sanity check */
11324 if (in_words_to_go)
11326 gcc_assert (in_words_to_go > 0);
11328 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11329 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11332 gcc_assert (!last_bytes || part_bytes_reg);
11334 if (BYTES_BIG_ENDIAN && last_bytes)
11336 rtx tmp = gen_reg_rtx (SImode);
11338 /* The bytes we want are in the top end of the word. */
11339 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11340 GEN_INT (8 * (4 - last_bytes))));
11341 part_bytes_reg = tmp;
11345 mem = adjust_automodify_address (dstbase, QImode,
11346 plus_constant (dst, last_bytes - 1),
11347 dstoffset + last_bytes - 1);
11348 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11352 tmp = gen_reg_rtx (SImode);
11353 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11354 part_bytes_reg = tmp;
11361 if (last_bytes > 1)
11363 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11364 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11368 rtx tmp = gen_reg_rtx (SImode);
11369 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11370 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11371 part_bytes_reg = tmp;
11378 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11379 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11386 /* Select a dominance comparison mode if possible for a test of the general
11387 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11388 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11389 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11390 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11391 In all cases OP will be either EQ or NE, but we don't need to know which
11392 here. If we are unable to support a dominance comparison we return
11393 CC mode. This will then fail to match for the RTL expressions that
11394 generate this call. */
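/* Example (illustrative): for X == (eq r0 r1), Y == (le r0 r1) and
   COND_OR == DOM_CC_X_OR_Y, EQ is dominated by LE (EQ true implies
   LE true), so the pair collapses to a single LE test and the
   function returns CC_DLEmode. */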
11396 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11398 enum rtx_code cond1, cond2;
11401 /* Currently we will probably get the wrong result if the individual
11402 comparisons are not simple. This also ensures that it is safe to
11403 reverse a comparison if necessary. */
11404 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11406 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11410 /* The if_then_else variant of this tests the second condition if the
11411 first passes, but is true if the first fails. Reverse the first
11412 condition to get a true "inclusive-or" expression. */
11413 if (cond_or == DOM_CC_NX_OR_Y)
11414 cond1 = reverse_condition (cond1);
11416 /* If the comparisons are not equal, and one doesn't dominate the other,
11417 then we can't do this. */
11419 && !comparison_dominates_p (cond1, cond2)
11420 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11425 enum rtx_code temp = cond1;
11433 if (cond_or == DOM_CC_X_AND_Y)
11438 case EQ: return CC_DEQmode;
11439 case LE: return CC_DLEmode;
11440 case LEU: return CC_DLEUmode;
11441 case GE: return CC_DGEmode;
11442 case GEU: return CC_DGEUmode;
11443 default: gcc_unreachable ();
11447 if (cond_or == DOM_CC_X_AND_Y)
11459 gcc_unreachable ();
11463 if (cond_or == DOM_CC_X_AND_Y)
11475 gcc_unreachable ();
11479 if (cond_or == DOM_CC_X_AND_Y)
11480 return CC_DLTUmode;
11485 return CC_DLTUmode;
11487 return CC_DLEUmode;
11491 gcc_unreachable ();
11495 if (cond_or == DOM_CC_X_AND_Y)
11496 return CC_DGTUmode;
11501 return CC_DGTUmode;
11503 return CC_DGEUmode;
11507 gcc_unreachable ();
11510 /* The remaining cases only occur when both comparisons are the same. */
11513 gcc_assert (cond1 == cond2);
11517 gcc_assert (cond1 == cond2);
11521 gcc_assert (cond1 == cond2);
11525 gcc_assert (cond1 == cond2);
11526 return CC_DLEUmode;
11529 gcc_assert (cond1 == cond2);
11530 return CC_DGEUmode;
11533 gcc_unreachable ();
11538 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11540 /* All floating point compares return CCFP if it is an equality
11541 comparison, and CCFPE otherwise. */
11542 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11562 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
11567 gcc_unreachable ();
11571 /* A compare with a shifted operand. Because of canonicalization, the
11572 comparison will have to be swapped when we emit the assembler. */
11573 if (GET_MODE (y) == SImode
11574 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11575 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11576 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11577 || GET_CODE (x) == ROTATERT))
11580 /* This operation is performed swapped, but since we only rely on the Z
11581 flag we don't need an additional mode. */
11582 if (GET_MODE (y) == SImode
11583 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11584 && GET_CODE (x) == NEG
11585 && (op == EQ || op == NE))
11588 /* This is a special case that is used by combine to allow a
11589 comparison of a shifted byte load to be split into a zero-extend
11590 followed by a comparison of the shifted integer (only valid for
11591 equalities and unsigned inequalities). */
11592 if (GET_MODE (x) == SImode
11593 && GET_CODE (x) == ASHIFT
11594 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11595 && GET_CODE (XEXP (x, 0)) == SUBREG
11596 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11597 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11598 && (op == EQ || op == NE
11599 || op == GEU || op == GTU || op == LTU || op == LEU)
11600 && GET_CODE (y) == CONST_INT)
11603 /* A construct for a conditional compare: if the false arm contains
11604 0, then both conditions must be true; otherwise either condition
11605 must be true. Not all conditions are possible, so CCmode is
11606 returned if it can't be done. */
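/* For instance (illustrative), the RTL
	(if_then_else (lt r0 r1) (ge r2 r3) (const_int 0))
   is non-zero only if both comparisons hold, so the INTVAL of the
   false arm (0 here) selects the DOM_CC_X_AND_Y case. */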
11607 if (GET_CODE (x) == IF_THEN_ELSE
11608 && (XEXP (x, 2) == const0_rtx
11609 || XEXP (x, 2) == const1_rtx)
11610 && COMPARISON_P (XEXP (x, 0))
11611 && COMPARISON_P (XEXP (x, 1)))
11612 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11613 INTVAL (XEXP (x, 2)));
11615 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11616 if (GET_CODE (x) == AND
11617 && (op == EQ || op == NE)
11618 && COMPARISON_P (XEXP (x, 0))
11619 && COMPARISON_P (XEXP (x, 1)))
11620 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11623 if (GET_CODE (x) == IOR
11624 && (op == EQ || op == NE)
11625 && COMPARISON_P (XEXP (x, 0))
11626 && COMPARISON_P (XEXP (x, 1)))
11627 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11630 /* An operation (on Thumb) where we want to test for a single bit.
11631 This is done by shifting that bit up into the top bit of a
11632 scratch register; we can then branch on the sign bit. */
11634 && GET_MODE (x) == SImode
11635 && (op == EQ || op == NE)
11636 && GET_CODE (x) == ZERO_EXTRACT
11637 && XEXP (x, 1) == const1_rtx)
11640 /* An operation that sets the condition codes as a side-effect: the
11641 V flag is not set correctly, so we can only use comparisons where
11642 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.) */
11644 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11645 if (GET_MODE (x) == SImode
11647 && (op == EQ || op == NE || op == LT || op == GE)
11648 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11649 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11650 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11651 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11652 || GET_CODE (x) == LSHIFTRT
11653 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11654 || GET_CODE (x) == ROTATERT
11655 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11656 return CC_NOOVmode;
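/* For instance (illustrative), a comparison of (plus r1 r2) against
   zero for LT can reuse the N flag set by "adds r0, r1, r2"; the V
   flag such insns set is not meaningful for the comparison, which is
   why only EQ/NE/LT/GE are accepted here. */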
11658 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11661 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11662 && GET_CODE (x) == PLUS
11663 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11666 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11668 /* To keep things simple, always use the Cirrus cfcmp64 if it is available. */
11670 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11677 /* A DImode comparison against zero can be implemented by
11678 or'ing the two halves together. */
11679 if (y == const0_rtx)
11682 /* We can do an equality test in three Thumb instructions. */
11692 /* DImode unsigned comparisons can be implemented by cmp +
11693 cmpeq without a scratch register. Not worth doing in Thumb-2. */
11704 /* DImode signed and unsigned comparisons can be implemented
11705 by cmp + sbcs with a scratch register, but that does not
11706 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11707 gcc_assert (op != EQ && op != NE);
11711 gcc_unreachable ();
11718 /* X and Y are two things to compare using CODE. Emit the compare insn and
11719 return the rtx for register 0 in the proper mode. FP means this is a
11720 floating point compare: I don't think that it is needed on the ARM. */
11722 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
11724 enum machine_mode mode;
11726 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11728 /* We might have X as a constant, Y as a register because of the predicates
11729 used for cmpdi. If so, force X to a register here. */
11730 if (dimode_comparison && !REG_P (x))
11731 x = force_reg (DImode, x);
11733 mode = SELECT_CC_MODE (code, x, y);
11734 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11736 if (dimode_comparison
11737 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11738 && mode != CC_CZmode)
11742 /* To compare two non-zero values for equality, XOR them and
11743 then compare against zero. Not used for ARM mode; there
11744 CC_CZmode is cheaper. */
11745 if (mode == CC_Zmode && y != const0_rtx)
11747 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11750 /* A scratch register is required. */
11751 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
11752 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11753 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11756 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11761 /* Generate a sequence of insns that will generate the correct return
11762 address mask depending on the physical architecture that the program is running on. */
11765 arm_gen_return_addr_mask (void)
11767 rtx reg = gen_reg_rtx (Pmode);
11769 emit_insn (gen_return_addr_mask (reg));
11774 arm_reload_in_hi (rtx *operands)
11776 rtx ref = operands[1];
11778 HOST_WIDE_INT offset = 0;
11780 if (GET_CODE (ref) == SUBREG)
11782 offset = SUBREG_BYTE (ref);
11783 ref = SUBREG_REG (ref);
11786 if (GET_CODE (ref) == REG)
11788 /* We have a pseudo which has been spilled onto the stack; there
11789 are two cases here: the first where there is a simple
11790 stack-slot replacement and a second where the stack-slot is
11791 out of range, or is used as a subreg. */
11792 if (reg_equiv_mem (REGNO (ref)))
11794 ref = reg_equiv_mem (REGNO (ref));
11795 base = find_replacement (&XEXP (ref, 0));
11798 /* The slot is out of range, or was dressed up in a SUBREG. */
11799 base = reg_equiv_address (REGNO (ref));
11802 base = find_replacement (&XEXP (ref, 0));
11804 /* Handle the case where the address is too complex to be offset by 1. */
11805 if (GET_CODE (base) == MINUS
11806 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11808 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11810 emit_set_insn (base_plus, base);
11813 else if (GET_CODE (base) == PLUS)
11815 /* The addend must be CONST_INT, or we would have dealt with it above. */
11816 HOST_WIDE_INT hi, lo;
11818 offset += INTVAL (XEXP (base, 1));
11819 base = XEXP (base, 0);
11821 /* Rework the address into a legal sequence of insns. */
11822 /* Valid range for lo is -4095 -> 4095. */
11825 : -((-offset) & 0xfff));
11827 /* Corner case: if lo is the max offset then we would be out of range
11828 once we have added the additional 1 below, so bump the msb into the
11829 pre-loading insn(s). */
11833 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11834 ^ (HOST_WIDE_INT) 0x80000000)
11835 - (HOST_WIDE_INT) 0x80000000);
11837 gcc_assert (hi + lo == offset);
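/* Worked example (illustrative): offset == 0x1234 splits into
   lo == 0x234 and hi == 0x1000; HI is folded into BASE_PLUS by the
   addsi3 below and the byte loads then use the small LO offsets. */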
11841 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11843 /* Get the base address; addsi3 knows how to handle constants
11844 that require more than one insn. */
11845 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11851 /* Operands[2] may overlap operands[0] (though it won't overlap
11852 operands[1]); that's why we asked for a DImode reg -- so we can
11853 use the half that does not overlap. */
11854 if (REGNO (operands[2]) == REGNO (operands[0]))
11855 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11857 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11859 emit_insn (gen_zero_extendqisi2 (scratch,
11860 gen_rtx_MEM (QImode,
11861 plus_constant (base,
11863 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11864 gen_rtx_MEM (QImode,
11865 plus_constant (base,
11867 if (!BYTES_BIG_ENDIAN)
11868 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11869 gen_rtx_IOR (SImode,
11872 gen_rtx_SUBREG (SImode, operands[0], 0),
11876 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11877 gen_rtx_IOR (SImode,
11878 gen_rtx_ASHIFT (SImode, scratch,
11880 gen_rtx_SUBREG (SImode, operands[0], 0)));
11883 /* Handle storing a half-word to memory during reload by synthesizing as two
11884 byte stores. Take care not to clobber the input values until after we
11885 have moved them somewhere safe. This code assumes that if the DImode
11886 scratch in operands[2] overlaps either the input value or output address
11887 in some way, then that value must die in this insn (we absolutely need
11888 two scratch registers for some corner cases). */
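/* In the simple little-endian case the emitted sequence is
   effectively (illustrative):

	strb	outval, [base, #offset]
	mov	scratch, outval, lsr #8
	strb	scratch, [base, #offset + 1]

   with the big-endian variant storing the two bytes in the opposite
   order. */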
11890 arm_reload_out_hi (rtx *operands)
11892 rtx ref = operands[0];
11893 rtx outval = operands[1];
11895 HOST_WIDE_INT offset = 0;
11897 if (GET_CODE (ref) == SUBREG)
11899 offset = SUBREG_BYTE (ref);
11900 ref = SUBREG_REG (ref);
11903 if (GET_CODE (ref) == REG)
11905 /* We have a pseudo which has been spilled onto the stack; there
11906 are two cases here: the first where there is a simple
11907 stack-slot replacement and a second where the stack-slot is
11908 out of range, or is used as a subreg. */
11909 if (reg_equiv_mem (REGNO (ref)))
11911 ref = reg_equiv_mem (REGNO (ref));
11912 base = find_replacement (&XEXP (ref, 0));
11915 /* The slot is out of range, or was dressed up in a SUBREG. */
11916 base = reg_equiv_address (REGNO (ref));
11919 base = find_replacement (&XEXP (ref, 0));
11921 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11923 /* Handle the case where the address is too complex to be offset by 1. */
11924 if (GET_CODE (base) == MINUS
11925 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11927 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11929 /* Be careful not to destroy OUTVAL. */
11930 if (reg_overlap_mentioned_p (base_plus, outval))
11932 /* Updating base_plus might destroy outval; see if we can
11933 swap the scratch and base_plus. */
11934 if (!reg_overlap_mentioned_p (scratch, outval))
11937 scratch = base_plus;
11942 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11944 /* Be conservative and copy OUTVAL into the scratch now;
11945 this should only be necessary if outval is a subreg
11946 of something larger than a word. */
11947 /* XXX Might this clobber base? I can't see how it can,
11948 since scratch is known to overlap with OUTVAL, and
11949 must be wider than a word. */
11950 emit_insn (gen_movhi (scratch_hi, outval));
11951 outval = scratch_hi;
11955 emit_set_insn (base_plus, base);
11958 else if (GET_CODE (base) == PLUS)
11960 /* The addend must be CONST_INT, or we would have dealt with it above. */
11961 HOST_WIDE_INT hi, lo;
11963 offset += INTVAL (XEXP (base, 1));
11964 base = XEXP (base, 0);
11966 /* Rework the address into a legal sequence of insns. */
11967 /* Valid range for lo is -4095 -> 4095. */
11970 : -((-offset) & 0xfff));
11972 /* Corner case: if lo is the max offset then we would be out of range
11973 once we have added the additional 1 below, so bump the msb into the
11974 pre-loading insn(s). */
11978 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11979 ^ (HOST_WIDE_INT) 0x80000000)
11980 - (HOST_WIDE_INT) 0x80000000);
11982 gcc_assert (hi + lo == offset);
11986 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11988 /* Be careful not to destroy OUTVAL. */
11989 if (reg_overlap_mentioned_p (base_plus, outval))
11991 /* Updating base_plus might destroy outval; see if we
11992 can swap the scratch and base_plus. */
11993 if (!reg_overlap_mentioned_p (scratch, outval))
11996 scratch = base_plus;
12001 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12003 /* Be conservative and copy outval into scratch now;
12004 this should only be necessary if outval is a
12005 subreg of something larger than a word. */
12006 /* XXX Might this clobber base? I can't see how it
12007 can, since scratch is known to overlap with OUTVAL. */
12009 emit_insn (gen_movhi (scratch_hi, outval));
12010 outval = scratch_hi;
12014 /* Get the base address; addsi3 knows how to handle constants
12015 that require more than one insn. */
12016 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12022 if (BYTES_BIG_ENDIAN)
12024 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12025 plus_constant (base, offset + 1)),
12026 gen_lowpart (QImode, outval)));
12027 emit_insn (gen_lshrsi3 (scratch,
12028 gen_rtx_SUBREG (SImode, outval, 0),
12030 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12031 gen_lowpart (QImode, scratch)));
12035 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12036 gen_lowpart (QImode, outval)));
12037 emit_insn (gen_lshrsi3 (scratch,
12038 gen_rtx_SUBREG (SImode, outval, 0),
12040 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12041 plus_constant (base, offset + 1)),
12042 gen_lowpart (QImode, scratch)));
12046 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12047 (padded to the size of a word) should be passed in a register. */
12050 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12052 if (TARGET_AAPCS_BASED)
12053 return must_pass_in_stack_var_size (mode, type);
12055 return must_pass_in_stack_var_size_or_pad (mode, type);
12059 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12060 Return true if an argument passed on the stack should be padded upwards,
12061 i.e. if the least-significant byte has useful data.
12062 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12063 aggregate types are placed in the lowest memory address. */
12066 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12068 if (!TARGET_AAPCS_BASED)
12069 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12071 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12078 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12079 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12080 register has useful data, and return the opposite if the most
12081 significant byte does. */
12084 arm_pad_reg_upward (enum machine_mode mode,
12085 tree type, int first ATTRIBUTE_UNUSED)
12087 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12089 /* For AAPCS, small aggregates, small fixed-point types,
12090 and small complex types are always padded upwards. */
12093 if ((AGGREGATE_TYPE_P (type)
12094 || TREE_CODE (type) == COMPLEX_TYPE
12095 || FIXED_POINT_TYPE_P (type))
12096 && int_size_in_bytes (type) <= 4)
12101 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12102 && GET_MODE_SIZE (mode) <= 4)
12107 /* Otherwise, use default padding. */
12108 return !BYTES_BIG_ENDIAN;
12112 /* Print a symbolic form of X to the debug file, F. */
12114 arm_print_value (FILE *f, rtx x)
12116 switch (GET_CODE (x))
12119 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12123 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12131 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12133 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12134 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12142 fprintf (f, "\"%s\"", XSTR (x, 0));
12146 fprintf (f, "`%s'", XSTR (x, 0));
12150 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12154 arm_print_value (f, XEXP (x, 0));
12158 arm_print_value (f, XEXP (x, 0));
12160 arm_print_value (f, XEXP (x, 1));
12168 fprintf (f, "????");
12173 /* Routines for manipulation of the constant pool. */
12175 /* ARM instructions cannot load a large constant directly into a
12176 register; they have to come from a pc-relative load. The constant
12177 must therefore be placed in the addressable range of the
12178 pc-relative load. Depending on the precise pc-relative load
12179 instruction the range is somewhere between 256 bytes and 4k. This
12180 means that we often have to dump a constant inside a function, and
12181 generate code to branch around it.
12183 It is important to minimize this, since the branches will slow
12184 things down and make the code larger.
12186 Normally we can hide the table after an existing unconditional
12187 branch so that there is no interruption of the flow, but in the
12188 worst case the code looks like this:
12206 We fix this by performing a scan after scheduling, which notices
12207 which instructions need to have their operands fetched from the
12208 constant table and builds the table.
12210 The algorithm starts by building a table of all the constants that
12211 need fixing up and all the natural barriers in the function (places
12212 where a constant table can be dropped without breaking the flow).
12213 For each fixup we note how far the pc-relative replacement will be
12214 able to reach and the offset of the instruction into the function.
12216 Having built the table we then group the fixes together to form
12217 tables that are as large as possible (subject to addressing
12218 constraints) and emit each table of constants after the last
12219 barrier that is within range of all the instructions in the group.
12220 If a group does not contain a barrier, then we forcibly create one
12221 by inserting a jump instruction into the flow. Once the table has
12222 been inserted, the insns are then modified to reference the
12223 relevant entry in the pool.
12225 Possible enhancements to the algorithm (not implemented) are:
12227 1) For some processors and object formats, there may be benefit in
12228 aligning the pools to the start of cache lines; this alignment
12229 would need to be taken into account when calculating addressability of a pool. */
12232 /* These typedefs are located at the start of this file, so that
12233 they can be used in the prototypes there. This comment is to
12234 remind readers of that fact so that the following structures
12235 can be understood more easily.
12237 typedef struct minipool_node Mnode;
12238 typedef struct minipool_fixup Mfix; */
12240 struct minipool_node
12242 /* Doubly linked chain of entries. */
12245 /* The maximum offset into the code at which this entry can be placed. While
12246 pushing fixes for forward references, all entries are sorted in order
12247 of increasing max_address. */
12248 HOST_WIDE_INT max_address;
12249 /* Similarly for an entry inserted for a backwards ref. */
12250 HOST_WIDE_INT min_address;
12251 /* The number of fixes referencing this entry. This can become zero
12252 if we "unpush" an entry. In this case we ignore the entry when we
12253 come to emit the code. */
12255 /* The offset from the start of the minipool. */
12256 HOST_WIDE_INT offset;
12257 /* The value in the table. */
12259 /* The mode of value. */
12260 enum machine_mode mode;
12261 /* The size of the value. With iWMMXt enabled
12262 sizes > 4 also imply an alignment of 8 bytes. */
12266 struct minipool_fixup
12270 HOST_WIDE_INT address;
12272 enum machine_mode mode;
12276 HOST_WIDE_INT forwards;
12277 HOST_WIDE_INT backwards;
12280 /* Fixes less than a word need padding out to a word boundary. */
12281 #define MINIPOOL_FIX_SIZE(mode) \
12282 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
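/* E.g. MINIPOOL_FIX_SIZE (HImode) is 4 (padded up from 2) while
   MINIPOOL_FIX_SIZE (DImode) is 8. */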
12284 static Mnode * minipool_vector_head;
12285 static Mnode * minipool_vector_tail;
12286 static rtx minipool_vector_label;
12287 static int minipool_pad;
12289 /* The linked list of all minipool fixes required for this function. */
12290 Mfix * minipool_fix_head;
12291 Mfix * minipool_fix_tail;
12292 /* The fix entry for the current minipool, once it has been placed. */
12293 Mfix * minipool_barrier;
12295 /* Determines if INSN is the start of a jump table. Returns the end
12296 of the TABLE or NULL_RTX. */
12298 is_jump_table (rtx insn)
12302 if (jump_to_label_p (insn)
12303 && ((table = next_real_insn (JUMP_LABEL (insn)))
12304 == next_real_insn (insn))
12306 && GET_CODE (table) == JUMP_INSN
12307 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12308 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12314 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12315 #define JUMP_TABLES_IN_TEXT_SECTION 0
12318 static HOST_WIDE_INT
12319 get_jump_table_size (rtx insn)
12321 /* ADDR_VECs only take room if read-only data goes into the text section. */
12323 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12325 rtx body = PATTERN (insn);
12326 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12327 HOST_WIDE_INT size;
12328 HOST_WIDE_INT modesize;
12330 modesize = GET_MODE_SIZE (GET_MODE (body));
12331 size = modesize * XVECLEN (body, elt);
12335 /* Round up size of TBB table to a halfword boundary. */
12336 size = (size + 1) & ~(HOST_WIDE_INT)1;
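/* E.g. seven one-byte TBB entries round up to 8 so that whatever
   follows the table stays halfword aligned. */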
12339 /* No padding necessary for TBH. */
12342 /* Add two bytes for alignment on Thumb. */
12347 gcc_unreachable ();
12355 /* Return the maximum amount of padding that will be inserted before label LABEL. */
12358 static HOST_WIDE_INT
12359 get_label_padding (rtx label)
12361 HOST_WIDE_INT align, min_insn_size;
12363 align = 1 << label_to_alignment (label);
12364 min_insn_size = TARGET_THUMB ? 2 : 4;
12365 return align > min_insn_size ? align - min_insn_size : 0;
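/* E.g. a label aligned to 8 bytes on Thumb (minimum insn size 2)
   can be preceded by up to 6 bytes of padding. */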
12368 /* Move a minipool fix MP from its current location to before MAX_MP.
12369 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12370 constraints may need updating. */
12372 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12373 HOST_WIDE_INT max_address)
12375 /* The code below assumes these are different. */
12376 gcc_assert (mp != max_mp);
12378 if (max_mp == NULL)
12380 if (max_address < mp->max_address)
12381 mp->max_address = max_address;
12385 if (max_address > max_mp->max_address - mp->fix_size)
12386 mp->max_address = max_mp->max_address - mp->fix_size;
12388 mp->max_address = max_address;
12390 /* Unlink MP from its current position. Since max_mp is non-null,
12391 mp->prev must be non-null. */
12392 mp->prev->next = mp->next;
12393 if (mp->next != NULL)
12394 mp->next->prev = mp->prev;
12396 minipool_vector_tail = mp->prev;
12398 /* Re-insert it before MAX_MP. */
12400 mp->prev = max_mp->prev;
12403 if (mp->prev != NULL)
12404 mp->prev->next = mp;
12406 minipool_vector_head = mp;
12409 /* Save the new entry. */
12412 /* Scan over the preceding entries and adjust their addresses as required. */
12414 while (mp->prev != NULL
12415 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12417 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12424 /* Add a constant to the minipool for a forward reference. Returns the
12425 node added or NULL if the constant will not fit in this pool. */
12427 add_minipool_forward_ref (Mfix *fix)
12429 /* If set, max_mp is the first pool_entry that has a lower
12430 constraint than the one we are trying to add. */
12431 Mnode * max_mp = NULL;
12432 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12435 /* If the minipool starts before the end of FIX->INSN then this FIX
12436 cannot be placed into the current pool. Furthermore, adding the
12437 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
12439 if (minipool_vector_head &&
12440 (fix->address + get_attr_length (fix->insn)
12441 >= minipool_vector_head->max_address - fix->fix_size))
12444 /* Scan the pool to see if a constant with the same value has
12445 already been added. While we are doing this, also note the
12446 location where we must insert the constant if it doesn't already exist. */
12448 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12450 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12451 && fix->mode == mp->mode
12452 && (GET_CODE (fix->value) != CODE_LABEL
12453 || (CODE_LABEL_NUMBER (fix->value)
12454 == CODE_LABEL_NUMBER (mp->value)))
12455 && rtx_equal_p (fix->value, mp->value))
12457 /* More than one fix references this entry. */
12459 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12462 /* Note the insertion point if necessary. */
12464 && mp->max_address > max_address)
12467 /* If we are inserting an 8-byte aligned quantity and
12468 we have not already found an insertion point, then
12469 make sure that all such 8-byte aligned quantities are
12470 placed at the start of the pool. */
12471 if (ARM_DOUBLEWORD_ALIGN
12473 && fix->fix_size >= 8
12474 && mp->fix_size < 8)
12477 max_address = mp->max_address;
12481 /* The value is not currently in the minipool, so we need to create
12482 a new entry for it. If MAX_MP is NULL, the entry will be put on
12483 the end of the list since the placement is less constrained than
12484 any existing entry. Otherwise, we insert the new fix before
12485 MAX_MP and, if necessary, adjust the constraints on the other entries. */
12488 mp->fix_size = fix->fix_size;
12489 mp->mode = fix->mode;
12490 mp->value = fix->value;
12492 /* Not yet required for a backwards ref. */
12493 mp->min_address = -65536;
12495 if (max_mp == NULL)
12497 mp->max_address = max_address;
12499 mp->prev = minipool_vector_tail;
12501 if (mp->prev == NULL)
12503 minipool_vector_head = mp;
12504 minipool_vector_label = gen_label_rtx ();
12507 mp->prev->next = mp;
12509 minipool_vector_tail = mp;
12513 if (max_address > max_mp->max_address - mp->fix_size)
12514 mp->max_address = max_mp->max_address - mp->fix_size;
12516 mp->max_address = max_address;
12519 mp->prev = max_mp->prev;
12521 if (mp->prev != NULL)
12522 mp->prev->next = mp;
12524 minipool_vector_head = mp;
12527 /* Save the new entry. */
12530 /* Scan over the preceding entries and adjust their addresses as required. */
12532 while (mp->prev != NULL
12533 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12535 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12543 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12544 HOST_WIDE_INT min_address)
12546 HOST_WIDE_INT offset;
12548 /* The code below assumes these are different. */
12549 gcc_assert (mp != min_mp);
12551 if (min_mp == NULL)
12553 if (min_address > mp->min_address)
12554 mp->min_address = min_address;
12558 /* We will adjust this below if it is too loose. */
12559 mp->min_address = min_address;
12561 /* Unlink MP from its current position. Since min_mp is non-null,
12562 mp->next must be non-null. */
12563 mp->next->prev = mp->prev;
12564 if (mp->prev != NULL)
12565 mp->prev->next = mp->next;
12567 minipool_vector_head = mp->next;
12569 /* Reinsert it after MIN_MP. */
12571 mp->next = min_mp->next;
12573 if (mp->next != NULL)
12574 mp->next->prev = mp;
12576 minipool_vector_tail = mp;
12582 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12584 mp->offset = offset;
12585 if (mp->refcount > 0)
12586 offset += mp->fix_size;
12588 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12589 mp->next->min_address = mp->min_address + mp->fix_size;
12595 /* Add a constant to the minipool for a backward reference. Returns the
12596 node added or NULL if the constant will not fit in this pool.
12598 Note that the code for insertion for a backwards reference can be
12599 somewhat confusing because the calculated offsets for each fix do
12600 not take into account the size of the pool (which is still under construction). */
12603 add_minipool_backward_ref (Mfix *fix)
12605 /* If set, min_mp is the last pool_entry that has a lower constraint
12606 than the one we are trying to add. */
12607 Mnode *min_mp = NULL;
12608 /* This can be negative, since it is only a constraint. */
12609 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12612 /* If we can't reach the current pool from this insn, or if we can't
12613 insert this entry at the end of the pool without pushing other
12614 fixes out of range, then we don't try. This ensures that we
12615 can't fail later on. */
12616 if (min_address >= minipool_barrier->address
12617 || (minipool_vector_tail->min_address + fix->fix_size
12618 >= minipool_barrier->address))
12621 /* Scan the pool to see if a constant with the same value has
12622 already been added. While we are doing this, also note the
12623 location where we must insert the constant if it doesn't already exist. */
12625 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12627 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12628 && fix->mode == mp->mode
12629 && (GET_CODE (fix->value) != CODE_LABEL
12630 || (CODE_LABEL_NUMBER (fix->value)
12631 == CODE_LABEL_NUMBER (mp->value)))
12632 && rtx_equal_p (fix->value, mp->value)
12633 /* Check that there is enough slack to move this entry to the
12634 end of the table (this is conservative). */
12635 && (mp->max_address
12636 > (minipool_barrier->address
12637 + minipool_vector_tail->offset
12638 + minipool_vector_tail->fix_size)))
12641 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12644 if (min_mp != NULL)
12645 mp->min_address += fix->fix_size;
12648 /* Note the insertion point if necessary. */
12649 if (mp->min_address < min_address)
12651 /* For now, we do not allow the insertion of nodes requiring 8-byte
12652 alignment anywhere but at the start of the pool. */
12653 if (ARM_DOUBLEWORD_ALIGN
12654 && fix->fix_size >= 8 && mp->fix_size < 8)
12659 else if (mp->max_address
12660 < minipool_barrier->address + mp->offset + fix->fix_size)
12662 /* Inserting before this entry would push the fix beyond
12663 its maximum address (which can happen if we have
12664 re-located a forwards fix); force the new fix to come after it. */
12666 if (ARM_DOUBLEWORD_ALIGN
12667 && fix->fix_size >= 8 && mp->fix_size < 8)
12672 min_address = mp->min_address + fix->fix_size;
12675 /* Do not insert a non-8-byte aligned quantity before 8-byte
12676 aligned quantities. */
12677 else if (ARM_DOUBLEWORD_ALIGN
12678 && fix->fix_size < 8
12679 && mp->fix_size >= 8)
12682 min_address = mp->min_address + fix->fix_size;
12687 /* We need to create a new entry. */
12689 mp->fix_size = fix->fix_size;
12690 mp->mode = fix->mode;
12691 mp->value = fix->value;
12693 mp->max_address = minipool_barrier->address + 65536;
12695 mp->min_address = min_address;
12697 if (min_mp == NULL)
12700 mp->next = minipool_vector_head;
12702 if (mp->next == NULL)
12704 minipool_vector_tail = mp;
12705 minipool_vector_label = gen_label_rtx ();
12708 mp->next->prev = mp;
12710 minipool_vector_head = mp;
12714 mp->next = min_mp->next;
12718 if (mp->next != NULL)
12719 mp->next->prev = mp;
12721 minipool_vector_tail = mp;
12724 /* Save the new entry. */
12732 /* Scan over the following entries and adjust their offsets. */
12733 while (mp->next != NULL)
12735 if (mp->next->min_address < mp->min_address + mp->fix_size)
12736 mp->next->min_address = mp->min_address + mp->fix_size;
12739 mp->next->offset = mp->offset + mp->fix_size;
12741 mp->next->offset = mp->offset;
12750 assign_minipool_offsets (Mfix *barrier)
12752 HOST_WIDE_INT offset = 0;
12755 minipool_barrier = barrier;
12757 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12759 mp->offset = offset;
12761 if (mp->refcount > 0)
12762 offset += mp->fix_size;
12766 /* Output the literal table. */
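/* The emitted sequence is, in outline: a fresh label, an alignment
   to 4 (or 8) bytes, minipool_vector_label, one consttable_<size>
   insn per entry that is still referenced, then consttable_end and
   a barrier. */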
12768 dump_minipool (rtx scan)
12774 if (ARM_DOUBLEWORD_ALIGN)
12775 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12776 if (mp->refcount > 0 && mp->fix_size >= 8)
12783 fprintf (dump_file,
12784 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12785 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12787 scan = emit_label_after (gen_label_rtx (), scan);
12788 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12789 scan = emit_label_after (minipool_vector_label, scan);
12791 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12793 if (mp->refcount > 0)
12797 fprintf (dump_file,
12798 ";; Offset %u, min %ld, max %ld ",
12799 (unsigned) mp->offset, (unsigned long) mp->min_address,
12800 (unsigned long) mp->max_address);
12801 arm_print_value (dump_file, mp->value);
12802 fputc ('\n', dump_file);
12805 switch (mp->fix_size)
12807 #ifdef HAVE_consttable_1
12809 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12813 #ifdef HAVE_consttable_2
12815 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12819 #ifdef HAVE_consttable_4
12821 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12825 #ifdef HAVE_consttable_8
12827 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12831 #ifdef HAVE_consttable_16
12833 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12838 gcc_unreachable ();
12846 minipool_vector_head = minipool_vector_tail = NULL;
12847 scan = emit_insn_after (gen_consttable_end (), scan);
12848 scan = emit_barrier_after (scan);
12851 /* Return the cost of forcibly inserting a barrier after INSN. */
12853 arm_barrier_cost (rtx insn)
12855 /* Basing the location of the pool on the loop depth is preferable,
12856 but at the moment, the basic block information seems to be
12857 corrupted by this stage of the compilation. */
12858 int base_cost = 50;
12859 rtx next = next_nonnote_insn (insn);
12861 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12864 switch (GET_CODE (insn))
12867 /* It will always be better to place the table before the label, rather than after it. */
12876 return base_cost - 10;
12879 return base_cost + 10;
12883 /* Find the best place in the insn stream in the range
12884 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12885 Create the barrier by inserting a jump and add a new fix entry for it. */
12888 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12890 HOST_WIDE_INT count = 0;
12892 rtx from = fix->insn;
12893 /* The instruction after which we will insert the jump. */
12894 rtx selected = NULL;
12896 /* The address at which the jump instruction will be placed. */
12897 HOST_WIDE_INT selected_address;
12899 HOST_WIDE_INT max_count = max_address - fix->address;
12900 rtx label = gen_label_rtx ();
12902 selected_cost = arm_barrier_cost (from);
12903 selected_address = fix->address;
12905 while (from && count < max_count)
12910 /* This code shouldn't have been called if there was a natural barrier within range. */
12912 gcc_assert (GET_CODE (from) != BARRIER);
12914 /* Count the length of this insn. This must stay in sync with the
12915 code that pushes minipool fixes. */
12916 if (LABEL_P (from))
12917 count += get_label_padding (from);
12919 count += get_attr_length (from);
12921 /* If there is a jump table, add its length. */
12922 tmp = is_jump_table (from);
12925 count += get_jump_table_size (tmp);
12927 /* Jump tables aren't in a basic block, so base the cost on
12928 the dispatch insn. If we select this location, we will
12929 still put the pool after the table. */
12930 new_cost = arm_barrier_cost (from);
12932 if (count < max_count
12933 && (!selected || new_cost <= selected_cost))
12936 selected_cost = new_cost;
12937 selected_address = fix->address + count;
12940 /* Continue after the dispatch table. */
12941 from = NEXT_INSN (tmp);
12945 new_cost = arm_barrier_cost (from);
12947 if (count < max_count
12948 && (!selected || new_cost <= selected_cost))
12951 selected_cost = new_cost;
12952 selected_address = fix->address + count;
12955 from = NEXT_INSN (from);
12958 /* Make sure that we found a place to insert the jump. */
12959 gcc_assert (selected);
12961 /* Make sure we do not split a call and its corresponding
12962 CALL_ARG_LOCATION note. */
12963 if (CALL_P (selected))
12965 rtx next = NEXT_INSN (selected);
12966 if (next && NOTE_P (next)
12967 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12971 /* Create a new JUMP_INSN that branches around a barrier. */
12972 from = emit_jump_insn_after (gen_jump (label), selected);
12973 JUMP_LABEL (from) = label;
12974 barrier = emit_barrier_after (from);
12975 emit_label_after (label, barrier);
12977 /* Create a minipool barrier entry for the new barrier. */
12978 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12979 new_fix->insn = barrier;
12980 new_fix->address = selected_address;
12981 new_fix->next = fix->next;
12982 fix->next = new_fix;
12987 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
12990 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12992 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12995 fix->address = address;
12998 if (minipool_fix_head != NULL)
12999 minipool_fix_tail->next = fix;
13001 minipool_fix_head = fix;
13003 minipool_fix_tail = fix;
13006 /* Record INSN, which will need fixing up to load a value from the
13007 minipool. ADDRESS is the offset of the insn from the start of the
13008 function; LOC is a pointer to the part of the insn which requires
13009 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
13012 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13013 enum machine_mode mode, rtx value)
13015 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13018 fix->address = address;
13021 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13022 fix->value = value;
13023 fix->forwards = get_attr_pool_range (insn);
13024 fix->backwards = get_attr_neg_pool_range (insn);
13025 fix->minipool = NULL;
13027 /* If an insn doesn't have a range defined for it, then it isn't
13028 expecting to be reworked by this code. Better to stop now than
13029 to generate duff assembly code. */
13030 gcc_assert (fix->forwards || fix->backwards);
13032 /* If an entry requires 8-byte alignment then assume all constant pools
13033 require 4 bytes of padding. Trying to do this later on a per-pool
13034 basis is awkward because existing pool entries have to be modified. */
13035 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13040 fprintf (dump_file,
13041 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13042 GET_MODE_NAME (mode),
13043 INSN_UID (insn), (unsigned long) address,
13044 -1 * (long)fix->backwards, (long)fix->forwards);
13045 arm_print_value (dump_file, fix->value);
13046 fprintf (dump_file, "\n");
13049 /* Add it to the chain of fixes. */
13052 if (minipool_fix_head != NULL)
13053 minipool_fix_tail->next = fix;
13055 minipool_fix_head = fix;
13057 minipool_fix_tail = fix;
13060 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13061 Returns the number of insns needed, or 99 if we don't know how to do it. */
13064 arm_const_double_inline_cost (rtx val)
13066 rtx lowpart, highpart;
13067 enum machine_mode mode;
13069 mode = GET_MODE (val);
13071 if (mode == VOIDmode)
13074 gcc_assert (GET_MODE_SIZE (mode) == 8);
13076 lowpart = gen_lowpart (SImode, val);
13077 highpart = gen_highpart_mode (SImode, mode, val);
13079 gcc_assert (GET_CODE (lowpart) == CONST_INT);
13080 gcc_assert (GET_CODE (highpart) == CONST_INT);
13082 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13083 NULL_RTX, NULL_RTX, 0, 0)
13084 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13085 NULL_RTX, NULL_RTX, 0, 0));
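/* Illustrative cost: for the DImode constant 0x000000ff000000ff both
   32-bit halves are 0xff, a valid immediate, so each half needs one
   insn and the total returned is 2. */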
13088 /* Return true if it is worthwhile to split a 64-bit constant into two
13089 32-bit operations. This is the case if optimizing for size, or
13090 if we have load delay slots, or if one 32-bit part can be done with
13091 a single data operation. */
13093 arm_const_double_by_parts (rtx val)
13095 enum machine_mode mode = GET_MODE (val);
13098 if (optimize_size || arm_ld_sched)
13101 if (mode == VOIDmode)
13104 part = gen_highpart_mode (SImode, mode, val);
13106 gcc_assert (GET_CODE (part) == CONST_INT);
13108 if (const_ok_for_arm (INTVAL (part))
13109 || const_ok_for_arm (~INTVAL (part)))
13112 part = gen_lowpart (SImode, val);
13114 gcc_assert (GET_CODE (part) == CONST_INT);
13116 if (const_ok_for_arm (INTVAL (part))
13117 || const_ok_for_arm (~INTVAL (part)))
13123 /* Return true if it is possible to inline both the high and low parts
13124 of a 64-bit constant into 32-bit data processing instructions. */
13126 arm_const_double_by_immediates (rtx val)
13128 enum machine_mode mode = GET_MODE (val);
13131 if (mode == VOIDmode)
13134 part = gen_highpart_mode (SImode, mode, val);
13136 gcc_assert (GET_CODE (part) == CONST_INT);
13138 if (!const_ok_for_arm (INTVAL (part)))
13141 part = gen_lowpart (SImode, val);
13143 gcc_assert (GET_CODE (part) == CONST_INT);
13145 if (!const_ok_for_arm (INTVAL (part)))
13151 /* Scan INSN and note any of its operands that need fixing.
13152 If DO_PUSHES is false we do not actually push any of the fixups
13153 needed. The function returns TRUE if any fixups were needed/pushed.
13154 This is used by arm_memory_load_p() which needs to know about loads
13155 of constants that will be converted into minipool loads. */
13157 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13159 bool result = false;
13162 extract_insn (insn);
13164 if (!constrain_operands (1))
13165 fatal_insn_not_found (insn);
13167 if (recog_data.n_alternatives == 0)
13170 /* Fill in recog_op_alt with information about the constraints of this insn. */
13172 preprocess_constraints ();
13174 for (opno = 0; opno < recog_data.n_operands; opno++)
13176 /* Things we need to fix can only occur in inputs. */
13177 if (recog_data.operand_type[opno] != OP_IN)
13180 /* If this alternative is a memory reference, then any mention
13181 of constants in this alternative is really to fool reload
13182 into allowing us to accept one there. We need to fix them up
13183 now so that we output the right code. */
13184 if (recog_op_alt[opno][which_alternative].memory_ok)
13186 rtx op = recog_data.operand[opno];
13188 if (CONSTANT_P (op))
13191 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13192 recog_data.operand_mode[opno], op);
13195 else if (GET_CODE (op) == MEM
13196 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13197 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13201 rtx cop = avoid_constant_pool_reference (op);
13203 /* Casting the address of something to a mode narrower
13204 than a word can cause avoid_constant_pool_reference()
13205 to return the pool reference itself. That's no good to
13206 us here. Let's just hope that we can use the
13207 constant pool value directly. */
13209 cop = get_pool_constant (XEXP (op, 0));
13211 push_minipool_fix (insn, address,
13212 recog_data.operand_loc[opno],
13213 recog_data.operand_mode[opno], cop);
13224 /* Convert instructions to their cc-clobbering variant if possible, since
13225 that allows us to use smaller encodings. */
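/* For example, when the condition flags are dead, the 32-bit Thumb-2
   encoding of "add r0, r0, r1" can be rewritten as the flag-setting
   "adds r0, r0, r1", which has a 16-bit encoding.  */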
13228 thumb2_reorg (void)
13233 INIT_REG_SET (&live);
13235 /* We are freeing block_for_insn in the toplev to keep compatibility
13236 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13237 compute_bb_for_insn ();
13244 COPY_REG_SET (&live, DF_LR_OUT (bb));
13245 df_simulate_initialize_backwards (bb, &live);
13246 FOR_BB_INSNS_REVERSE (bb, insn)
13248 if (NONJUMP_INSN_P (insn)
13249 && !REGNO_REG_SET_P (&live, CC_REGNUM))
13251 rtx pat = PATTERN (insn);
13252 if (GET_CODE (pat) == SET
13253 && low_register_operand (XEXP (pat, 0), SImode)
13254 && thumb_16bit_operator (XEXP (pat, 1), SImode)
13255 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
13256 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
13258 rtx dst = XEXP (pat, 0);
13259 rtx src = XEXP (pat, 1);
13260 rtx op0 = XEXP (src, 0);
13261 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
13262 ? XEXP (src, 1) : NULL);
13264 if (rtx_equal_p (dst, op0)
13265 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
13267 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13268 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13269 rtvec vec = gen_rtvec (2, pat, clobber);
13271 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13272 INSN_CODE (insn) = -1;
13274 /* We can also handle a commutative operation where the
13275 second operand matches the destination. */
13276 else if (op1 && rtx_equal_p (dst, op1))
13278 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13279 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13282 src = copy_rtx (src);
13283 XEXP (src, 0) = op1;
13284 XEXP (src, 1) = op0;
13285 pat = gen_rtx_SET (VOIDmode, dst, src);
13286 vec = gen_rtvec (2, pat, clobber);
13287 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13288 INSN_CODE (insn) = -1;
13293 if (NONDEBUG_INSN_P (insn))
13294 df_simulate_one_insn_backwards (bb, insn, &live);
13298 CLEAR_REG_SET (&live);
13301 /* GCC puts the pool in the wrong place for ARM, since we can only
13302 load addresses a limited distance around the pc. We do some
13303 special munging to move the constant pool values to the correct
13304 point in the code. */
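/* For example, an ARM-mode LDR of a literal can reach only about
   +/-4095 bytes around the pc, and coprocessor loads such as the VFP
   ones reach only +/-1020 bytes, so each pool must be dumped close to
   the instructions that reference it.  */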
13309 HOST_WIDE_INT address = 0;
13315 minipool_fix_head = minipool_fix_tail = NULL;
13317 /* The first insn must always be a note, or the code below won't
13318 scan it properly. */
13319 insn = get_insns ();
13320 gcc_assert (GET_CODE (insn) == NOTE);
13323 /* Scan all the insns and record the operands that will need fixing. */
13324 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13326 if (TARGET_CIRRUS_FIX_INVALID_INSNS
13327 && (arm_cirrus_insn_p (insn)
13328 || GET_CODE (insn) == JUMP_INSN
13329 || arm_memory_load_p (insn)))
13330 cirrus_reorg (insn);
13332 if (GET_CODE (insn) == BARRIER)
13333 push_minipool_barrier (insn, address);
13334 else if (INSN_P (insn))
13338 note_invalid_constants (insn, address, true);
13339 address += get_attr_length (insn);
13341 /* If the insn is a vector jump, add the size of the table
13342 and skip the table. */
13343 if ((table = is_jump_table (insn)) != NULL)
13345 address += get_jump_table_size (table);
13349 else if (LABEL_P (insn))
13350 /* Add the worst-case padding due to alignment. We don't add
13351 the _current_ padding because the minipool insertions
13352 themselves might change it. */
13353 address += get_label_padding (insn);
13356 fix = minipool_fix_head;
13358 /* Now scan the fixups and perform the required changes. */
13363 Mfix * last_added_fix;
13364 Mfix * last_barrier = NULL;
13367 /* Skip any further barriers before the next fix. */
13368 while (fix && GET_CODE (fix->insn) == BARRIER)
13371 /* No more fixes. */
13375 last_added_fix = NULL;
13377 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13379 if (GET_CODE (ftmp->insn) == BARRIER)
13381 if (ftmp->address >= minipool_vector_head->max_address)
13384 last_barrier = ftmp;
13386 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13389 last_added_fix = ftmp; /* Keep track of the last fix added. */
13392 /* If we found a barrier, drop back to that; any fixes that we
13393 could have reached but come after the barrier will now go in
13394 the next mini-pool. */
13395 if (last_barrier != NULL)
13397 /* Reduce the refcount for those fixes that won't go into this pool after all. */
13399 for (fdel = last_barrier->next;
13400 fdel && fdel != ftmp;
13403 fdel->minipool->refcount--;
13404 fdel->minipool = NULL;
13407 ftmp = last_barrier;
13411 /* ftmp is the first fix that we can't fit into this pool and
13412 there are no natural barriers that we could use. Insert a
13413 new barrier in the code somewhere between the previous
13414 fix and this one, and arrange to jump around it. */
13415 HOST_WIDE_INT max_address;
13417 /* The last item on the list of fixes must be a barrier, so
13418 we can never run off the end of the list of fixes without
13419 last_barrier being set. */
13422 max_address = minipool_vector_head->max_address;
13423 /* Check that there isn't another fix that is in range that
13424 we couldn't fit into this pool because the pool was
13425 already too large: we need to put the pool before such an
13426 instruction. The pool itself may come just after the
13427 fix because create_fix_barrier also allows space for a
13428 jump instruction. */
13429 if (ftmp->address < max_address)
13430 max_address = ftmp->address + 1;
13432 last_barrier = create_fix_barrier (last_added_fix, max_address);
13435 assign_minipool_offsets (last_barrier);
13439 if (GET_CODE (ftmp->insn) != BARRIER
13440 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13447 /* Scan over the fixes we have identified for this pool, fixing them
13448 up and adding the constants to the pool itself. */
13449 for (this_fix = fix; this_fix && ftmp != this_fix;
13450 this_fix = this_fix->next)
13451 if (GET_CODE (this_fix->insn) != BARRIER)
13454 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
13455 minipool_vector_label),
13456 this_fix->minipool->offset);
13457 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13460 dump_minipool (last_barrier->insn);
13464 /* From now on we must synthesize any constants that we can't handle
13465 directly. This can happen if the RTL gets split during final
13466 instruction generation. */
13467 after_arm_reorg = 1;
13469 /* Free the minipool memory. */
13470 obstack_free (&minipool_obstack, minipool_startobj);
13473 /* Routines to output assembly language. */
13475 /* If the rtx is a valid FPA immediate constant then return the string of
13476 the number. In this way we can ensure that valid double constants are
13477 generated even when cross compiling. */
13479 fp_immediate_constant (rtx x)
13484 if (!fp_consts_inited)
13487 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13488 for (i = 0; i < 8; i++)
13489 if (REAL_VALUES_EQUAL (r, values_fp[i]))
13490 return strings_fp[i];
13492 gcc_unreachable ();
13495 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13496 static const char *
13497 fp_const_from_val (REAL_VALUE_TYPE *r)
13501 if (!fp_consts_inited)
13504 for (i = 0; i < 8; i++)
13505 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13506 return strings_fp[i];
13508 gcc_unreachable ();
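/* The eight values searched by the two functions above are the
   immediates encodable by the FPA instructions; values_fp[] and
   strings_fp[] are initialized elsewhere in this file (init_fp_table)
   to 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 0.5 and 10.0.  */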
13511 /* Output the operands of a LDM/STM instruction to STREAM.
13512 MASK is the ARM register set mask of which only bits 0-15 are important.
13513 REG is the base register, either the frame pointer or the stack pointer,
13514 INSTR is the possibly suffixed load or store instruction.
13515 RFE is nonzero if the instruction should also copy spsr to cpsr. */
13518 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13519 unsigned long mask, int rfe)
13522 bool not_first = FALSE;
13524 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13525 fputc ('\t', stream);
13526 asm_fprintf (stream, instr, reg);
13527 fputc ('{', stream);
13529 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13530 if (mask & (1 << i))
13533 fprintf (stream, ", ");
13535 asm_fprintf (stream, "%r", i);
13540 fprintf (stream, "}^\n");
13542 fprintf (stream, "}\n");
13546 /* Output a FLDMD instruction to STREAM.
13547 BASE is the register containing the address.
13548 REG and COUNT specify the register range.
13549 Extra registers may be added to avoid hardware bugs.
13551 We output FLDMD even for ARMv5 VFP implementations. Although
13552 FLDMD is technically not supported until ARMv6, it is believed
13553 that all VFP implementations support its use in this context. */
13556 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13560 /* Workaround ARM10 VFPr1 bug. */
13561 if (count == 2 && !arm_arch6)
13568 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13569 load into multiple parts if we have to handle more than 16 registers. */
13572 vfp_output_fldmd (stream, base, reg, 16);
13573 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13577 fputc ('\t', stream);
13578 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13580 for (i = reg; i < reg + count; i++)
13583 fputs (", ", stream);
13584 asm_fprintf (stream, "d%d", i);
13586 fputs ("}\n", stream);
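/* For example, vfp_output_fldmd (f, SP_REGNUM, 8, 2) prints
   "fldmfdd sp!, {d8, d9}" on an ARMv6 core; on earlier cores the
   ARM10 VFPr1 workaround above widens the same request to
   {d8, d9, d10}.  */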
13591 /* Output the assembly for a store multiple. */
13594 vfp_output_fstmd (rtx * operands)
13601 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13602 p = strlen (pattern);
13604 gcc_assert (GET_CODE (operands[1]) == REG);
13606 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13607 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13609 p += sprintf (&pattern[p], ", d%d", base + i);
13611 strcpy (&pattern[p], "}");
13613 output_asm_insn (pattern, operands);
13618 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13619 number of bytes pushed. */
13622 vfp_emit_fstmd (int base_reg, int count)
13629 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13630 register pairs are stored by a store multiple insn. We avoid this
13631 by pushing an extra pair. */
13632 if (count == 2 && !arm_arch6)
13634 if (base_reg == LAST_VFP_REGNUM - 3)
13639 /* FSTMD may not store more than 16 doubleword registers at once. Split
13640 larger stores into multiple parts (up to a maximum of two, in practice). */
13645 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
13647 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13648 saved += vfp_emit_fstmd (base_reg, 16);
13652 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13653 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13655 reg = gen_rtx_REG (DFmode, base_reg);
13658 XVECEXP (par, 0, 0)
13659 = gen_rtx_SET (VOIDmode,
13662 gen_rtx_PRE_MODIFY (Pmode,
13665 (stack_pointer_rtx,
13668 gen_rtx_UNSPEC (BLKmode,
13669 gen_rtvec (1, reg),
13670 UNSPEC_PUSH_MULT));
13672 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13673 plus_constant (stack_pointer_rtx, -(count * 8)));
13674 RTX_FRAME_RELATED_P (tmp) = 1;
13675 XVECEXP (dwarf, 0, 0) = tmp;
13677 tmp = gen_rtx_SET (VOIDmode,
13678 gen_frame_mem (DFmode, stack_pointer_rtx),
13680 RTX_FRAME_RELATED_P (tmp) = 1;
13681 XVECEXP (dwarf, 0, 1) = tmp;
13683 for (i = 1; i < count; i++)
13685 reg = gen_rtx_REG (DFmode, base_reg);
13687 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13689 tmp = gen_rtx_SET (VOIDmode,
13690 gen_frame_mem (DFmode,
13691 plus_constant (stack_pointer_rtx,
13694 RTX_FRAME_RELATED_P (tmp) = 1;
13695 XVECEXP (dwarf, 0, i + 1) = tmp;
13698 par = emit_insn (par);
13699 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13700 RTX_FRAME_RELATED_P (par) = 1;
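/* Note that a two-pair push on a pre-ARMv6 core is widened to three
   pairs by the erratum workaround above, so 24 rather than 16 bytes
   are pushed and reported to the caller.  */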
13705 /* Emit a call instruction with pattern PAT. ADDR is the address of
13706 the call target. */
13709 arm_emit_call_insn (rtx pat, rtx addr)
13713 insn = emit_call_insn (pat);
13715 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13716 If the call might use such an entry, add a use of the PIC register
13717 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13718 if (TARGET_VXWORKS_RTP
13720 && GET_CODE (addr) == SYMBOL_REF
13721 && (SYMBOL_REF_DECL (addr)
13722 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13723 : !SYMBOL_REF_LOCAL_P (addr)))
13725 require_pic_register ();
13726 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13730 /* Output a 'call' insn. */
13732 output_call (rtx *operands)
13734 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13736 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13737 if (REGNO (operands[0]) == LR_REGNUM)
13739 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13740 output_asm_insn ("mov%?\t%0, %|lr", operands);
13743 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13745 if (TARGET_INTERWORK || arm_arch4t)
13746 output_asm_insn ("bx%?\t%0", operands);
13748 output_asm_insn ("mov%?\t%|pc, %0", operands);
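/* So a pre-ARMv5 call through, say, r3 becomes "mov lr, pc" followed
   by "bx r3" when interworking or ARMv4T applies, and "mov lr, pc"
   followed by "mov pc, r3" otherwise.  */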
13753 /* Output a 'call' insn that is a reference in memory. This is
13754 disabled for ARMv5 and we prefer a blx instead because otherwise
13755 there's a significant performance overhead. */
13757 output_call_mem (rtx *operands)
13759 gcc_assert (!arm_arch5);
13760 if (TARGET_INTERWORK)
13762 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13763 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13764 output_asm_insn ("bx%?\t%|ip", operands);
13766 else if (regno_use_in (LR_REGNUM, operands[0]))
13768 /* LR is used in the memory address. We load the address in the
13769 first instruction. It's safe to use IP as the target of the
13770 load since the call will kill it anyway. */
13771 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13772 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13774 output_asm_insn ("bx%?\t%|ip", operands);
13776 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13780 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13781 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13788 /* Output a move from arm registers to an fpa register.
13789 OPERANDS[0] is an fpa register.
13790 OPERANDS[1] is the first register of an arm register pair. */
13792 output_mov_long_double_fpa_from_arm (rtx *operands)
13794 int arm_reg0 = REGNO (operands[1]);
13797 gcc_assert (arm_reg0 != IP_REGNUM);
13799 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13800 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13801 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13803 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13804 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13809 /* Output a move from an fpa register to arm registers.
13810 OPERANDS[0] is the first register of an arm register pair.
13811 OPERANDS[1] is an fpa register. */
13813 output_mov_long_double_arm_from_fpa (rtx *operands)
13815 int arm_reg0 = REGNO (operands[0]);
13818 gcc_assert (arm_reg0 != IP_REGNUM);
13820 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13821 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13822 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13824 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13825 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13829 /* Output a move from arm registers to arm registers of a long double.
13830 OPERANDS[0] is the destination.
13831 OPERANDS[1] is the source. */
13833 output_mov_long_double_arm_from_arm (rtx *operands)
13835 /* We have to be careful here because the two might overlap. */
13836 int dest_start = REGNO (operands[0]);
13837 int src_start = REGNO (operands[1]);
13841 if (dest_start < src_start)
13843 for (i = 0; i < 3; i++)
13845 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13846 ops[1] = gen_rtx_REG (SImode, src_start + i);
13847 output_asm_insn ("mov%?\t%0, %1", ops);
13852 for (i = 2; i >= 0; i--)
13854 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13855 ops[1] = gen_rtx_REG (SImode, src_start + i);
13856 output_asm_insn ("mov%?\t%0, %1", ops);
13864 arm_emit_movpair (rtx dest, rtx src)
13866 /* If the src is an immediate, simplify it. */
13867 if (CONST_INT_P (src))
13869 HOST_WIDE_INT val = INTVAL (src);
13870 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13871 if ((val >> 16) & 0x0000ffff)
13872 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13874 GEN_INT ((val >> 16) & 0x0000ffff));
13877 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13878 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
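/* For example, arm_emit_movpair (r0, GEN_INT (0x12345678)) emits two
   sets that typically assemble to "movw r0, #0x5678" followed by
   "movt r0, #0x1234"; the movt is skipped when the top sixteen bits
   are zero, and the HIGH/LO_SUM pair above handles symbolic
   sources.  */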
13881 /* Output a move from arm registers to an fpa register.
13882 OPERANDS[0] is an fpa register.
13883 OPERANDS[1] is the first register of an arm register pair. */
13885 output_mov_double_fpa_from_arm (rtx *operands)
13887 int arm_reg0 = REGNO (operands[1]);
13890 gcc_assert (arm_reg0 != IP_REGNUM);
13892 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13893 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13894 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13895 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13899 /* Output a move from an fpa register to arm registers.
13900 OPERANDS[0] is the first register of an arm register pair.
13901 OPERANDS[1] is an fpa register. */
13903 output_mov_double_arm_from_fpa (rtx *operands)
13905 int arm_reg0 = REGNO (operands[0]);
13908 gcc_assert (arm_reg0 != IP_REGNUM);
13910 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13911 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13912 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13913 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13917 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
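/* For example, a DImode load of r0:r1 from [r4] is output as
   "ldrd r0, [r4]" when TARGET_LDRD is set (and the Cortex-M3 ldrd
   erratum does not apply) and as "ldmia r4, {r0, r1}" otherwise; see
   the plain REG case below.  */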
13920 output_move_double (rtx *operands, bool emit, int *count)
13922 enum rtx_code code0 = GET_CODE (operands[0]);
13923 enum rtx_code code1 = GET_CODE (operands[1]);
13928 /* The only case when this might happen is when
13929 you are looking at the length of a DImode instruction
13930 that has an invalid constant in it. */
13931 if (code0 == REG && code1 != MEM)
13933 gcc_assert (!emit);
13940 unsigned int reg0 = REGNO (operands[0]);
13942 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13944 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13946 switch (GET_CODE (XEXP (operands[1], 0)))
13953 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13954 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13956 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13961 gcc_assert (TARGET_LDRD);
13963 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13970 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13972 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13980 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13982 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13987 gcc_assert (TARGET_LDRD);
13989 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13994 /* Autoincrement addressing modes should never have overlapping
13995 base and destination registers, and overlapping index registers
13996 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
13998 otherops[0] = operands[0];
13999 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14000 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14002 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14004 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14006 /* Registers overlap so split out the increment. */
14009 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14010 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14017 /* Use a single insn if we can.
14018 FIXME: IWMMXT allows offsets larger than ldrd can
14019 handle, fix these up with a pair of ldr. */
14021 || GET_CODE (otherops[2]) != CONST_INT
14022 || (INTVAL (otherops[2]) > -256
14023 && INTVAL (otherops[2]) < 256))
14026 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14032 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14033 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14043 /* Use a single insn if we can.
14044 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14045 fix these up with a pair of ldr. */
14047 || GET_CODE (otherops[2]) != CONST_INT
14048 || (INTVAL (otherops[2]) > -256
14049 && INTVAL (otherops[2]) < 256))
14052 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14058 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14059 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14069 /* We might be able to use ldrd %0, %1 here. However the range is
14070 different to ldr/adr, and it is broken on some ARMv7-M
14071 implementations. */
14072 /* Use the second register of the pair to avoid problematic
14074 otherops[1] = operands[1];
14076 output_asm_insn ("adr%?\t%0, %1", otherops);
14077 operands[1] = otherops[0];
14081 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14083 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14090 /* ??? This needs checking for thumb2. */
14092 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14093 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14095 otherops[0] = operands[0];
14096 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14097 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14099 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14101 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14103 switch ((int) INTVAL (otherops[2]))
14107 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14113 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14119 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14123 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14124 operands[1] = otherops[0];
14126 && (GET_CODE (otherops[2]) == REG
14128 || (GET_CODE (otherops[2]) == CONST_INT
14129 && INTVAL (otherops[2]) > -256
14130 && INTVAL (otherops[2]) < 256)))
14132 if (reg_overlap_mentioned_p (operands[0],
14136 /* Swap base and index registers over to
14137 avoid a conflict. */
14139 otherops[1] = otherops[2];
14142 /* If both registers conflict, it will usually
14143 have been fixed by a splitter. */
14144 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14145 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14149 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14150 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14157 otherops[0] = operands[0];
14159 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14164 if (GET_CODE (otherops[2]) == CONST_INT)
14168 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14169 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14171 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14177 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14183 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14187 return "ldr%(d%)\t%0, [%1]";
14189 return "ldm%(ia%)\t%1, %M0";
14193 otherops[1] = adjust_address (operands[1], SImode, 4);
14194 /* Take care of overlapping base/data reg. */
14195 if (reg_mentioned_p (operands[0], operands[1]))
14199 output_asm_insn ("ldr%?\t%0, %1", otherops);
14200 output_asm_insn ("ldr%?\t%0, %1", operands);
14210 output_asm_insn ("ldr%?\t%0, %1", operands);
14211 output_asm_insn ("ldr%?\t%0, %1", otherops);
14221 /* Constraints should ensure this. */
14222 gcc_assert (code0 == MEM && code1 == REG);
14223 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14225 switch (GET_CODE (XEXP (operands[0], 0)))
14231 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14233 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14238 gcc_assert (TARGET_LDRD);
14240 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14247 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14249 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14257 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14259 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14264 gcc_assert (TARGET_LDRD);
14266 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14271 otherops[0] = operands[1];
14272 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14273 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14275 /* IWMMXT allows offsets larger than ldrd can handle,
14276 fix these up with a pair of ldr. */
14278 && GET_CODE (otherops[2]) == CONST_INT
14279 && (INTVAL(otherops[2]) <= -256
14280 || INTVAL(otherops[2]) >= 256))
14282 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14286 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14287 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14296 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14297 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14303 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14306 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14311 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14316 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14317 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14319 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14323 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14330 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14337 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14342 && (GET_CODE (otherops[2]) == REG
14344 || (GET_CODE (otherops[2]) == CONST_INT
14345 && INTVAL (otherops[2]) > -256
14346 && INTVAL (otherops[2]) < 256)))
14348 otherops[0] = operands[1];
14349 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14351 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14357 otherops[0] = adjust_address (operands[0], SImode, 4);
14358 otherops[1] = operands[1];
14361 output_asm_insn ("str%?\t%1, %0", operands);
14362 output_asm_insn ("str%?\t%H1, %0", otherops);
14372 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14373 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14376 output_move_quad (rtx *operands)
14378 if (REG_P (operands[0]))
14380 /* Load, or reg->reg move. */
14382 if (MEM_P (operands[1]))
14384 switch (GET_CODE (XEXP (operands[1], 0)))
14387 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14392 output_asm_insn ("adr%?\t%0, %1", operands);
14393 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14397 gcc_unreachable ();
14405 gcc_assert (REG_P (operands[1]));
14407 dest = REGNO (operands[0]);
14408 src = REGNO (operands[1]);
14410 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
14413 for (i = 0; i < 4; i++)
14415 ops[0] = gen_rtx_REG (SImode, dest + i);
14416 ops[1] = gen_rtx_REG (SImode, src + i);
14417 output_asm_insn ("mov%?\t%0, %1", ops);
14420 for (i = 3; i >= 0; i--)
14422 ops[0] = gen_rtx_REG (SImode, dest + i);
14423 ops[1] = gen_rtx_REG (SImode, src + i);
14424 output_asm_insn ("mov%?\t%0, %1", ops);
14430 gcc_assert (MEM_P (operands[0]));
14431 gcc_assert (REG_P (operands[1]));
14432 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14434 switch (GET_CODE (XEXP (operands[0], 0)))
14437 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14441 gcc_unreachable ();
14448 /* Output a VFP load or store instruction. */
14451 output_move_vfp (rtx *operands)
14453 rtx reg, mem, addr, ops[2];
14454 int load = REG_P (operands[0]);
14455 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14456 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14459 enum machine_mode mode;
14461 reg = operands[!load];
14462 mem = operands[load];
14464 mode = GET_MODE (reg);
14466 gcc_assert (REG_P (reg));
14467 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14468 gcc_assert (mode == SFmode
14472 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14473 gcc_assert (MEM_P (mem));
14475 addr = XEXP (mem, 0);
14477 switch (GET_CODE (addr))
14480 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14481 ops[0] = XEXP (addr, 0);
14486 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14487 ops[0] = XEXP (addr, 0);
14492 templ = "f%s%c%%?\t%%%s0, %%1%s";
14498 sprintf (buff, templ,
14499 load ? "ld" : "st",
14502 integer_p ? "\t%@ int" : "");
14503 output_asm_insn (buff, ops);
14508 /* Output a Neon quad-word load or store, or a load or store for
14509 larger structure modes.
14511 WARNING: The ordering of elements is weird in big-endian mode,
14512 because we use VSTM, as required by the EABI. GCC RTL defines
14513 element ordering based on in-memory order. This can differ
14514 from the architectural ordering of elements within a NEON register.
14515 The intrinsics defined in arm_neon.h use the NEON register element
14516 ordering, not the GCC RTL element ordering.
14518 For example, the in-memory ordering of a big-endian quadword
14519 vector with 16-bit elements when stored from register pair {d0,d1}
14520 will be (lowest address first, d0[N] is NEON register element N):
14522 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14524 When necessary, quadword registers (dN, dN+1) are moved to ARM
14525 registers from rN in the order:
14527 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14529 So that STM/LDM can be used on vectors in ARM registers, and the
14530 same memory layout will result as if VSTM/VLDM were used. */
14533 output_move_neon (rtx *operands)
14535 rtx reg, mem, addr, ops[2];
14536 int regno, load = REG_P (operands[0]);
14539 enum machine_mode mode;
14541 reg = operands[!load];
14542 mem = operands[load];
14544 mode = GET_MODE (reg);
14546 gcc_assert (REG_P (reg));
14547 regno = REGNO (reg);
14548 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14549 || NEON_REGNO_OK_FOR_QUAD (regno));
14550 gcc_assert (VALID_NEON_DREG_MODE (mode)
14551 || VALID_NEON_QREG_MODE (mode)
14552 || VALID_NEON_STRUCT_MODE (mode));
14553 gcc_assert (MEM_P (mem));
14555 addr = XEXP (mem, 0);
14557 /* Strip off const from addresses like (const (plus (...))). */
14558 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14559 addr = XEXP (addr, 0);
14561 switch (GET_CODE (addr))
14564 templ = "v%smia%%?\t%%0!, %%h1";
14565 ops[0] = XEXP (addr, 0);
14570 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14571 templ = "v%smdb%%?\t%%0!, %%h1";
14572 ops[0] = XEXP (addr, 0);
14577 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14578 gcc_unreachable ();
14583 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14586 for (i = 0; i < nregs; i++)
14588 /* We're only using DImode here because it's a convenient size. */
14589 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14590 ops[1] = adjust_address (mem, DImode, 8 * i);
14591 if (reg_overlap_mentioned_p (ops[0], mem))
14593 gcc_assert (overlap == -1);
14598 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14599 output_asm_insn (buff, ops);
14604 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14605 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14606 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14607 output_asm_insn (buff, ops);
14614 templ = "v%smia%%?\t%%m0, %%h1";
14619 sprintf (buff, templ, load ? "ld" : "st");
14620 output_asm_insn (buff, ops);
14625 /* Compute and return the length of neon_mov<mode>, where <mode> is
14626 one of VSTRUCT modes: EI, OI, CI or XI. */
14628 arm_attr_length_move_neon (rtx insn)
14630 rtx reg, mem, addr;
14632 enum machine_mode mode;
14634 extract_insn_cached (insn);
14636 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14638 mode = GET_MODE (recog_data.operand[0]);
14649 gcc_unreachable ();
14653 load = REG_P (recog_data.operand[0]);
14654 reg = recog_data.operand[!load];
14655 mem = recog_data.operand[load];
14657 gcc_assert (MEM_P (mem));
14659 mode = GET_MODE (reg);
14660 addr = XEXP (mem, 0);
14662 /* Strip off const from addresses like (const (plus (...))). */
14663 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14664 addr = XEXP (addr, 0);
14666 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14668 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14675 /* Return nonzero if the offset in the address is an immediate. Otherwise, return zero. */
14679 arm_address_offset_is_imm (rtx insn)
14683 extract_insn_cached (insn);
14685 if (REG_P (recog_data.operand[0]))
14688 mem = recog_data.operand[0];
14690 gcc_assert (MEM_P (mem));
14692 addr = XEXP (mem, 0);
14694 if (GET_CODE (addr) == REG
14695 || (GET_CODE (addr) == PLUS
14696 && GET_CODE (XEXP (addr, 0)) == REG
14697 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14703 /* Output an ADD r, s, #n where n may be too big for one instruction.
14704 If adding zero to one register, output nothing. */
14706 output_add_immediate (rtx *operands)
14708 HOST_WIDE_INT n = INTVAL (operands[2]);
14710 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14713 output_multi_immediate (operands,
14714 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14717 output_multi_immediate (operands,
14718 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14725 /* Output a multiple immediate operation.
14726 OPERANDS is the vector of operands referred to in the output patterns.
14727 INSTR1 is the output pattern to use for the first constant.
14728 INSTR2 is the output pattern to use for subsequent constants.
14729 IMMED_OP is the index of the constant slot in OPERANDS.
14730 N is the constant value. */
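/* For example, with N = 0x10004 and register operands r0/r1, the add
   patterns used by output_add_immediate above produce
   "add r0, r1, #4" followed by "add r0, r0, #65536": each pass of the
   loop below peels off one 8-bit chunk at an even bit position, which
   is exactly what an ARM immediate operand can encode.  */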
14731 static const char *
14732 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14733 int immed_op, HOST_WIDE_INT n)
14735 #if HOST_BITS_PER_WIDE_INT > 32
14741 /* Quick and easy output. */
14742 operands[immed_op] = const0_rtx;
14743 output_asm_insn (instr1, operands);
14748 const char * instr = instr1;
14750 /* Note that n is never zero here (which would give no output). */
14751 for (i = 0; i < 32; i += 2)
14755 operands[immed_op] = GEN_INT (n & (255 << i));
14756 output_asm_insn (instr, operands);
14766 /* Return the name of a shifter operation. */
14767 static const char *
14768 arm_shift_nmem(enum rtx_code code)
14773 return ARM_LSL_NAME;
14789 /* Return the appropriate ARM instruction for the operation code.
14790 The returned result should not be overwritten. OP is the rtx of the
14791 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14794 arithmetic_instr (rtx op, int shift_first_arg)
14796 switch (GET_CODE (op))
14802 return shift_first_arg ? "rsb" : "sub";
14817 return arm_shift_nmem(GET_CODE(op));
14820 gcc_unreachable ();
14824 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14825 for the operation code. The returned result should not be overwritten.
14826 OP is the rtx code of the shift.
14827 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
14829 static const char *
14830 shift_op (rtx op, HOST_WIDE_INT *amountp)
14833 enum rtx_code code = GET_CODE (op);
14835 switch (GET_CODE (XEXP (op, 1)))
14843 *amountp = INTVAL (XEXP (op, 1));
14847 gcc_unreachable ();
14853 gcc_assert (*amountp != -1);
14854 *amountp = 32 - *amountp;
14857 /* Fall through. */
14863 mnem = arm_shift_nmem(code);
14867 /* We never have to worry about the amount being other than a
14868 power of 2, since this case can never be reloaded from a reg. */
14869 gcc_assert (*amountp != -1);
14870 *amountp = int_log2 (*amountp);
14871 return ARM_LSL_NAME;
14874 gcc_unreachable ();
14877 if (*amountp != -1)
14879 /* This is not 100% correct, but follows from the desire to merge
14880 multiplication by a power of 2 with the recognizer for a
14881 shift. >=32 is not a valid shift for "lsl", so we must try and
14882 output a shift that produces the correct arithmetical result.
14883 Using lsr #32 is identical except for the fact that the carry bit
14884 is not set correctly if we set the flags; but we never use the
14885 carry bit from such an operation, so we can ignore that. */
14886 if (code == ROTATERT)
14887 /* Rotate is just modulo 32. */
14889 else if (*amountp != (*amountp & 31))
14891 if (code == ASHIFT)
14896 /* Shifts of 0 are no-ops. */
14904 /* Obtain the shift from the POWER of two. */
14906 static HOST_WIDE_INT
14907 int_log2 (HOST_WIDE_INT power)
14909 HOST_WIDE_INT shift = 0;
14911 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14913 gcc_assert (shift <= 31);
14920 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14921 because /bin/as is horribly restrictive. The judgement about
14922 whether or not each character is 'printable' (and can be output as
14923 is) or not (and must be printed with an octal escape) must be made
14924 with reference to the *host* character set -- the situation is
14925 similar to that discussed in the comments above pp_c_char in
14926 c-pretty-print.c. */
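/* For example, output_ascii_pseudo_op (f, (const unsigned char *)
   "OK\n", 3) emits
	.ascii "OK\012"
   the newline is not printable, so it is written as a three-digit
   octal escape, and a new .ascii directive is started whenever
   MAX_ASCII_LEN characters have been printed on one line.  */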
14928 #define MAX_ASCII_LEN 51
14931 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14934 int len_so_far = 0;
14936 fputs ("\t.ascii\t\"", stream);
14938 for (i = 0; i < len; i++)
14942 if (len_so_far >= MAX_ASCII_LEN)
14944 fputs ("\"\n\t.ascii\t\"", stream);
14950 if (c == '\\' || c == '\"')
14952 putc ('\\', stream);
14960 fprintf (stream, "\\%03o", c);
14965 fputs ("\"\n", stream);
14968 /* Compute the register save mask for registers 0 through 12
14969 inclusive. This code is used by arm_compute_save_reg_mask. */
14971 static unsigned long
14972 arm_compute_save_reg0_reg12_mask (void)
14974 unsigned long func_type = arm_current_func_type ();
14975 unsigned long save_reg_mask = 0;
14978 if (IS_INTERRUPT (func_type))
14980 unsigned int max_reg;
14981 /* Interrupt functions must not corrupt any registers,
14982 even call clobbered ones. If this is a leaf function
14983 we can just examine the registers used by the RTL, but
14984 otherwise we have to assume that whatever function is
14985 called might clobber anything, and so we have to save
14986 all the call-clobbered registers as well. */
14987 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
14988 /* FIQ handlers have registers r8 - r12 banked, so
14989 we only need to check r0 - r7. Normal ISRs only
14990 bank r14 and r15, so we must check up to r12.
14991 r13 is the stack pointer which is always preserved,
14992 so we do not need to consider it here. */
14997 for (reg = 0; reg <= max_reg; reg++)
14998 if (df_regs_ever_live_p (reg)
14999 || (! current_function_is_leaf && call_used_regs[reg]))
15000 save_reg_mask |= (1 << reg);
15002 /* Also save the pic base register if necessary. */
15004 && !TARGET_SINGLE_PIC_BASE
15005 && arm_pic_register != INVALID_REGNUM
15006 && crtl->uses_pic_offset_table)
15007 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15009 else if (IS_VOLATILE(func_type))
15011 /* For noreturn functions we historically omitted register saves
15012 altogether. However this really messes up debugging. As a
15013 compromise save just the frame pointers. Combined with the link
15014 register saved elsewhere this should be sufficient to get a backtrace. */
15016 if (frame_pointer_needed)
15017 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15018 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15019 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15020 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15021 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15025 /* In the normal case we only need to save those registers
15026 which are call saved and which are used by this function. */
15027 for (reg = 0; reg <= 11; reg++)
15028 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15029 save_reg_mask |= (1 << reg);
15031 /* Handle the frame pointer as a special case. */
15032 if (frame_pointer_needed)
15033 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15035 /* If we aren't loading the PIC register,
15036 don't stack it even though it may be live. */
15038 && !TARGET_SINGLE_PIC_BASE
15039 && arm_pic_register != INVALID_REGNUM
15040 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15041 || crtl->uses_pic_offset_table))
15042 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15044 /* The prologue will copy SP into R0, so save it. */
15045 if (IS_STACKALIGN (func_type))
15046 save_reg_mask |= 1;
15049 /* Save registers so the exception handler can modify them. */
15050 if (crtl->calls_eh_return)
15056 reg = EH_RETURN_DATA_REGNO (i);
15057 if (reg == INVALID_REGNUM)
15059 save_reg_mask |= 1 << reg;
15063 return save_reg_mask;
15067 /* Compute the number of bytes used to store the static chain register on the
15068 stack, above the stack frame. We need to know this accurately to get the
15069 alignment of the rest of the stack frame correct. */
15071 static int
arm_compute_static_chain_stack_bytes (void)
15073 unsigned long func_type = arm_current_func_type ();
15074 int static_chain_stack_bytes = 0;
15076 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15077 && IS_NESTED (func_type)
15078 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15079 static_chain_stack_bytes = 4;
15081 return static_chain_stack_bytes;
15085 /* Compute a bit mask of which registers need to be
15086 saved on the stack for the current function.
15087 This is used by arm_get_frame_offsets, which may add extra registers. */
15089 static unsigned long
15090 arm_compute_save_reg_mask (void)
15092 unsigned int save_reg_mask = 0;
15093 unsigned long func_type = arm_current_func_type ();
15096 if (IS_NAKED (func_type))
15097 /* This should never really happen. */
15100 /* If we are creating a stack frame, then we must save the frame pointer,
15101 IP (which will hold the old stack pointer), LR and the PC. */
15102 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15104 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15107 | (1 << PC_REGNUM);
15109 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15111 /* Decide if we need to save the link register.
15112 Interrupt routines have their own banked link register,
15113 so they never need to save it.
15114 Otherwise if we do not use the link register we do not need to save
15115 it. If we are pushing other registers onto the stack however, we
15116 can save an instruction in the epilogue by pushing the link register
15117 now and then popping it back into the PC. This incurs extra memory
15118 accesses though, so we only do it when optimizing for size, and only
15119 if we know that we will not need a fancy return sequence. */
15120 if (df_regs_ever_live_p (LR_REGNUM)
15123 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15124 && !crtl->calls_eh_return))
15125 save_reg_mask |= 1 << LR_REGNUM;
15127 if (cfun->machine->lr_save_eliminated)
15128 save_reg_mask &= ~ (1 << LR_REGNUM);
15130 if (TARGET_REALLY_IWMMXT
15131 && ((bit_count (save_reg_mask)
15132 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15133 arm_compute_static_chain_stack_bytes())
15136 /* The total number of registers that are going to be pushed
15137 onto the stack is odd. We need to ensure that the stack
15138 is 64-bit aligned before we start to save iWMMXt registers,
15139 and also before we start to create locals. (A local variable
15140 might be a double or long long which we will load/store using
15141 an iWMMXt instruction). Therefore we need to push another
15142 ARM register, so that the stack will be 64-bit aligned. We
15143 try to avoid using the arg registers (r0 - r3) as they might be
15144 used to pass values in a tail call. */
15145 for (reg = 4; reg <= 12; reg++)
15146 if ((save_reg_mask & (1 << reg)) == 0)
15150 save_reg_mask |= (1 << reg);
15153 cfun->machine->sibcall_blocked = 1;
15154 save_reg_mask |= (1 << 3);
15158 /* We may need to push an additional register for use initializing the
15159 PIC base register. */
15160 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15161 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15163 reg = thumb_find_work_register (1 << 4);
15164 if (!call_used_regs[reg])
15165 save_reg_mask |= (1 << reg);
15168 return save_reg_mask;
15172 /* Compute a bit mask of which registers need to be
15173 saved on the stack for the current function. */
15174 static unsigned long
15175 thumb1_compute_save_reg_mask (void)
15177 unsigned long mask;
15181 for (reg = 0; reg < 12; reg ++)
15182 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15186 && !TARGET_SINGLE_PIC_BASE
15187 && arm_pic_register != INVALID_REGNUM
15188 && crtl->uses_pic_offset_table)
15189 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15191 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15192 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15193 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15195 /* LR will also be pushed if any lo regs are pushed. */
15196 if (mask & 0xff || thumb_force_lr_save ())
15197 mask |= (1 << LR_REGNUM);
15199 /* Make sure we have a low work register if we need one.
15200 We will need one if we are going to push a high register,
15201 but we are not currently intending to push a low register. */
15202 if ((mask & 0xff) == 0
15203 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15205 /* Use thumb_find_work_register to choose which register
15206 we will use. If the register is live then we will
15207 have to push it. Use LAST_LO_REGNUM as our fallback
15208 choice for the register to select. */
15209 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15210 /* Make sure the register returned by thumb_find_work_register is
15211 not part of the return value. */
15212 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15213 reg = LAST_LO_REGNUM;
15215 if (! call_used_regs[reg])
15219 /* The 504 below is 8 bytes less than 512 because there are two possible
15220 alignment words. We can't tell here if they will be present or not so we
15221 have to play it safe and assume that they are. */
15222 if ((CALLER_INTERWORKING_SLOT_SIZE +
15223 ROUND_UP_WORD (get_frame_size ()) +
15224 crtl->outgoing_args_size) >= 504)
15226 /* This is the same as the code in thumb1_expand_prologue() which
15227 determines which register to use for stack decrement. */
15228 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15229 if (mask & (1 << reg))
15232 if (reg > LAST_LO_REGNUM)
15234 /* Make sure we have a register available for stack decrement. */
15235 mask |= 1 << LAST_LO_REGNUM;
15243 /* Return the number of bytes required to save VFP registers. */
15245 arm_get_vfp_saved_size (void)
15247 unsigned int regno;
15252 /* Space for saved VFP registers. */
15253 if (TARGET_HARD_FLOAT && TARGET_VFP)
15256 for (regno = FIRST_VFP_REGNUM;
15257 regno < LAST_VFP_REGNUM;
15260 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15261 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15265 /* Workaround ARM10 VFPr1 bug. */
15266 if (count == 2 && !arm_arch6)
15268 saved += count * 8;
15277 if (count == 2 && !arm_arch6)
15279 saved += count * 8;
15286 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15287 everything bar the final return instruction. */
15289 output_return_instruction (rtx operand, int really_return, int reverse)
15291 char conditional[10];
15294 unsigned long live_regs_mask;
15295 unsigned long func_type;
15296 arm_stack_offsets *offsets;
15298 func_type = arm_current_func_type ();
15300 if (IS_NAKED (func_type))
15303 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15305 /* If this function was declared non-returning, and we have
15306 found a tail call, then we have to trust that the called
15307 function won't return. */
15312 /* Otherwise, trap an attempted return by aborting. */
15314 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15316 assemble_external_libcall (ops[1]);
15317 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15323 gcc_assert (!cfun->calls_alloca || really_return);
15325 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15327 cfun->machine->return_used_this_function = 1;
15329 offsets = arm_get_frame_offsets ();
15330 live_regs_mask = offsets->saved_regs_mask;
15332 if (live_regs_mask)
15334 const char * return_reg;
15336 /* If we do not have any special requirements for function exit
15337 (e.g. interworking) then we can load the return address
15338 directly into the PC. Otherwise we must load it into LR. */
15340 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15341 return_reg = reg_names[PC_REGNUM];
15343 return_reg = reg_names[LR_REGNUM];
15345 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15347 /* There are three possible reasons for the IP register
15348 being saved. 1) a stack frame was created, in which case
15349 IP contains the old stack pointer, or 2) an ISR routine
15350 corrupted it, or 3) it was saved to align the stack on
15351 iWMMXt. In case 1, restore IP into SP, otherwise just ignore it. */
15353 if (frame_pointer_needed)
15355 live_regs_mask &= ~ (1 << IP_REGNUM);
15356 live_regs_mask |= (1 << SP_REGNUM);
15359 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15362 /* On some ARM architectures it is faster to use LDR rather than
15363 LDM to load a single register. On other architectures, the
15364 cost is the same. In 26 bit mode, or for exception handlers,
15365 we have to use LDM to load the PC so that the CPSR is also restored. */
15367 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15368 if (live_regs_mask == (1U << reg))
15371 if (reg <= LAST_ARM_REGNUM
15372 && (reg != LR_REGNUM
15374 || ! IS_INTERRUPT (func_type)))
15376 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15377 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15384 /* Generate the load multiple instruction to restore the
15385 registers. Note we can get here, even if
15386 frame_pointer_needed is true, but only if sp already
15387 points to the base of the saved core registers. */
15388 if (live_regs_mask & (1 << SP_REGNUM))
15390 unsigned HOST_WIDE_INT stack_adjust;
15392 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15393 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15395 if (stack_adjust && arm_arch5 && TARGET_ARM)
15396 if (TARGET_UNIFIED_ASM)
15397 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15399 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15402 /* If we can't use ldmib (SA110 bug),
15403 then try to pop r3 instead. */
15405 live_regs_mask |= 1 << 3;
15407 if (TARGET_UNIFIED_ASM)
15408 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15410 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15414 if (TARGET_UNIFIED_ASM)
15415 sprintf (instr, "pop%s\t{", conditional);
15417 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15419 p = instr + strlen (instr);
15421 for (reg = 0; reg <= SP_REGNUM; reg++)
15422 if (live_regs_mask & (1 << reg))
15424 int l = strlen (reg_names[reg]);
15430 memcpy (p, ", ", 2);
15434 memcpy (p, "%|", 2);
15435 memcpy (p + 2, reg_names[reg], l);
15439 if (live_regs_mask & (1 << LR_REGNUM))
15441 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15442 /* If returning from an interrupt, restore the CPSR. */
15443 if (IS_INTERRUPT (func_type))
15450 output_asm_insn (instr, & operand);
15452 /* See if we need to generate an extra instruction to
15453 perform the actual function return. */
15455 && func_type != ARM_FT_INTERWORKED
15456 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15458 /* The return has already been handled
15459 by loading the LR into the PC. */
15466 switch ((int) ARM_FUNC_TYPE (func_type))
15470 /* ??? This is wrong for unified assembly syntax. */
15471 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15474 case ARM_FT_INTERWORKED:
15475 sprintf (instr, "bx%s\t%%|lr", conditional);
15478 case ARM_FT_EXCEPTION:
15479 /* ??? This is wrong for unified assembly syntax. */
15480 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15484 /* Use bx if it's available. */
15485 if (arm_arch5 || arm_arch4t)
15486 sprintf (instr, "bx%s\t%%|lr", conditional);
15488 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15492 output_asm_insn (instr, & operand);
15498 /* Write the function name into the code section, directly preceding
15499 the function prologue.
15501 Code will be output similar to this:
15503 .ascii "arm_poke_function_name", 0
15506 .word 0xff000000 + (t1 - t0)
15507 arm_poke_function_name
15509 stmfd sp!, {fp, ip, lr, pc}
15512 When performing a stack backtrace, code can inspect the value
15513 of 'pc' stored at 'fp' + 0. If the trace function then looks
15514 at location pc - 12 and the top 8 bits are set, then we know
15515 that there is a function name embedded immediately preceding this
15516 location and has length ((pc[-3]) & ~0xff000000).
15518 We assume that pc is declared as a pointer to an unsigned long.
15520 It is of no benefit to output the function name if we are assembling
15521 a leaf function. These function types will not contain a stack
15522 backtrace structure, therefore it is not possible to determine the function name. */
15525 arm_poke_function_name (FILE *stream, const char *name)
15527 unsigned long alignlength;
15528 unsigned long length;
15531 length = strlen (name) + 1;
15532 alignlength = ROUND_UP_WORD (length);
15534 ASM_OUTPUT_ASCII (stream, name, length);
15535 ASM_OUTPUT_ALIGN (stream, 2);
15536 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15537 assemble_aligned_integer (UNITS_PER_WORD, x);
15540 /* Place some comments into the assembler stream
15541 describing the current function. */
15543 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15545 unsigned long func_type;
15547 /* ??? Do we want to print some of the below anyway? */
15551 /* Sanity check. */
15552 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15554 func_type = arm_current_func_type ();
15556 switch ((int) ARM_FUNC_TYPE (func_type))
15559 case ARM_FT_NORMAL:
15561 case ARM_FT_INTERWORKED:
15562 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15565 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15568 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15570 case ARM_FT_EXCEPTION:
15571 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15575 if (IS_NAKED (func_type))
15576 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15578 if (IS_VOLATILE (func_type))
15579 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15581 if (IS_NESTED (func_type))
15582 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15583 if (IS_STACKALIGN (func_type))
15584 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15586 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15588 crtl->args.pretend_args_size, frame_size);
15590 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15591 frame_pointer_needed,
15592 cfun->machine->uses_anonymous_args);
15594 if (cfun->machine->lr_save_eliminated)
15595 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15597 if (crtl->calls_eh_return)
15598 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15603 arm_output_epilogue (rtx sibling)
15606 unsigned long saved_regs_mask;
15607 unsigned long func_type;
15608 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15609 frame that is $fp + 4 for a non-variadic function. */
15610 int floats_offset = 0;
15612 FILE * f = asm_out_file;
15613 unsigned int lrm_count = 0;
15614 int really_return = (sibling == NULL);
15616 arm_stack_offsets *offsets;
15618 /* If we have already generated the return instruction
15619 then it is futile to generate anything else. */
15620 if (use_return_insn (FALSE, sibling)
15621 && (cfun->machine->return_used_this_function != 0))
15624 func_type = arm_current_func_type ();
15626 if (IS_NAKED (func_type))
15627 /* Naked functions don't have epilogues. */
15630 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15634 /* A volatile function should never return. Call abort. */
15635 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15636 assemble_external_libcall (op);
15637 output_asm_insn ("bl\t%a0", &op);
15642 /* If we are throwing an exception, then we really must be doing a
15643 return, so we can't tail-call. */
15644 gcc_assert (!crtl->calls_eh_return || really_return);
15646 offsets = arm_get_frame_offsets ();
15647 saved_regs_mask = offsets->saved_regs_mask;
15650 lrm_count = bit_count (saved_regs_mask);
15652 floats_offset = offsets->saved_args;
15653 /* Compute how far away the floats will be. */
15654 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15655 if (saved_regs_mask & (1 << reg))
15656 floats_offset += 4;
15658 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15660 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15661 int vfp_offset = offsets->frame;
15663 if (TARGET_FPA_EMU2)
15665 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15666 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15668 floats_offset += 12;
15669 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15670 reg, FP_REGNUM, floats_offset - vfp_offset);
15675 start_reg = LAST_FPA_REGNUM;
15677 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15679 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15681 floats_offset += 12;
15683 /* We can't unstack more than four registers at once. */
15684 if (start_reg - reg == 3)
15686 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15687 reg, FP_REGNUM, floats_offset - vfp_offset);
15688 start_reg = reg - 1;
15693 if (reg != start_reg)
15694 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15695 reg + 1, start_reg - reg,
15696 FP_REGNUM, floats_offset - vfp_offset);
15697 start_reg = reg - 1;
15701 /* Just in case the last register checked also needs unstacking. */
15702 if (reg != start_reg)
15703 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15704 reg + 1, start_reg - reg,
15705 FP_REGNUM, floats_offset - vfp_offset);
15708 if (TARGET_HARD_FLOAT && TARGET_VFP)
15712 /* The fldmd insns do not have base+offset addressing
15713 modes, so we use IP to hold the address. */
15714 saved_size = arm_get_vfp_saved_size ();
15716 if (saved_size > 0)
15718 floats_offset += saved_size;
15719 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15720 FP_REGNUM, floats_offset - vfp_offset);
15722 start_reg = FIRST_VFP_REGNUM;
15723 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15725 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15726 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15728 if (start_reg != reg)
15729 vfp_output_fldmd (f, IP_REGNUM,
15730 (start_reg - FIRST_VFP_REGNUM) / 2,
15731 (reg - start_reg) / 2);
15732 start_reg = reg + 2;
15735 if (start_reg != reg)
15736 vfp_output_fldmd (f, IP_REGNUM,
15737 (start_reg - FIRST_VFP_REGNUM) / 2,
15738 (reg - start_reg) / 2);
15743 /* The frame pointer is guaranteed to be non-double-word aligned.
15744 This is because it is set to (old_stack_pointer - 4) and the
15745 old_stack_pointer was double word aligned. Thus the offset to
15746 the iWMMXt registers to be loaded must also be non-double-word
15747 sized, so that the resultant address *is* double-word aligned.
15748 We can ignore floats_offset since that was already included in
15749 the live_regs_mask. */
15750 lrm_count += (lrm_count % 2 ? 2 : 1);
15752 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15753 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15755 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15756 reg, FP_REGNUM, lrm_count * 4);
15761 /* saved_regs_mask should contain the IP, which at the time of stack
15762 frame generation actually contains the old stack pointer. So a
15763 quick way to unwind the stack is just pop the IP register directly
15764 into the stack pointer. */
15765 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15766 saved_regs_mask &= ~ (1 << IP_REGNUM);
15767 saved_regs_mask |= (1 << SP_REGNUM);
15769 /* There are two registers left in saved_regs_mask - LR and PC. We
15770 only need to restore the LR register (the return address), but to
15771 save time we can load it directly into the PC, unless we need a
15772 special function exit sequence, or we are not really returning. */
      if (really_return
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return)
	/* Delete the LR from the register mask, so that the LR on
	   the stack is loaded into the PC in the register mask.  */
	saved_regs_mask &= ~ (1 << LR_REGNUM);
      else
	saved_regs_mask &= ~ (1 << PC_REGNUM);
15782 /* We must use SP as the base register, because SP is one of the
15783 registers being restored. If an interrupt or page fault
15784 happens in the ldm instruction, the SP might or might not
15785 have been restored. That would be bad, as then SP will no
15786 longer indicate the safe area of stack, and we can get stack
15787 corruption. Using SP as the base register means that it will
15788 be reset correctly to the original value, should an interrupt
15789 occur. If the stack pointer already points at the right
15790 place, then omit the subtraction. */
15791 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15792 || cfun->calls_alloca)
15793 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15794 4 * bit_count (saved_regs_mask));
15795 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
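      /* Illustrative sequence for a typical APCS frame that saved
	 {fp, ip, lr, pc}: after the IP/SP and LR/PC adjustments above,
	 saved_regs_mask is {fp, sp, pc} and we emit

		sub	sp, fp, #12
		ldmfd	sp, {fp, sp, pc}

	 so the slot that held the saved IP restores the caller's SP.  */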
15797 if (IS_INTERRUPT (func_type))
15798 /* Interrupt handlers will have pushed the
15799 IP onto the stack, so restore it now. */
15800 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15804 /* This branch is executed for ARM mode (non-apcs frames) and
15805 Thumb-2 mode. Frame layout is essentially the same for those
15806 cases, except that in ARM mode frame pointer points to the
15807 first saved register, while in Thumb-2 mode the frame pointer points
15808 to the last saved register.
15810 It is possible to make frame pointer point to last saved
15811 register in both cases, and remove some conditionals below.
15812 That means that fp setup in prologue would be just "mov fp, sp"
15813 and sp restore in epilogue would be just "mov sp, fp", whereas
	 now we have to use add/sub in those cases.  However, the value
	 of that would be marginal, as both mov and add/sub are 32-bit
	 in ARM mode, and it would require extra conditionals
	 in arm_expand_prologue to distinguish the ARM-apcs-frame case
	 (where the frame pointer is required to point at the first register)
	 from the ARM-non-apcs-frame case.  Therefore, such a change is
	 postponed until a real need arises.  */
15821 unsigned HOST_WIDE_INT amount;
15823 /* Restore stack pointer if necessary. */
15824 if (TARGET_ARM && frame_pointer_needed)
15826 operands[0] = stack_pointer_rtx;
15827 operands[1] = hard_frame_pointer_rtx;
15829 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15830 output_add_immediate (operands);
15834 if (frame_pointer_needed)
	  /* For Thumb-2 restore sp from the frame pointer.
	     Operand restrictions mean we have to increment FP, then copy
	     to SP.  */
	  amount = offsets->locals_base - offsets->saved_regs;
15840 operands[0] = hard_frame_pointer_rtx;
15844 unsigned long count;
15845 operands[0] = stack_pointer_rtx;
15846 amount = offsets->outgoing_args - offsets->saved_regs;
15847 /* pop call clobbered registers if it avoids a
15848 separate stack adjustment. */
15849 count = offsets->saved_regs - offsets->saved_args;
15852 && !crtl->calls_eh_return
15853 && bit_count(saved_regs_mask) * 4 == count
15854 && !IS_INTERRUPT (func_type)
15855 && !IS_STACKALIGN (func_type)
15856 && !crtl->tail_call_emit)
15858 unsigned long mask;
15859 /* Preserve return values, of any size. */
15860 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
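	      /* Illustrative: a DImode (8-byte) return value gives
		 mask = (1 << 2) - 1 = r0-r1 here; the XOR below then
		 flips that to r2-r3, the registers that may be popped
		 harmlessly to absorb the stack adjustment.  */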
	      mask ^= 0xf;
	      mask &= ~saved_regs_mask;
	      reg = 0;
	      while (bit_count (mask) * 4 > amount)
		{
		  while ((mask & (1 << reg)) == 0)
		    reg++;
		  mask &= ~(1 << reg);
		}

	      if (bit_count (mask) * 4 == amount)
		{
		  amount = 0;
		  saved_regs_mask |= mask;
		}
15879 operands[1] = operands[0];
15880 operands[2] = GEN_INT (amount);
15881 output_add_immediate (operands);
15883 if (frame_pointer_needed)
15884 asm_fprintf (f, "\tmov\t%r, %r\n",
15885 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
15888 if (TARGET_FPA_EMU2)
15890 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15891 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	      asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
			   reg, SP_REGNUM);
15897 start_reg = FIRST_FPA_REGNUM;
15899 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15901 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15903 if (reg - start_reg == 3)
15905 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15906 start_reg, SP_REGNUM);
15907 start_reg = reg + 1;
15912 if (reg != start_reg)
		  asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
			       start_reg, reg - start_reg,
			       SP_REGNUM);

		  start_reg = reg + 1;
15921 /* Just in case the last register checked also needs unstacking. */
15922 if (reg != start_reg)
15923 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15924 start_reg, reg - start_reg, SP_REGNUM);
15927 if (TARGET_HARD_FLOAT && TARGET_VFP)
15929 int end_reg = LAST_VFP_REGNUM + 1;
	  /* Scan the registers in reverse order.  We need to match
	     any groupings made in the prologue and generate matching
	     pops.  */
15934 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15936 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15937 && (!df_regs_ever_live_p (reg + 1)
15938 || call_used_regs[reg + 1]))
15940 if (end_reg > reg + 2)
15941 vfp_output_fldmd (f, SP_REGNUM,
15942 (reg + 2 - FIRST_VFP_REGNUM) / 2,
15943 (end_reg - (reg + 2)) / 2);
15947 if (end_reg > reg + 2)
15948 vfp_output_fldmd (f, SP_REGNUM, 0,
15949 (end_reg - (reg + 2)) / 2);
15953 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15954 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15955 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15957 /* If we can, restore the LR into the PC. */
15958 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15959 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15960 && !IS_STACKALIGN (func_type)
15962 && crtl->args.pretend_args_size == 0
15963 && saved_regs_mask & (1 << LR_REGNUM)
15964 && !crtl->calls_eh_return)
15966 saved_regs_mask &= ~ (1 << LR_REGNUM);
15967 saved_regs_mask |= (1 << PC_REGNUM);
15968 rfe = IS_INTERRUPT (func_type);
15973 /* Load the registers off the stack. If we only have one register
15974 to load use the LDR instruction - it is faster. For Thumb-2
     always use pop and the assembler will pick the best instruction.  */
15976 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
15977 && !IS_INTERRUPT(func_type))
15979 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
15981 else if (saved_regs_mask)
15983 if (saved_regs_mask & (1 << SP_REGNUM))
15984 /* Note - write back to the stack register is not enabled
15985 (i.e. "ldmfd sp!..."). We know that the stack pointer is
15986 in the list of registers and if we add writeback the
15987 instruction becomes UNPREDICTABLE. */
	print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
			 rfe);
      else if (TARGET_ARM)
	print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
			 rfe);
      else
	print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
15997 if (crtl->args.pretend_args_size)
15999 /* Unwind the pre-pushed regs. */
16000 operands[0] = operands[1] = stack_pointer_rtx;
16001 operands[2] = GEN_INT (crtl->args.pretend_args_size);
16002 output_add_immediate (operands);
16006 /* We may have already restored PC directly from the stack. */
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
    return "";
16010 /* Stack adjustment for exception handler. */
16011 if (crtl->calls_eh_return)
16012 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
16013 ARM_EH_STACKADJ_REGNUM);
16015 /* Generate the return instruction. */
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    case ARM_FT_ISR:
    case ARM_FT_FIQ:
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      break;

    default:
      if (IS_STACKALIGN (func_type))
	{
	  /* See comment in arm_expand_prologue.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
	}
      if (arm_arch5 || arm_arch4t)
	asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      else
	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;
    }

  return "";
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
16058 of call_reg and call_value_reg type insns. */
16059 for (regno = 0; regno < LR_REGNUM; regno++)
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
16066 targetm.asm_out.internal_label (asm_out_file, "L",
16067 CODE_LABEL_NUMBER (label));
16068 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16072 /* ??? Probably not safe to set this here, since it assumes that a
16073 function will be emitted as assembly immediately after we generate
16074 RTL for it. This does not happen for inline functions. */
16075 cfun->machine->return_used_this_function = 0;
16077 else /* TARGET_32BIT */
16079 /* We need to take into account any stack-frame rounding. */
16080 offsets = arm_get_frame_offsets ();
16082 gcc_assert (!use_return_insn (FALSE, NULL)
16083 || (cfun->machine->return_used_this_function != 0)
16084 || offsets->saved_regs == offsets->outgoing_args
16085 || frame_pointer_needed);
16087 /* Reset the ARM-specific per-function variables. */
16088 after_arm_reorg = 0;
16092 /* Generate and emit an insn that we will recognize as a push_multi.
16093 Unfortunately, since this insn does not reflect very well the actual
16094 semantics of the operation, we need to annotate the insn for the benefit
16095 of DWARF2 frame unwind information. */
rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
16100 int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;
16107 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16108 if (mask & (1 << i))
16111 gcc_assert (num_regs && num_regs <= 16);
16113 /* We don't record the PC in the dwarf frame information. */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;
16118 /* For the body of the insn we are going to generate an UNSPEC in
16119 parallel with several USEs. This allows the insn to be recognized
16120 by the push_multi pattern in the arm.md file.
     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
	                                (const_int:SI <num>)))
	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
	   ...
        ])
16133 For the frame note however, we try to be more explicit and actually
16134 show each register being stored into the stack frame, plus a (single)
16135 decrement of the stack pointer. We do it this way in order to be
16136 friendly to the stack unwinding code, which only wants to see a single
16137 stack decrement per instruction. The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
        ])
     FIXME: In an ideal world the PRE_MODIFY would not exist and
16149 instead we'd have a parallel expression detailing all
16150 the stores to the various memory addresses so that debug
16151 information is more up-to-date. Remember however while writing
16152 this to take care of the constraints with the push instruction.
16154 Note also that this has to be taken care of for the VFP registers.
16156 For more see PR43399. */
16158 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16159 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16160 dwarf_par_index = 1;
16162 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16164 if (mask & (1 << i))
16166 reg = gen_rtx_REG (SImode, i);
	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (VOIDmode,
			   gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));
16182 if (i != PC_REGNUM)
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
16188 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16196 for (j = 1, i++; j < num_regs; i++)
16198 if (mask & (1 << i))
16200 reg = gen_rtx_REG (SImode, i);
16202 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
	  if (i != PC_REGNUM)
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem
			       (SImode,
				plus_constant (stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
16221 par = emit_insn (par);
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
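/* Usage sketch (illustrative): emit_multi_reg_push ((1 << 4) | (1 << 5)
   | (1 << LR_REGNUM)) emits a single "stmfd sp!, {r4, r5, lr}" (push)
   carrying the REG_FRAME_RELATED_EXPR note built above.  */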
16234 /* Calculate the size of the return value that is passed in registers. */
16236 arm_size_return_regs (void)
16238 enum machine_mode mode;
  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));
16245 return GET_MODE_SIZE (mode);
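/* For example (illustrative): a function returning 'long long' has a
   DImode return rtx, so this yields 8 -- the value comes back in r0/r1.  */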
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;
16256 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16257 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16259 reg = gen_rtx_REG (XFmode, base_reg++);
  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(stack_pointer_rtx,
					 -12 * count))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));
16274 tmp = gen_rtx_SET (VOIDmode,
16275 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
16276 RTX_FRAME_RELATED_P (tmp) = 1;
16277 XVECEXP (dwarf, 0, 1) = tmp;
16279 for (i = 1; i < count; i++)
16281 reg = gen_rtx_REG (XFmode, base_reg++);
16282 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (XFmode,
					plus_constant (stack_pointer_rtx,
						       i * 12)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -12 * count));
16297 RTX_FRAME_RELATED_P (tmp) = 1;
16298 XVECEXP (dwarf, 0, 0) = tmp;
16300 par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
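/* Illustrative: assuming FIRST_FPA_REGNUM is 16, emit_sfm (16, 3) pushes
   f0-f2 (three XFmode registers, 12 bytes each) with one store-multiple,
   pre-decrementing SP by 36.  */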
16307 /* Return true if the current function needs to save/restore LR. */
static bool
thumb_force_lr_save (void)
{
16312 return !cfun->machine->lr_save_eliminated
16313 && (!leaf_function_p ()
16314 || thumb_far_jump_used_p ()
16315 || df_regs_ever_live_p (LR_REGNUM));
16319 /* Return true if r3 is used by any of the tail call insns in the
16320 current function. */
static bool
any_sibcall_uses_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3))
	  return true;
      }
  return false;
}
16344 /* Compute the distance from register FROM to register TO.
16345 These can be the arg pointer (26), the soft frame pointer (25),
16346 the stack pointer (13) or the hard frame pointer (11).
16347 In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this (from high to low addresses;
   each named pointer marks the boundary just below the text that
   precedes it):

       old stack pointer ->
	     saved arguments for vararg functions
       hard FP & arg pointer ->
	     stack frame
	     call saved registers
       soft frame pointer ->
	     local variables
       locals base pointer ->
	     outgoing arguments
       current stack pointer ->
16378 For a given function some or all of these stack components
16379 may not be needed, giving rise to the possibility of
16380 eliminating some of the registers.
16382 The values returned by this function must reflect the behavior
16383 of arm_expand_prologue() and arm_compute_save_reg_mask().
16385 The sign of the number returned reflects the direction of stack
16386 growth, so the values are positive for all eliminations except
16387 from the soft frame pointer to the hard frame pointer.
  SFP may point just inside the local variables block to ensure correct
  alignment.  */
16393 /* Calculate stack offsets. These are used to calculate register elimination
16394 offsets and in prologue/epilogue code. Also calculates which registers
16395 should be saved. */
16397 static arm_stack_offsets *
16398 arm_get_frame_offsets (void)
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;
16408 offsets = &cfun->machine->stack_offsets;
16410 /* We need to know if we are a leaf function. Unfortunately, it
16411 is possible to be called after start_sequence has been called,
16412 which causes get_insns to return the insns for the sequence,
16413 not the function, which will cause leaf_function_p to return
16414 the incorrect result.
16416 to know about leaf functions once reload has completed, and the
16417 frame size cannot be changed after that time, so we can safely
16418 use the cached value. */
  if (reload_completed)
    return offsets;
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
16425 frame_size = ROUND_UP_WORD (get_frame_size ());
16427 leaf = leaf_function_p ();
16429 /* Space for variadic functions. */
16430 offsets->saved_args = crtl->args.pretend_args_size;
16432 /* In Thumb mode this is incorrect, but never used. */
16433 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16434 arm_compute_static_chain_stack_bytes();
  if (TARGET_32BIT)
    {
      unsigned int regno;
16440 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16441 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16442 saved = core_saved;
16444 /* We know that SP will be doubleword aligned on entry, and we must
16445 preserve that condition at any subroutine call. We also require the
16446 soft frame pointer to be doubleword aligned. */
16448 if (TARGET_REALLY_IWMMXT)
16450 /* Check for the call-saved iWMMXt registers. */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
16458 func_type = arm_current_func_type ();
16459 if (! IS_VOLATILE (func_type))
16461 /* Space for saved FPA registers. */
	  for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 12;
16466 /* Space for saved VFP registers. */
16467 if (TARGET_HARD_FLOAT && TARGET_VFP)
16468 saved += arm_get_vfp_saved_size ();
16471 else /* TARGET_THUMB1 */
16473 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16474 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16475 saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
16480 /* Saved registers include the stack frame. */
16481 offsets->saved_regs = offsets->saved_args + saved +
16482 arm_compute_static_chain_stack_bytes();
16483 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
16487 /* However if it calls alloca(), we have a dynamically allocated
16488 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16489 && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }
16496 /* Ensure SFP has the correct alignment. */
16497 if (ARM_DOUBLEWORD_ALIGN
16498 && (offsets->soft_frame & 7))
16500 offsets->soft_frame += 4;
16501 /* Try to align stack by pushing an extra reg. Don't bother doing this
16502 when there is a stack frame as the alignment will be rolled into
16503 the normal stack adjustment. */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_uses_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0)
	    reg = 3;
	  else
	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
	      if ((offsets->saved_regs_mask & (1 << i)) == 0)
		{
		  reg = i;
		  break;
		}

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
16535 offsets->locals_base = offsets->soft_frame + frame_size;
16536 offsets->outgoing_args = (offsets->locals_base
16537 + crtl->outgoing_args_size);
16539 if (ARM_DOUBLEWORD_ALIGN)
16541 /* Ensure SP remains doubleword aligned. */
16542 if (offsets->outgoing_args & 7)
16543 offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
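/* Hand-worked example (illustrative, assuming ARM_DOUBLEWORD_ALIGN, no
   static chain and CALLER_INTERWORKING_SLOT_SIZE == 0): a function with
   8 bytes of locals that saves r4-r7 and lr gets saved_args = 0,
   saved_regs = 20, soft_frame = 24 (20 rounded up to a doubleword),
   locals_base = 32 and outgoing_args = 32.  */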
16551 /* Calculate the relative offsets for the different stack pointers. Positive
16552 offsets are in the direction of stack growth. */
unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
16557 arm_stack_offsets *offsets;
16559 offsets = arm_get_frame_offsets ();
16561 /* OK, now we have enough information to compute the distances.
16562 There must be an entry in these switch tables for each pair
16563 of registers in ELIMINABLE_REGS, even if some of the entries
16564 seem to be redundant or useless. */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
16574 /* This is the reverse of the soft frame pointer
16575 to hard frame pointer elimination below. */
16576 return offsets->soft_frame - offsets->saved_args;
16578 case ARM_HARD_FRAME_POINTER_REGNUM:
16579 /* This is only non-zero in the case where the static chain register
16580 is stored above the frame. */
16581 return offsets->frame - offsets->saved_args - 4;
16583 case STACK_POINTER_REGNUM:
16584 /* If nothing has been pushed on the stack at all
16585 then this will return -4. This *is* correct! */
16586 return offsets->outgoing_args - (offsets->saved_args + 4);
	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();
    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
16600 /* The hard frame pointer points to the top entry in the
16601 stack frame. The soft frame pointer to the bottom entry
16602 in the stack frame. If there is no stack frame at all,
16603 then they are identical. */
16605 return offsets->frame - offsets->soft_frame;
16607 case STACK_POINTER_REGNUM:
16608 return offsets->outgoing_args - offsets->soft_frame;
	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();
    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
16625 /* Given FROM and TO register numbers, say whether this elimination is
16626 allowed. Frame pointer elimination is automatically handled.
16628 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16629 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16630 pointer, we must eliminate FRAME_POINTER_REGNUM into
16631 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16632 ARG_POINTER_REGNUM. */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
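/* Illustrative consequences: when a frame pointer is needed, both
   ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM must be eliminated into
   HARD_FRAME_POINTER_REGNUM; otherwise either may go directly to
   STACK_POINTER_REGNUM.  */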
16644 /* Emit RTL to save coprocessor registers on function entry. Returns the
16645 number of bytes pushed. */
static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;
16655 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16656 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16658 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16659 insn = gen_rtx_MEM (V2SImode, insn);
16660 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }
  /* Save any floating point call-saved registers used by this
     function.  */
  if (TARGET_FPA_EMU2)
16669 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16670 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16672 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16673 insn = gen_rtx_MEM (XFmode, insn);
16674 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  saved_size += 12;
	}
16681 start_reg = LAST_FPA_REGNUM;
16683 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16685 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16687 if (start_reg - reg == 3)
16689 insn = emit_sfm (reg, 4);
16690 RTX_FRAME_RELATED_P (insn) = 1;
16692 start_reg = reg - 1;
16697 if (start_reg != reg)
16699 insn = emit_sfm (reg + 1, start_reg - reg);
16700 RTX_FRAME_RELATED_P (insn) = 1;
16701 saved_size += (start_reg - reg) * 12;
16703 start_reg = reg - 1;
16707 if (start_reg != reg)
16709 insn = emit_sfm (reg + 1, start_reg - reg);
16710 saved_size += (start_reg - reg) * 12;
16711 RTX_FRAME_RELATED_P (insn) = 1;
16714 if (TARGET_HARD_FLOAT && TARGET_VFP)
16716 start_reg = FIRST_VFP_REGNUM;
16718 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16720 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16721 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16723 if (start_reg != reg)
16724 saved_size += vfp_emit_fstmd (start_reg,
16725 (reg - start_reg) / 2);
16726 start_reg = reg + 2;
16729 if (start_reg != reg)
16730 saved_size += vfp_emit_fstmd (start_reg,
16731 (reg - start_reg) / 2);
16737 /* Set the Thumb frame pointer from the stack pointer. */
static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;
16745 amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
    }
16766 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16767 plus_constant (stack_pointer_rtx, amount));
16768 RTX_FRAME_RELATED_P (dwarf) = 1;
16769 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16772 RTX_FRAME_RELATED_P (insn) = 1;
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
16787 int saved_regs = 0;
16788 unsigned HOST_WIDE_INT args_to_push;
16789 arm_stack_offsets *offsets;
16791 func_type = arm_current_func_type ();
16793 /* Naked functions don't have prologues. */
  if (IS_NAKED (func_type))
    return;
16797 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16798 args_to_push = crtl->args.pretend_args_size;
16800 /* Compute which register we will have to save onto the stack. */
16801 offsets = arm_get_frame_offsets ();
16802 live_regs_mask = offsets->saved_regs_mask;
16804 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
16821 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16823 r0 = gen_rtx_REG (SImode, 0);
16824 r1 = gen_rtx_REG (SImode, 1);
16826 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16827 RTX_FRAME_RELATED_P (insn) = 1;
16828 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16830 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16832 /* ??? The CFA changes here, which may cause GDB to conclude that it
16833 has entered a different function. That said, the unwind info is
16834 correct, individually, before and after this instruction because
16835 we've described the save of SP, which will override the default
16836 handling of SP as restoring from the CFA. */
16837 emit_insn (gen_movsi (stack_pointer_rtx, r1));
  /* For APCS frames, if IP register is clobbered
     when creating the frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16845 if (IS_INTERRUPT (func_type))
16847 /* Interrupt functions must not corrupt any registers.
16848 Creating a frame pointer however, corrupts the IP
16849 register, so we must push it first. */
16850 emit_multi_reg_push (1 << IP_REGNUM);
16852 /* Do not set RTX_FRAME_RELATED_P on this insn.
16853 The dwarf stack unwinding code only wants to see one
16854 stack decrement per function, and this is not it. If
16855 this instruction is labeled as being part of the frame
16856 creation sequence then dwarf2out_frame_debug_expr will
16857 die when it encounters the assignment of IP to FP
16858 later on, since the use of SP here establishes SP as
16859 the CFA register and not IP.
16861 Anyway this instruction is not really part of the stack
16862 frame creation although it is part of the prologue. */
16864 else if (IS_NESTED (func_type))
16866 /* The Static chain register is the same as the IP register
16867 used as a scratch register during stack frame creation.
	     To get around this we need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order of preference:

	       1. The last argument register.
	       2. A slot on the stack above the frame.  (This only
		  works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
		  onto the stack.
16878 Note - we only need to tell the dwarf2 backend about the SP
16879 adjustment in the second variant; the static chain register
16880 doesn't need to be unwound, as it doesn't contain a value
16881 inherited from the caller. */
16883 if (df_regs_ever_live_p (3) == false)
16884 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16885 else if (args_to_push == 0)
	      gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
	      saved_regs += 4;

	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
	      fp_offset = 4;
16896 /* Just tell the dwarf backend that we adjusted SP. */
16897 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (stack_pointer_rtx,
						  -fp_offset));
16900 RTX_FRAME_RELATED_P (insn) = 1;
16901 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16905 /* Store the args on the stack. */
16906 if (cfun->machine->uses_anonymous_args)
		insn = emit_multi_reg_push
		  ((0xf0 >> (args_to_push / 4)) & 0xf);
	      else
		insn = emit_insn
		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (- args_to_push)));
16914 RTX_FRAME_RELATED_P (insn) = 1;
16916 saved_pretend_args = 1;
16917 fp_offset = args_to_push;
16920 /* Now reuse r3 to preserve IP. */
16921 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16925 insn = emit_set_insn (ip_rtx,
16926 plus_constant (stack_pointer_rtx, fp_offset));
16927 RTX_FRAME_RELATED_P (insn) = 1;
16932 /* Push the argument registers, or reserve space for them. */
16933 if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
16940 RTX_FRAME_RELATED_P (insn) = 1;
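      /* Illustrative: with 8 bytes of pretend args, (0xf0 >> (8 / 4)) & 0xf
	 is 0xc, i.e. r2 and r3, so the push above is "push {r2, r3}".  */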
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when the frame is needed and the frame layout
     is APCS), then subtracting four from LR now will mean that
     the function return can be done with a single instruction.  */
16948 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16949 && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
16953 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16955 emit_set_insn (lr, plus_constant (lr, -4));
16958 if (live_regs_mask)
16960 saved_regs += bit_count (live_regs_mask) * 4;
16961 if (optimize_size && !frame_pointer_needed
16962 && saved_regs == offsets->saved_regs - offsets->saved_args)
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n = 0, frame;

	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
16981 insn = emit_multi_reg_push (live_regs_mask);
16982 RTX_FRAME_RELATED_P (insn) = 1;
16985 if (! IS_VOLATILE (func_type))
16986 saved_regs += arm_save_coproc_regs ();
16988 if (frame_pointer_needed && TARGET_ARM)
16990 /* Create the new frame pointer. */
16991 if (TARGET_APCS_FRAME)
16993 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16994 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16995 RTX_FRAME_RELATED_P (insn) = 1;
16997 if (IS_NESTED (func_type))
16999 /* Recover the static chain register. */
17000 if (!df_regs_ever_live_p (3)
17001 || saved_pretend_args)
17002 insn = gen_rtx_REG (SImode, 3);
17003 else /* if (crtl->args.pretend_args_size == 0) */
17005 insn = plus_constant (hard_frame_pointer_rtx, 4);
17006 insn = gen_frame_mem (SImode, insn);
17008 emit_set_insn (ip_rtx, insn);
17009 /* Add a USE to stop propagate_one_insn() from barfing. */
17010 emit_insn (gen_prologue_use (ip_rtx));
      else
	{
	  insn = GEN_INT (saved_regs - 4);
17016 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17017 stack_pointer_rtx, insn));
17018 RTX_FRAME_RELATED_P (insn) = 1;
17022 if (flag_stack_usage_info)
17023 current_function_static_stack_size
17024 = offsets->outgoing_args - offsets->saved_args;
17026 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17028 /* This add can produce multiple insns for a large constant, so we
17029 need to get tricky. */
17030 rtx last = get_last_insn ();
17032 amount = GEN_INT (offsets->saved_args + saved_regs
17033 - offsets->outgoing_args);
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);
17044 /* If the frame pointer is needed, emit a special barrier that
17045 will prevent the scheduler from moving stores to the frame
17046 before the stack adjustment. */
17047 if (frame_pointer_needed)
17048 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17049 hard_frame_pointer_rtx));
17053 if (frame_pointer_needed && TARGET_THUMB2)
17054 thumb_set_frame_pointer (offsets);
17056 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17058 unsigned long mask;
17060 mask = live_regs_mask;
17061 mask &= THUMB2_WORK_REGS;
17062 if (!IS_NESTED (func_type))
17063 mask |= (1 << IP_REGNUM);
17064 arm_load_pic_register (mask);
17067 /* If we are profiling, make sure no instructions are scheduled before
17068 the call to mcount. Similarly if the user has requested no
17069 scheduling in the prolog. Similarly if we want non-call exceptions
17070 using the EABI unwinder, to prevent faulting instructions from being
17071 swapped with a stack adjustment. */
17072 if (crtl->profile || !TARGET_SCHED_PROLOG
17073 || (arm_except_unwind_info (&global_options) == UI_TARGET
17074 && cfun->can_throw_non_call_exceptions))
17075 emit_insn (gen_blockage ());
17077 /* If the link register is being kept alive, with the return address in it,
17078 then make sure that it does not get reused by the ce2 pass. */
17079 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17080 cfun->machine->lr_save_eliminated = 1;
17083 /* Print condition code to STREAM. Helper function for arm_print_operand. */
static void
arm_print_condition (FILE *stream)
{
17087 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}
17102 fputs (arm_condition_codes[arm_current_cc], stream);
17104 else if (current_insn_predicate)
      enum arm_cond_code code;

      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
17120 /* If CODE is 'd', then the X is a condition operand and the instruction
17121 should only be executed if the condition is true.
17122 if CODE is 'D', then the X is a condition operand and the instruction
17123 should only be executed if the condition is false: however, if the mode
17124 of the comparison is CCFPEmode, then always execute the instruction -- we
17125 do this because in these circumstances !GE does not necessarily imply LT;
17126 in these cases the instruction pattern will take care to make sure that
17127 an instruction containing %d will follow, thereby undoing the effects of
17128 doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;
    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;
    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;
    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;
    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc ('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc ('s', stream);
	}
      return;
    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc ('s', stream);
      break;
    case '#':
      /* %# is a "break" sequence.  It doesn't output anything, but is used
	 to separate e.g. operand numbers from following text, if that text
	 consists of further digits which we don't want to be part of the
	 operand number.  */
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17202 r = real_value_negate (&r);
17203 fprintf (stream, "%s", fp_const_from_val (&r));
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
17209 switch (GET_CODE (x))
17212 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17216 output_addr_const (stream, x);
17220 if (GET_CODE (XEXP (x, 0)) == PLUS
17221 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17223 output_addr_const (stream, x);
17226 /* Fall through. */
17229 output_operand_lossage ("Unsupported operand for code '%c'", code);
17234 if (GET_CODE (x) == CONST_INT)
17237 val = ARM_SIGN_EXTEND (~INTVAL (x));
17238 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17242 putc ('~', stream);
17243 output_addr_const (stream, x);
17248 /* The low 16 bits of an immediate constant. */
17249 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17253 fprintf (stream, "%s", arithmetic_instr (x, 1));
17256 /* Truncate Cirrus shift counts. */
17258 if (GET_CODE (x) == CONST_INT)
17260 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
17263 arm_print_operand (stream, x, 0);
17267 fprintf (stream, "%s", arithmetic_instr (x, 0));
17275 if (!shift_operator (x, SImode))
17277 output_operand_lossage ("invalid shift operand");
17281 shift = shift_op (x, &val);
17285 fprintf (stream, ", %s ", shift);
17287 arm_print_operand (stream, XEXP (x, 1), 0);
17289 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17294 /* An explanation of the 'Q', 'R' and 'H' register operands:
17296 In a pair of registers containing a DI or DF value the 'Q'
17297 operand returns the register number of the register containing
17298 the least significant part of the value. The 'R' operand returns
17299 the register number of the register containing the most
17300 significant part of the value.
17302 The 'H' operand returns the higher of the two register numbers.
17303 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17304 same as the 'Q' operand, since the most significant part of the
17305 value is held in the lower number register. The reverse is true
17306 on systems where WORDS_BIG_ENDIAN is false.
17308 The purpose of these operands is to distinguish between cases
17309 where the endian-ness of the values is important (for example
17310 when they are added together), and cases where the endian-ness
17311 is irrelevant, but the order of register operations is important.
17312 For example when loading a value from memory into a register
17313 pair, the endian-ness does not matter. Provided that the value
17314 from the lower memory address is put into the lower numbered
17315 register, and the value from the higher address is put into the
17316 higher numbered register, the load will work regardless of whether
17317 the value being loaded is big-wordian or little-wordian. The
17318 order of the two register loads can matter however, if the address
17319 of the memory location is actually held in one of the registers
17320 being overwritten by the load.
	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  (For example, on a little-endian target a DImode
	 value in r0/r1 prints r0 for %Q, r1 for %R and r1 for %H.)  */
    case 'Q':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17327 rtx part = gen_lowpart (SImode, x);
17328 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17332 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17334 output_operand_lossage ("invalid operand for code '%c'", code);
17338 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17342 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17344 enum machine_mode mode = GET_MODE (x);
17347 if (mode == VOIDmode)
17349 part = gen_highpart_mode (SImode, mode, x);
17350 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17354 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17356 output_operand_lossage ("invalid operand for code '%c'", code);
17360 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17364 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17366 output_operand_lossage ("invalid operand for code '%c'", code);
17370 asm_fprintf (stream, "%r", REGNO (x) + 1);
17374 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17376 output_operand_lossage ("invalid operand for code '%c'", code);
17380 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17384 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17386 output_operand_lossage ("invalid operand for code '%c'", code);
17390 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17394 asm_fprintf (stream, "%r",
17395 GET_CODE (XEXP (x, 0)) == REG
17396 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17400 asm_fprintf (stream, "{%r-%r}",
17402 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17405 /* Like 'M', but writing doubleword vector registers, for use by Neon
17409 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17410 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17412 asm_fprintf (stream, "{d%d}", regno);
17414 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17419 /* CONST_TRUE_RTX means always -- that's the default. */
17420 if (x == const_true_rtx)
17423 if (!COMPARISON_P (x))
17425 output_operand_lossage ("invalid operand for code '%c'", code);
17429 fputs (arm_condition_codes[get_arm_condition_code (x)],
17434 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17435 want to do that. */
17436 if (x == const_true_rtx)
17438 output_operand_lossage ("instruction never executed");
17441 if (!COMPARISON_P (x))
17443 output_operand_lossage ("invalid operand for code '%c'", code);
17447 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17448 (get_arm_condition_code (x))],
17452 /* Cirrus registers can be accessed in a variety of ways:
17453 single floating point (f)
17454 double floating point (d)
17456 64bit integer (dx). */
17457 case 'W': /* Cirrus register in F mode. */
17458 case 'X': /* Cirrus register in D mode. */
17459 case 'Y': /* Cirrus register in FX mode. */
17460 case 'Z': /* Cirrus register in DX mode. */
17461 gcc_assert (GET_CODE (x) == REG
17462 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
17464 fprintf (stream, "mv%s%s",
17466 : code == 'X' ? "d"
17467 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
17471 /* Print cirrus register in the mode specified by the register's mode. */
17474 int mode = GET_MODE (x);
17476 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
17478 output_operand_lossage ("invalid operand for code '%c'", code);
17482 fprintf (stream, "mv%s%s",
17483 mode == DFmode ? "d"
17484 : mode == SImode ? "fx"
17485 : mode == DImode ? "dx"
17486 : "f", reg_names[REGNO (x)] + 2);
17492 if (GET_CODE (x) != REG
17493 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17494 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17495 /* Bad value for wCG register number. */
17497 output_operand_lossage ("invalid operand for code '%c'", code);
17502 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17505 /* Print an iWMMXt control register name. */
17507 if (GET_CODE (x) != CONST_INT
17509 || INTVAL (x) >= 16)
17510 /* Bad value for wC register number. */
17512 output_operand_lossage ("invalid operand for code '%c'", code);
17518 static const char * wc_reg_names [16] =
17520 "wCID", "wCon", "wCSSF", "wCASF",
17521 "wC4", "wC5", "wC6", "wC7",
17522 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17523 "wC12", "wC13", "wC14", "wC15"
17526 fprintf (stream, wc_reg_names [INTVAL (x)]);
17530 /* Print the high single-precision register of a VFP double-precision
17534 int mode = GET_MODE (x);
17537 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17539 output_operand_lossage ("invalid operand for code '%c'", code);
17544 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17546 output_operand_lossage ("invalid operand for code '%c'", code);
17550 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17554 /* Print a VFP/Neon double precision or quad precision register name. */
17558 int mode = GET_MODE (x);
17559 int is_quad = (code == 'q');
17562 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17564 output_operand_lossage ("invalid operand for code '%c'", code);
17568 if (GET_CODE (x) != REG
17569 || !IS_VFP_REGNUM (REGNO (x)))
17571 output_operand_lossage ("invalid operand for code '%c'", code);
17576 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17577 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17579 output_operand_lossage ("invalid operand for code '%c'", code);
17583 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17584 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17588 /* These two codes print the low/high doubleword register of a Neon quad
17589 register, respectively. For pair-structure types, can also print
17590 low/high quadword registers. */
17594 int mode = GET_MODE (x);
17597 if ((GET_MODE_SIZE (mode) != 16
17598 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17600 output_operand_lossage ("invalid operand for code '%c'", code);
17605 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17607 output_operand_lossage ("invalid operand for code '%c'", code);
17611 if (GET_MODE_SIZE (mode) == 16)
17612 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17613 + (code == 'f' ? 1 : 0));
17615 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17616 + (code == 'f' ? 1 : 0));
17620 /* Print a VFPv3 floating-point constant, represented as an integer
17624 int index = vfp3_const_double_index (x);
17625 gcc_assert (index != -1);
17626 fprintf (stream, "%d", index);
17630 /* Print bits representing opcode features for Neon.
17632 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17633 and polynomials as unsigned.
17635 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17637 Bit 2 is 1 for rounding functions, 0 otherwise. */
17639 /* Identify the type as 's', 'u', 'p' or 'f'. */
17642 HOST_WIDE_INT bits = INTVAL (x);
17643 fputc ("uspf"[bits & 3], stream);
17647 /* Likewise, but signed and unsigned integers are both 'i'. */
17650 HOST_WIDE_INT bits = INTVAL (x);
17651 fputc ("iipf"[bits & 3], stream);
17655 /* As for 'T', but emit 'u' instead of 'p'. */
17658 HOST_WIDE_INT bits = INTVAL (x);
17659 fputc ("usuf"[bits & 3], stream);
17663 /* Bit 2: rounding (vs none). */
17666 HOST_WIDE_INT bits = INTVAL (x);
17667 fputs ((bits & 4) != 0 ? "r" : "", stream);
17671 /* Memory operand for vld1/vst1 instruction. */
17675 bool postinc = FALSE;
17676 unsigned align, memsize, align_bits;
17678 gcc_assert (GET_CODE (x) == MEM);
17679 addr = XEXP (x, 0);
17680 if (GET_CODE (addr) == POST_INC)
17683 addr = XEXP (addr, 0);
17685 asm_fprintf (stream, "[%r", REGNO (addr));
17687 /* We know the alignment of this access, so we can emit a hint in the
17688 instruction (for some alignments) as an aid to the memory subsystem
17690 align = MEM_ALIGN (x) >> 3;
17691 memsize = MEM_SIZE (x);
17693 /* Only certain alignment specifiers are supported by the hardware. */
	if (memsize == 16 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
	  align_bits = 128;
	else if ((align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;
17704 asm_fprintf (stream, ":%d", align_bits);
17706 asm_fprintf (stream, "]");
17709 fputs("!", stream);
17717 gcc_assert (GET_CODE (x) == MEM);
17718 addr = XEXP (x, 0);
17719 gcc_assert (GET_CODE (addr) == REG);
17720 asm_fprintf (stream, "[%r]", REGNO (addr));
17724 /* Translate an S register number into a D register number and element index. */
17727 int mode = GET_MODE (x);
17730 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17732 output_operand_lossage ("invalid operand for code '%c'", code);
17737 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17739 output_operand_lossage ("invalid operand for code '%c'", code);
17743 regno = regno - FIRST_VFP_REGNUM;
17744 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
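	/* E.g. (illustrative) s1 prints as d0[1] and s2 as d1[0]: each
	   D register overlays two consecutive S registers.  */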
17749 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17750 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17753 /* Register specifier for vld1.16/vst1.16. Translate the S register
17754 number into a D register number and element index. */
17757 int mode = GET_MODE (x);
17760 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17762 output_operand_lossage ("invalid operand for code '%c'", code);
17767 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17769 output_operand_lossage ("invalid operand for code '%c'", code);
17773 regno = regno - FIRST_VFP_REGNUM;
17774 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17781 output_operand_lossage ("missing operand");
17785 switch (GET_CODE (x))
17788 asm_fprintf (stream, "%r", REGNO (x));
17792 output_memory_reference_mode = GET_MODE (x);
17793 output_address (XEXP (x, 0));
	case CONST_DOUBLE:
	  if (TARGET_NEON)
	    {
	      char fpstr[20];
	      real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			       sizeof (fpstr), 0, 1);
	      fprintf (stream, "#%s", fpstr);
	    }
	  else
	    fprintf (stream, "#%s", fp_immediate_constant (x));
	  break;
	default:
	  gcc_assert (GET_CODE (x) != NEG);
17810 fputc ('#', stream);
17811 if (GET_CODE (x) == HIGH)
17813 fputs (":lower16:", stream);
17817 output_addr_const (stream, x);
17823 /* Target hook for printing a memory address. */
17825 arm_print_operand_address (FILE *stream, rtx x)
17829 int is_minus = GET_CODE (x) == MINUS;
17831 if (GET_CODE (x) == REG)
17832 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17833 else if (GET_CODE (x) == PLUS || is_minus)
17835 rtx base = XEXP (x, 0);
17836 rtx index = XEXP (x, 1);
17837 HOST_WIDE_INT offset = 0;
17838 if (GET_CODE (base) != REG
17839 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register (one of them must be)
		 and that SP is not used as an index register.  */
	      rtx temp = base;
	      base = index;
	      index = temp;
	    }
17848 switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;
	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;
17870 asm_fprintf (stream, "[%r, %s%r",
17871 REGNO (base), is_minus ? "-" : "",
17872 REGNO (XEXP (index, 0)));
17873 arm_print_operand (stream, index, 'S');
17874 fputs ("]", stream);
	    default:
	      gcc_unreachable ();
	    }
	}
17882 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17883 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17885 extern enum machine_mode output_memory_reference_mode;
17887 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17889 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17890 asm_fprintf (stream, "[%r, #%s%d]!",
17891 REGNO (XEXP (x, 0)),
17892 GET_CODE (x) == PRE_DEC ? "-" : "",
17893 GET_MODE_SIZE (output_memory_reference_mode));
17895 asm_fprintf (stream, "[%r], #%s%d",
17896 REGNO (XEXP (x, 0)),
17897 GET_CODE (x) == POST_DEC ? "-" : "",
17898 GET_MODE_SIZE (output_memory_reference_mode));
17900 else if (GET_CODE (x) == PRE_MODIFY)
17902 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17903 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17904 asm_fprintf (stream, "#%wd]!",
17905 INTVAL (XEXP (XEXP (x, 1), 1)));
17907 asm_fprintf (stream, "%r]!",
17908 REGNO (XEXP (XEXP (x, 1), 1)));
17910 else if (GET_CODE (x) == POST_MODIFY)
17912 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17913 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17914 asm_fprintf (stream, "#%wd",
17915 INTVAL (XEXP (XEXP (x, 1), 1)));
17916 else
17917 asm_fprintf (stream, "%r",
17918 REGNO (XEXP (XEXP (x, 1), 1)));
17920 else output_addr_const (stream, x);
17924 if (GET_CODE (x) == REG)
17925 asm_fprintf (stream, "[%r]", REGNO (x));
17926 else if (GET_CODE (x) == POST_INC)
17927 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17928 else if (GET_CODE (x) == PLUS)
17930 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17931 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17932 asm_fprintf (stream, "[%r, #%wd]",
17933 REGNO (XEXP (x, 0)),
17934 INTVAL (XEXP (x, 1)));
17935 else
17936 asm_fprintf (stream, "[%r, %r]",
17937 REGNO (XEXP (x, 0)),
17938 REGNO (XEXP (x, 1)));
17941 output_addr_const (stream, x);
17945 /* Target hook for indicating whether a punctuation character for
17946 TARGET_PRINT_OPERAND is valid. */
17947 static bool
17948 arm_print_operand_punct_valid_p (unsigned char code)
17950 return (code == '@' || code == '|' || code == '.'
17951 || code == '(' || code == ')' || code == '#'
17952 || (TARGET_32BIT && (code == '?'))
17953 || (TARGET_THUMB2 && (code == '!'))
17954 || (TARGET_THUMB && (code == '_')));
17957 /* Target hook for assembling integer objects. The ARM version needs to
17958 handle word-sized values specially. */
17959 static bool
17960 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17962 enum machine_mode mode;
17964 if (size == UNITS_PER_WORD && aligned_p)
17966 fputs ("\t.word\t", asm_out_file);
17967 output_addr_const (asm_out_file, x);
17969 /* Mark symbols as position independent. We only do this in the
17970 .text segment, not in the .data segment. */
17971 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17972 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17974 /* See legitimize_pic_address for an explanation of the
17975 TARGET_VXWORKS_RTP check. */
17976 if (TARGET_VXWORKS_RTP
17977 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17978 fputs ("(GOT)", asm_out_file);
17979 else
17980 fputs ("(GOTOFF)", asm_out_file);
17982 fputc ('\n', asm_out_file);
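/* Illustrative output (assuming a PIC constant pool entry):
	.word	foo(GOT)	@ global symbol, loaded via the GOT
	.word	bar(GOTOFF)	@ local symbol, offset from the GOT base  */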
17986 mode = GET_MODE (x);
17988 if (arm_vector_mode_supported_p (mode))
17992 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17994 units = CONST_VECTOR_NUNITS (x);
17995 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17997 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17998 for (i = 0; i < units; i++)
18000 rtx elt = CONST_VECTOR_ELT (x, i);
18001 assemble_integer
18002 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18005 for (i = 0; i < units; i++)
18007 rtx elt = CONST_VECTOR_ELT (x, i);
18008 REAL_VALUE_TYPE rval;
18010 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18012 assemble_real
18013 (rval, GET_MODE_INNER (mode),
18014 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18020 return default_assemble_integer (x, size, aligned_p);
18024 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18028 if (!TARGET_AAPCS_BASED)
18031 default_named_section_asm_out_constructor
18032 : default_named_section_asm_out_destructor) (symbol, priority);
18036 /* Put these in the .init_array section, using a special relocation. */
18037 if (priority != DEFAULT_INIT_PRIORITY)
18040 sprintf (buf, "%s.%.5u",
18041 is_ctor ? ".init_array" : ".fini_array",
18042 priority);
18043 s = get_section (buf, SECTION_WRITE, NULL_TREE);
18050 switch_to_section (s);
18051 assemble_align (POINTER_SIZE);
18052 fputs ("\t.word\t", asm_out_file);
18053 output_addr_const (asm_out_file, symbol);
18054 fputs ("(target1)\n", asm_out_file);
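/* Illustrative output (not part of the original source): a constructor
   with priority 101 is emitted roughly as
	.section	.init_array.00101
	.align	2
	.word	ctor_fn(target1)
   where the (target1) relocation lets the platform decide between
   absolute and PC-relative addressing for the entry.  */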
18057 /* Add a function to the list of static constructors. */
18060 arm_elf_asm_constructor (rtx symbol, int priority)
18062 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18065 /* Add a function to the list of static destructors. */
18068 arm_elf_asm_destructor (rtx symbol, int priority)
18070 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18073 /* A finite state machine takes care of noticing whether or not instructions
18074 can be conditionally executed, thus decreasing execution time and code
18075 size by deleting branch instructions. The fsm is controlled by
18076 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18078 /* The states of the fsm controlling condition codes are:
18079 0: normal, do nothing special
18080 1: make ASM_OUTPUT_OPCODE not output this instruction
18081 2: make ASM_OUTPUT_OPCODE not output this instruction
18082 3: make instructions conditional
18083 4: make instructions conditional
18085 State transitions (state->state by whom under condition):
18086 0 -> 1 final_prescan_insn if the `target' is a label
18087 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18088 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18089 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18090 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18091 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18092 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18093 (the target insn is arm_target_insn).
18095 If the jump clobbers the conditions then we use states 2 and 4.
18097 A similar thing can be done with conditional return insns.
18099 XXX In case the `target' is an unconditional branch, this conditionalising
18100 of the instructions always reduces code size, but not always execution
18101 time. But then, I want to reduce the code size to somewhere near what
18102 /bin/cc produces. */
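/* Illustrative example (not part of the original source).  Before:
	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:
   After conditionalisation (states 0 -> 1 -> 3 -> 0):
	cmp	r0, #0
	addne	r1, r1, #1
     .L1:
   The branch is deleted and the skipped instruction is predicated
   with the inverse condition.  */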
18104 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18105 instructions. When a COND_EXEC instruction is seen the subsequent
18106 instructions are scanned so that multiple conditional instructions can be
18107 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18108 specify the length and true/false mask for the IT block. These will be
18109 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
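/* Illustrative example (not part of the original source): with
   arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b101, thumb2_asm_output_opcode emits
	itet	eq
   so the following three instructions execute as eq, ne, eq.  */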
18111 /* Returns the index of the ARM condition code string in
18112 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18113 COMPARISON should be an rtx like `(eq (...) (...))'. */
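/* For example (illustrative), `(eq (reg:CC CC_REGNUM) (const_int 0))'
   yields ARM_EQ, which selects the "eq" suffix in
   arm_condition_codes.  */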
18115 static enum arm_cond_code
18116 maybe_get_arm_condition_code (rtx comparison)
18118 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18119 enum arm_cond_code code;
18120 enum rtx_code comp_code = GET_CODE (comparison);
18122 if (GET_MODE_CLASS (mode) != MODE_CC)
18123 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18124 XEXP (comparison, 1));
18128 case CC_DNEmode: code = ARM_NE; goto dominance;
18129 case CC_DEQmode: code = ARM_EQ; goto dominance;
18130 case CC_DGEmode: code = ARM_GE; goto dominance;
18131 case CC_DGTmode: code = ARM_GT; goto dominance;
18132 case CC_DLEmode: code = ARM_LE; goto dominance;
18133 case CC_DLTmode: code = ARM_LT; goto dominance;
18134 case CC_DGEUmode: code = ARM_CS; goto dominance;
18135 case CC_DGTUmode: code = ARM_HI; goto dominance;
18136 case CC_DLEUmode: code = ARM_LS; goto dominance;
18137 case CC_DLTUmode: code = ARM_CC;
18140 if (comp_code == EQ)
18141 return ARM_INVERSE_CONDITION_CODE (code);
18142 if (comp_code == NE)
18149 case NE: return ARM_NE;
18150 case EQ: return ARM_EQ;
18151 case GE: return ARM_PL;
18152 case LT: return ARM_MI;
18153 default: return ARM_NV;
18159 case NE: return ARM_NE;
18160 case EQ: return ARM_EQ;
18161 default: return ARM_NV;
18167 case NE: return ARM_MI;
18168 case EQ: return ARM_PL;
18169 default: return ARM_NV;
18174 /* These encodings assume that AC=1 in the FPA system control
18175 byte. This allows us to handle all cases except UNEQ and
18176 LTGT. */
18179 case GE: return ARM_GE;
18180 case GT: return ARM_GT;
18181 case LE: return ARM_LS;
18182 case LT: return ARM_MI;
18183 case NE: return ARM_NE;
18184 case EQ: return ARM_EQ;
18185 case ORDERED: return ARM_VC;
18186 case UNORDERED: return ARM_VS;
18187 case UNLT: return ARM_LT;
18188 case UNLE: return ARM_LE;
18189 case UNGT: return ARM_HI;
18190 case UNGE: return ARM_PL;
18191 /* UNEQ and LTGT do not have a representation. */
18192 case UNEQ: /* Fall through. */
18193 case LTGT: /* Fall through. */
18194 default: return ARM_NV;
18200 case NE: return ARM_NE;
18201 case EQ: return ARM_EQ;
18202 case GE: return ARM_LE;
18203 case GT: return ARM_LT;
18204 case LE: return ARM_GE;
18205 case LT: return ARM_GT;
18206 case GEU: return ARM_LS;
18207 case GTU: return ARM_CC;
18208 case LEU: return ARM_CS;
18209 case LTU: return ARM_HI;
18210 default: return ARM_NV;
18216 case LTU: return ARM_CS;
18217 case GEU: return ARM_CC;
18218 default: return ARM_NV;
18224 case NE: return ARM_NE;
18225 case EQ: return ARM_EQ;
18226 case GEU: return ARM_CS;
18227 case GTU: return ARM_HI;
18228 case LEU: return ARM_LS;
18229 case LTU: return ARM_CC;
18230 default: return ARM_NV;
18236 case GE: return ARM_GE;
18237 case LT: return ARM_LT;
18238 case GEU: return ARM_CS;
18239 case LTU: return ARM_CC;
18240 default: return ARM_NV;
18246 case NE: return ARM_NE;
18247 case EQ: return ARM_EQ;
18248 case GE: return ARM_GE;
18249 case GT: return ARM_GT;
18250 case LE: return ARM_LE;
18251 case LT: return ARM_LT;
18252 case GEU: return ARM_CS;
18253 case GTU: return ARM_HI;
18254 case LEU: return ARM_LS;
18255 case LTU: return ARM_CC;
18256 default: return ARM_NV;
18259 default: gcc_unreachable ();
18263 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18264 static enum arm_cond_code
18265 get_arm_condition_code (rtx comparison)
18267 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18268 gcc_assert (code != ARM_NV);
18272 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18273 instructions. */
18274 static void
18275 thumb2_final_prescan_insn (rtx insn)
18277 rtx first_insn = insn;
18278 rtx body = PATTERN (insn);
18280 enum arm_cond_code code;
18284 /* Remove the previous insn from the count of insns to be output. */
18285 if (arm_condexec_count)
18286 arm_condexec_count--;
18288 /* Nothing to do if we are already inside a conditional block. */
18289 if (arm_condexec_count)
18292 if (GET_CODE (body) != COND_EXEC)
18295 /* Conditional jumps are implemented directly. */
18296 if (GET_CODE (insn) == JUMP_INSN)
18299 predicate = COND_EXEC_TEST (body);
18300 arm_current_cc = get_arm_condition_code (predicate);
18302 n = get_attr_ce_count (insn);
18303 arm_condexec_count = 1;
18304 arm_condexec_mask = (1 << n) - 1;
18305 arm_condexec_masklen = n;
18306 /* See if subsequent instructions can be combined into the same block. */
18309 insn = next_nonnote_insn (insn);
18311 /* Jumping into the middle of an IT block is illegal, so a label or
18312 barrier terminates the block. */
18313 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
18316 body = PATTERN (insn);
18317 /* USE and CLOBBER aren't really insns, so just skip them. */
18318 if (GET_CODE (body) == USE
18319 || GET_CODE (body) == CLOBBER)
18322 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18323 if (GET_CODE (body) != COND_EXEC)
18325 /* Allow up to 4 conditionally executed instructions in a block. */
18326 n = get_attr_ce_count (insn);
18327 if (arm_condexec_masklen + n > 4)
18330 predicate = COND_EXEC_TEST (body);
18331 code = get_arm_condition_code (predicate);
18332 mask = (1 << n) - 1;
18333 if (arm_current_cc == code)
18334 arm_condexec_mask |= (mask << arm_condexec_masklen);
18335 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
18338 arm_condexec_count++;
18339 arm_condexec_masklen += n;
18341 /* A jump must be the last instruction in a conditional block. */
18342 if (GET_CODE (insn) == JUMP_INSN)
18345 /* Restore recog_data (getting the attributes of other insns can
18346 destroy this array, but final.c assumes that it remains intact
18347 across this call). */
18348 extract_constrain_insn_cached (first_insn);
18352 arm_final_prescan_insn (rtx insn)
18354 /* BODY will hold the body of INSN. */
18355 rtx body = PATTERN (insn);
18357 /* This will be 1 if trying to repeat the trick, and things need to be
18358 reversed if it appears to fail. */
18361 /* If we start with a return insn, we only succeed if we find another one. */
18362 int seeking_return = 0;
18363 enum rtx_code return_code = UNKNOWN;
18365 /* START_INSN will hold the insn from where we start looking. This is the
18366 first insn after the following code_label if REVERSE is true. */
18367 rtx start_insn = insn;
18369 /* If in state 4, check if the target branch is reached, in order to
18370 change back to state 0. */
18371 if (arm_ccfsm_state == 4)
18373 if (insn == arm_target_insn)
18375 arm_target_insn = NULL;
18376 arm_ccfsm_state = 0;
18381 /* If in state 3, it is possible to repeat the trick, if this insn is an
18382 unconditional branch to a label, and immediately following this branch
18383 is the previous target label which is only used once, and the label this
18384 branch jumps to is not too far off. */
18385 if (arm_ccfsm_state == 3)
18387 if (simplejump_p (insn))
18389 start_insn = next_nonnote_insn (start_insn);
18390 if (GET_CODE (start_insn) == BARRIER)
18392 /* XXX Isn't this always a barrier? */
18393 start_insn = next_nonnote_insn (start_insn);
18395 if (GET_CODE (start_insn) == CODE_LABEL
18396 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18397 && LABEL_NUSES (start_insn) == 1)
18402 else if (ANY_RETURN_P (body))
18404 start_insn = next_nonnote_insn (start_insn);
18405 if (GET_CODE (start_insn) == BARRIER)
18406 start_insn = next_nonnote_insn (start_insn);
18407 if (GET_CODE (start_insn) == CODE_LABEL
18408 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18409 && LABEL_NUSES (start_insn) == 1)
18412 seeking_return = 1;
18413 return_code = GET_CODE (body);
18422 gcc_assert (!arm_ccfsm_state || reverse);
18423 if (GET_CODE (insn) != JUMP_INSN)
18426 /* This jump might be paralleled with a clobber of the condition codes;
18427 the jump should always come first. */
18428 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18429 body = XVECEXP (body, 0, 0);
18432 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18433 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18436 int fail = FALSE, succeed = FALSE;
18437 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18438 int then_not_else = TRUE;
18439 rtx this_insn = start_insn, label = 0;
18441 /* Register the insn jumped to. */
18444 if (!seeking_return)
18445 label = XEXP (SET_SRC (body), 0);
18447 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18448 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18449 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18451 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18452 then_not_else = FALSE;
18454 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18456 seeking_return = 1;
18457 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18459 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18461 seeking_return = 1;
18462 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18463 then_not_else = FALSE;
18466 gcc_unreachable ();
18468 /* See how many insns this branch skips, and what kind of insns. If all
18469 insns are okay, and the label or unconditional branch to the same
18470 label is not too far away, succeed. */
18471 for (insns_skipped = 0;
18472 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18476 this_insn = next_nonnote_insn (this_insn);
18480 switch (GET_CODE (this_insn))
18483 /* Succeed if it is the target label, otherwise fail since
18484 control falls in from somewhere else. */
18485 if (this_insn == label)
18487 arm_ccfsm_state = 1;
18495 /* Succeed if the following insn is the target label.
18497 If return insns are used then the last insn in a function
18498 will be a barrier. */
18499 this_insn = next_nonnote_insn (this_insn);
18500 if (this_insn && this_insn == label)
18502 arm_ccfsm_state = 1;
18510 /* The AAPCS says that conditional calls should not be
18511 used since they make interworking inefficient (the
18512 linker can't transform BL<cond> into BLX). That's
18513 only a problem if the machine has BLX. */
18520 /* Succeed if the following insn is the target label, or
18521 if the following two insns are a barrier and the
18522 target label. */
18523 this_insn = next_nonnote_insn (this_insn);
18524 if (this_insn && GET_CODE (this_insn) == BARRIER)
18525 this_insn = next_nonnote_insn (this_insn);
18527 if (this_insn && this_insn == label
18528 && insns_skipped < max_insns_skipped)
18530 arm_ccfsm_state = 1;
18538 /* If this is an unconditional branch to the same label, succeed.
18539 If it is to another label, do nothing. If it is conditional,
18540 fail. */
18541 /* XXX Probably, the tests for SET and the PC are
18542 unnecessary. */
18544 scanbody = PATTERN (this_insn);
18545 if (GET_CODE (scanbody) == SET
18546 && GET_CODE (SET_DEST (scanbody)) == PC)
18548 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18549 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18551 arm_ccfsm_state = 2;
18554 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18557 /* Fail if a conditional return is undesirable (e.g. on a
18558 StrongARM), but still allow this if optimizing for size. */
18559 else if (GET_CODE (scanbody) == return_code
18560 && !use_return_insn (TRUE, NULL)
18563 else if (GET_CODE (scanbody) == return_code)
18565 arm_ccfsm_state = 2;
18568 else if (GET_CODE (scanbody) == PARALLEL)
18570 switch (get_attr_conds (this_insn))
18580 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18585 /* Instructions using or affecting the condition codes make it
18586 fail. */
18587 scanbody = PATTERN (this_insn);
18588 if (!(GET_CODE (scanbody) == SET
18589 || GET_CODE (scanbody) == PARALLEL)
18590 || get_attr_conds (this_insn) != CONDS_NOCOND)
18593 /* A conditional cirrus instruction must be followed by
18594 a non-Cirrus instruction. However, since we
18595 conditionalize instructions in this function, and since by
18596 the time we get here we can't add instructions
18597 (nops), because shorten_branches() has already been
18598 called, we will disable conditionalizing Cirrus
18599 instructions to be safe. */
18600 if (GET_CODE (scanbody) != USE
18601 && GET_CODE (scanbody) != CLOBBER
18602 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18612 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18613 arm_target_label = CODE_LABEL_NUMBER (label);
18616 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18618 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18620 this_insn = next_nonnote_insn (this_insn);
18621 gcc_assert (!this_insn
18622 || (GET_CODE (this_insn) != BARRIER
18623 && GET_CODE (this_insn) != CODE_LABEL));
18627 /* Oh, dear!  We ran off the end... give up. */
18628 extract_constrain_insn_cached (insn);
18629 arm_ccfsm_state = 0;
18630 arm_target_insn = NULL;
18633 arm_target_insn = this_insn;
18636 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18637 what it was. */
18638 if (!reverse)
18639 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18641 if (reverse || then_not_else)
18642 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18645 /* Restore recog_data (getting the attributes of other insns can
18646 destroy this array, but final.c assumes that it remains intact
18647 across this call). */
18648 extract_constrain_insn_cached (insn);
18652 /* Output IT instructions. */
18653 static void
18654 thumb2_asm_output_opcode (FILE * stream)
18659 if (arm_condexec_mask)
18661 for (n = 0; n < arm_condexec_masklen; n++)
18662 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18664 asm_fprintf (stream, "i%s\t%s\n\t", buff,
18665 arm_condition_codes[arm_current_cc]);
18666 arm_condexec_mask = 0;
18670 /* Returns true if REGNO is a valid register
18671 for holding a quantity of type MODE. */
18673 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18675 if (GET_MODE_CLASS (mode) == MODE_CC)
18676 return (regno == CC_REGNUM
18677 || (TARGET_HARD_FLOAT && TARGET_VFP
18678 && regno == VFPCC_REGNUM));
18681 /* For the Thumb we only allow values bigger than SImode in
18682 registers 0 - 6, so that there is always a second low
18683 register available to hold the upper part of the value.
18684 We probably ought to ensure that the register is the
18685 start of an even numbered register pair. */
18686 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18688 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18689 && IS_CIRRUS_REGNUM (regno))
18690 /* We have outlawed SI values in Cirrus registers because they
18691 reside in the lower 32 bits, but SF values reside in the
18692 upper 32 bits. This causes gcc all sorts of grief. We can't
18693 even split the registers into pairs because Cirrus SI values
18694 get sign extended to 64 bits -- aldyh. */
18695 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18697 if (TARGET_HARD_FLOAT && TARGET_VFP
18698 && IS_VFP_REGNUM (regno))
18700 if (mode == SFmode || mode == SImode)
18701 return VFP_REGNO_OK_FOR_SINGLE (regno);
18703 if (mode == DFmode)
18704 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18706 /* VFP registers can hold HFmode values, but there is no point in
18707 putting them there unless we have hardware conversion insns. */
18708 if (mode == HFmode)
18709 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18712 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18713 || (VALID_NEON_QREG_MODE (mode)
18714 && NEON_REGNO_OK_FOR_QUAD (regno))
18715 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18716 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18717 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18718 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18719 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18724 if (TARGET_REALLY_IWMMXT)
18726 if (IS_IWMMXT_GR_REGNUM (regno))
18727 return mode == SImode;
18729 if (IS_IWMMXT_REGNUM (regno))
18730 return VALID_IWMMXT_REG_MODE (mode);
18733 /* We allow almost any value to be stored in the general registers.
18734 Restrict doubleword quantities to even register pairs so that we can
18735 use ldrd. Do not allow very large Neon structure opaque modes in
18736 general registers; they would use too many. */
18737 if (regno <= LAST_ARM_REGNUM)
18738 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18739 && ARM_NUM_REGS (mode) <= 4;
18741 if (regno == FRAME_POINTER_REGNUM
18742 || regno == ARG_POINTER_REGNUM)
18743 /* We only allow integers in the fake hard registers. */
18744 return GET_MODE_CLASS (mode) == MODE_INT;
18746 /* The only registers left are the FPA registers
18747 which we only allow to hold FP values. */
18748 return (TARGET_HARD_FLOAT && TARGET_FPA
18749 && GET_MODE_CLASS (mode) == MODE_FLOAT
18750 && regno >= FIRST_FPA_REGNUM
18751 && regno <= LAST_FPA_REGNUM);
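/* Illustrative consequence (not part of the original source): when ldrd
   is available, DImode is accepted in r0 (even) but rejected in r1
   (odd), keeping doubleword values in ldrd/strd-compatible register
   pairs.  */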
18754 /* Implement MODES_TIEABLE_P. */
18757 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18759 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18762 /* We specifically want to allow elements of "structure" modes to
18763 be tieable to the structure. This more general condition allows
18764 other rarer situations too. */
18766 && (VALID_NEON_DREG_MODE (mode1)
18767 || VALID_NEON_QREG_MODE (mode1)
18768 || VALID_NEON_STRUCT_MODE (mode1))
18769 && (VALID_NEON_DREG_MODE (mode2)
18770 || VALID_NEON_QREG_MODE (mode2)
18771 || VALID_NEON_STRUCT_MODE (mode2)))
18777 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18778 not used in ARM mode. */
18781 arm_regno_class (int regno)
18785 if (regno == STACK_POINTER_REGNUM)
18787 if (regno == CC_REGNUM)
18794 if (TARGET_THUMB2 && regno < 8)
18797 if ( regno <= LAST_ARM_REGNUM
18798 || regno == FRAME_POINTER_REGNUM
18799 || regno == ARG_POINTER_REGNUM)
18800 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18802 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18803 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18805 if (IS_CIRRUS_REGNUM (regno))
18806 return CIRRUS_REGS;
18808 if (IS_VFP_REGNUM (regno))
18810 if (regno <= D7_VFP_REGNUM)
18811 return VFP_D0_D7_REGS;
18812 else if (regno <= LAST_LO_VFP_REGNUM)
18813 return VFP_LO_REGS;
18815 return VFP_HI_REGS;
18818 if (IS_IWMMXT_REGNUM (regno))
18819 return IWMMXT_REGS;
18821 if (IS_IWMMXT_GR_REGNUM (regno))
18822 return IWMMXT_GR_REGS;
18827 /* Handle a special case when computing the offset
18828 of an argument from the frame pointer. */
18830 arm_debugger_arg_offset (int value, rtx addr)
18834 /* We are only interested if dbxout_parms() failed to compute the offset. */
18838 /* We can only cope with the case where the address is held in a register. */
18839 if (GET_CODE (addr) != REG)
18842 /* If we are using the frame pointer to point at the argument, then
18843 an offset of 0 is correct. */
18844 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18847 /* If we are using the stack pointer to point at the
18848 argument, then an offset of 0 is correct. */
18849 /* ??? Check this is consistent with thumb2 frame layout. */
18850 if ((TARGET_THUMB || !frame_pointer_needed)
18851 && REGNO (addr) == SP_REGNUM)
18854 /* Oh dear. The argument is pointed to by a register rather
18855 than being held in a register, or being stored at a known
18856 offset from the frame pointer. Since GDB only understands
18857 those two kinds of argument we must translate the address
18858 held in the register into an offset from the frame pointer.
18859 We do this by searching through the insns for the function
18860 looking to see where this register gets its value. If the
18861 register is initialized from the frame pointer plus an offset
18862 then we are in luck and we can continue, otherwise we give up.
18864 This code is exercised by producing debugging information
18865 for a function with arguments like this:
18867 double func (double a, double b, int c, double d) {return d;}
18869 Without this code the stab for parameter 'd' will be set to
18870 an offset of 0 from the frame pointer, rather than 8. */
18872 /* The if() statement says:
18874 If the insn is a normal instruction
18875 and if the insn is setting the value in a register
18876 and if the register being set is the register holding the address of the argument
18877 and if the address is computed by an addition
18878 that involves adding to a register
18879 which is the frame pointer
18884 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18886 if ( GET_CODE (insn) == INSN
18887 && GET_CODE (PATTERN (insn)) == SET
18888 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18889 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18890 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18891 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18892 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18895 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18904 warning (0, "unable to compute real location of stacked parameter");
18905 value = 8; /* XXX magic hack */
18925 T_MAX /* Size of enum. Keep last. */
18926 } neon_builtin_type_mode;
18928 #define TYPE_MODE_BIT(X) (1 << (X))
18930 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18931 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18932 | TYPE_MODE_BIT (T_DI))
18933 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18934 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18935 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
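/* For instance (illustrative), TB_DREG collects the mode bits of every
   64-bit vector type: TYPE_MODE_BIT (T_V8QI) == (1 << T_V8QI) and so
   on, OR-ed together, so a variant bitmap can be tested for D-register
   versus Q-register forms.  */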
18937 #define v8qi_UP T_V8QI
18938 #define v4hi_UP T_V4HI
18939 #define v2si_UP T_V2SI
18940 #define v2sf_UP T_V2SF
18941 #define di_UP T_DI
18942 #define v16qi_UP T_V16QI
18943 #define v8hi_UP T_V8HI
18944 #define v4si_UP T_V4SI
18945 #define v4sf_UP T_V4SF
18946 #define v2di_UP T_V2DI
18951 #define UP(X) X##_UP
18984 NEON_LOADSTRUCTLANE,
18986 NEON_STORESTRUCTLANE,
18995 const neon_itype itype;
18996 const neon_builtin_type_mode mode;
18997 const enum insn_code code;
18998 unsigned int fcode;
18999 } neon_builtin_datum;
19001 #define CF(N,X) CODE_FOR_neon_##N##X
19003 #define VAR1(T, N, A) \
19004 {#N, NEON_##T, UP (A), CF (N, A), 0}
19005 #define VAR2(T, N, A, B) \
19006 VAR1 (T, N, A), \
19007 {#N, NEON_##T, UP (B), CF (N, B), 0}
19008 #define VAR3(T, N, A, B, C) \
19009 VAR2 (T, N, A, B), \
19010 {#N, NEON_##T, UP (C), CF (N, C), 0}
19011 #define VAR4(T, N, A, B, C, D) \
19012 VAR3 (T, N, A, B, C), \
19013 {#N, NEON_##T, UP (D), CF (N, D), 0}
19014 #define VAR5(T, N, A, B, C, D, E) \
19015 VAR4 (T, N, A, B, C, D), \
19016 {#N, NEON_##T, UP (E), CF (N, E), 0}
19017 #define VAR6(T, N, A, B, C, D, E, F) \
19018 VAR5 (T, N, A, B, C, D, E), \
19019 {#N, NEON_##T, UP (F), CF (N, F), 0}
19020 #define VAR7(T, N, A, B, C, D, E, F, G) \
19021 VAR6 (T, N, A, B, C, D, E, F), \
19022 {#N, NEON_##T, UP (G), CF (N, G), 0}
19023 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19024 VAR7 (T, N, A, B, C, D, E, F, G), \
19025 {#N, NEON_##T, UP (H), CF (N, H), 0}
19026 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19027 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19028 {#N, NEON_##T, UP (I), CF (N, I), 0}
19029 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19030 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19031 {#N, NEON_##T, UP (J), CF (N, J), 0}
19033 /* The mode entries in the following table correspond to the "key" type of the
19034 instruction variant, i.e. equivalent to that which would be specified after
19035 the assembler mnemonic, which usually refers to the last vector operand.
19036 (Signed/unsigned/polynomial types are not differentiated between though, and
19037 are all mapped onto the same mode for a given element size.) The modes
19038 listed per instruction should be the same as those defined for that
19039 instruction's pattern in neon.md. */
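/* For example (illustrative), the v4hi entry for vadd corresponds to
   the variant written "vadd.i16" in assembly: the key type supplies the
   element-size suffix, and signed/unsigned/polynomial variants of the
   same size share one mode.  */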
19041 static neon_builtin_datum neon_builtin_data[] =
19043 VAR10 (BINOP, vadd,
19044 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19045 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19046 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19047 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19048 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19049 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19050 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19051 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19052 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19053 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19054 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19055 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19056 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19057 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19058 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19059 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19060 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19061 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19062 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19063 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19064 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19065 VAR2 (BINOP, vqdmull, v4hi, v2si),
19066 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19067 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19068 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19069 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19070 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19071 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19072 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19073 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19074 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19075 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19076 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19077 VAR10 (BINOP, vsub,
19078 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19079 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19080 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19081 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19082 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19083 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19084 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19085 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19086 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19087 VAR2 (BINOP, vcage, v2sf, v4sf),
19088 VAR2 (BINOP, vcagt, v2sf, v4sf),
19089 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19090 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19091 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19092 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19093 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19094 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19095 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19096 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19097 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19098 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19099 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19100 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19101 VAR2 (BINOP, vrecps, v2sf, v4sf),
19102 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19103 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19104 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19105 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19106 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19107 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19108 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19109 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19110 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19111 VAR2 (UNOP, vcnt, v8qi, v16qi),
19112 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19113 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19114 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19115 /* FIXME: vget_lane supports more variants than this! */
19116 VAR10 (GETLANE, vget_lane,
19117 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19118 VAR10 (SETLANE, vset_lane,
19119 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19120 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19121 VAR10 (DUP, vdup_n,
19122 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19123 VAR10 (DUPLANE, vdup_lane,
19124 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19125 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19126 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19127 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19128 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19129 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19130 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19131 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19132 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19133 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19134 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19135 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19136 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19137 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19138 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19139 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19140 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19141 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19142 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19143 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19144 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19145 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19146 VAR10 (BINOP, vext,
19147 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19148 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19149 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19150 VAR2 (UNOP, vrev16, v8qi, v16qi),
19151 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19152 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19153 VAR10 (SELECT, vbsl,
19154 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19155 VAR1 (VTBL, vtbl1, v8qi),
19156 VAR1 (VTBL, vtbl2, v8qi),
19157 VAR1 (VTBL, vtbl3, v8qi),
19158 VAR1 (VTBL, vtbl4, v8qi),
19159 VAR1 (VTBX, vtbx1, v8qi),
19160 VAR1 (VTBX, vtbx2, v8qi),
19161 VAR1 (VTBX, vtbx3, v8qi),
19162 VAR1 (VTBX, vtbx4, v8qi),
19163 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19164 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19165 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19166 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19167 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19168 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19169 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19170 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19171 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19172 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19173 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19174 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19175 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19176 VAR10 (LOAD1, vld1,
19177 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19178 VAR10 (LOAD1LANE, vld1_lane,
19179 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19180 VAR10 (LOAD1, vld1_dup,
19181 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19182 VAR10 (STORE1, vst1,
19183 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19184 VAR10 (STORE1LANE, vst1_lane,
19185 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19186 VAR9 (LOADSTRUCT,
19187 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19188 VAR7 (LOADSTRUCTLANE, vld2_lane,
19189 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19190 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19191 VAR9 (STORESTRUCT, vst2,
19192 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19193 VAR7 (STORESTRUCTLANE, vst2_lane,
19194 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19195 VAR9 (LOADSTRUCT,
19196 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19197 VAR7 (LOADSTRUCTLANE, vld3_lane,
19198 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19199 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19200 VAR9 (STORESTRUCT, vst3,
19201 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19202 VAR7 (STORESTRUCTLANE, vst3_lane,
19203 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19204 VAR9 (LOADSTRUCT, vld4,
19205 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19206 VAR7 (LOADSTRUCTLANE, vld4_lane,
19207 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19208 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19209 VAR9 (STORESTRUCT, vst4,
19210 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19211 VAR7 (STORESTRUCTLANE, vst4_lane,
19212 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19213 VAR10 (LOGICBINOP, vand,
19214 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19215 VAR10 (LOGICBINOP, vorr,
19216 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19217 VAR10 (BINOP, veor,
19218 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19219 VAR10 (LOGICBINOP, vbic,
19220 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19221 VAR10 (LOGICBINOP, vorn,
19222 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19237 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19238 symbolic names defined here (which would require too much duplication).
19242 ARM_BUILTIN_GETWCX,
19243 ARM_BUILTIN_SETWCX,
19247 ARM_BUILTIN_WAVG2BR,
19248 ARM_BUILTIN_WAVG2HR,
19249 ARM_BUILTIN_WAVG2B,
19250 ARM_BUILTIN_WAVG2H,
19257 ARM_BUILTIN_WMACSZ,
19259 ARM_BUILTIN_WMACUZ,
19262 ARM_BUILTIN_WSADBZ,
19264 ARM_BUILTIN_WSADHZ,
19266 ARM_BUILTIN_WALIGN,
19269 ARM_BUILTIN_TMIAPH,
19270 ARM_BUILTIN_TMIABB,
19271 ARM_BUILTIN_TMIABT,
19272 ARM_BUILTIN_TMIATB,
19273 ARM_BUILTIN_TMIATT,
19275 ARM_BUILTIN_TMOVMSKB,
19276 ARM_BUILTIN_TMOVMSKH,
19277 ARM_BUILTIN_TMOVMSKW,
19279 ARM_BUILTIN_TBCSTB,
19280 ARM_BUILTIN_TBCSTH,
19281 ARM_BUILTIN_TBCSTW,
19283 ARM_BUILTIN_WMADDS,
19284 ARM_BUILTIN_WMADDU,
19286 ARM_BUILTIN_WPACKHSS,
19287 ARM_BUILTIN_WPACKWSS,
19288 ARM_BUILTIN_WPACKDSS,
19289 ARM_BUILTIN_WPACKHUS,
19290 ARM_BUILTIN_WPACKWUS,
19291 ARM_BUILTIN_WPACKDUS,
19296 ARM_BUILTIN_WADDSSB,
19297 ARM_BUILTIN_WADDSSH,
19298 ARM_BUILTIN_WADDSSW,
19299 ARM_BUILTIN_WADDUSB,
19300 ARM_BUILTIN_WADDUSH,
19301 ARM_BUILTIN_WADDUSW,
19305 ARM_BUILTIN_WSUBSSB,
19306 ARM_BUILTIN_WSUBSSH,
19307 ARM_BUILTIN_WSUBSSW,
19308 ARM_BUILTIN_WSUBUSB,
19309 ARM_BUILTIN_WSUBUSH,
19310 ARM_BUILTIN_WSUBUSW,
19317 ARM_BUILTIN_WCMPEQB,
19318 ARM_BUILTIN_WCMPEQH,
19319 ARM_BUILTIN_WCMPEQW,
19320 ARM_BUILTIN_WCMPGTUB,
19321 ARM_BUILTIN_WCMPGTUH,
19322 ARM_BUILTIN_WCMPGTUW,
19323 ARM_BUILTIN_WCMPGTSB,
19324 ARM_BUILTIN_WCMPGTSH,
19325 ARM_BUILTIN_WCMPGTSW,
19327 ARM_BUILTIN_TEXTRMSB,
19328 ARM_BUILTIN_TEXTRMSH,
19329 ARM_BUILTIN_TEXTRMSW,
19330 ARM_BUILTIN_TEXTRMUB,
19331 ARM_BUILTIN_TEXTRMUH,
19332 ARM_BUILTIN_TEXTRMUW,
19333 ARM_BUILTIN_TINSRB,
19334 ARM_BUILTIN_TINSRH,
19335 ARM_BUILTIN_TINSRW,
19337 ARM_BUILTIN_WMAXSW,
19338 ARM_BUILTIN_WMAXSH,
19339 ARM_BUILTIN_WMAXSB,
19340 ARM_BUILTIN_WMAXUW,
19341 ARM_BUILTIN_WMAXUH,
19342 ARM_BUILTIN_WMAXUB,
19343 ARM_BUILTIN_WMINSW,
19344 ARM_BUILTIN_WMINSH,
19345 ARM_BUILTIN_WMINSB,
19346 ARM_BUILTIN_WMINUW,
19347 ARM_BUILTIN_WMINUH,
19348 ARM_BUILTIN_WMINUB,
19350 ARM_BUILTIN_WMULUM,
19351 ARM_BUILTIN_WMULSM,
19352 ARM_BUILTIN_WMULUL,
19354 ARM_BUILTIN_PSADBH,
19355 ARM_BUILTIN_WSHUFH,
19369 ARM_BUILTIN_WSLLHI,
19370 ARM_BUILTIN_WSLLWI,
19371 ARM_BUILTIN_WSLLDI,
19372 ARM_BUILTIN_WSRAHI,
19373 ARM_BUILTIN_WSRAWI,
19374 ARM_BUILTIN_WSRADI,
19375 ARM_BUILTIN_WSRLHI,
19376 ARM_BUILTIN_WSRLWI,
19377 ARM_BUILTIN_WSRLDI,
19378 ARM_BUILTIN_WRORHI,
19379 ARM_BUILTIN_WRORWI,
19380 ARM_BUILTIN_WRORDI,
19382 ARM_BUILTIN_WUNPCKIHB,
19383 ARM_BUILTIN_WUNPCKIHH,
19384 ARM_BUILTIN_WUNPCKIHW,
19385 ARM_BUILTIN_WUNPCKILB,
19386 ARM_BUILTIN_WUNPCKILH,
19387 ARM_BUILTIN_WUNPCKILW,
19389 ARM_BUILTIN_WUNPCKEHSB,
19390 ARM_BUILTIN_WUNPCKEHSH,
19391 ARM_BUILTIN_WUNPCKEHSW,
19392 ARM_BUILTIN_WUNPCKEHUB,
19393 ARM_BUILTIN_WUNPCKEHUH,
19394 ARM_BUILTIN_WUNPCKEHUW,
19395 ARM_BUILTIN_WUNPCKELSB,
19396 ARM_BUILTIN_WUNPCKELSH,
19397 ARM_BUILTIN_WUNPCKELSW,
19398 ARM_BUILTIN_WUNPCKELUB,
19399 ARM_BUILTIN_WUNPCKELUH,
19400 ARM_BUILTIN_WUNPCKELUW,
19402 ARM_BUILTIN_THREAD_POINTER,
19404 ARM_BUILTIN_NEON_BASE,
19406 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19409 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19411 static void
19412 arm_init_neon_builtins (void)
19414 unsigned int i, fcode;
19417 tree neon_intQI_type_node;
19418 tree neon_intHI_type_node;
19419 tree neon_polyQI_type_node;
19420 tree neon_polyHI_type_node;
19421 tree neon_intSI_type_node;
19422 tree neon_intDI_type_node;
19423 tree neon_float_type_node;
19425 tree intQI_pointer_node;
19426 tree intHI_pointer_node;
19427 tree intSI_pointer_node;
19428 tree intDI_pointer_node;
19429 tree float_pointer_node;
19431 tree const_intQI_node;
19432 tree const_intHI_node;
19433 tree const_intSI_node;
19434 tree const_intDI_node;
19435 tree const_float_node;
19437 tree const_intQI_pointer_node;
19438 tree const_intHI_pointer_node;
19439 tree const_intSI_pointer_node;
19440 tree const_intDI_pointer_node;
19441 tree const_float_pointer_node;
19443 tree V8QI_type_node;
19444 tree V4HI_type_node;
19445 tree V2SI_type_node;
19446 tree V2SF_type_node;
19447 tree V16QI_type_node;
19448 tree V8HI_type_node;
19449 tree V4SI_type_node;
19450 tree V4SF_type_node;
19451 tree V2DI_type_node;
19453 tree intUQI_type_node;
19454 tree intUHI_type_node;
19455 tree intUSI_type_node;
19456 tree intUDI_type_node;
19458 tree intEI_type_node;
19459 tree intOI_type_node;
19460 tree intCI_type_node;
19461 tree intXI_type_node;
19463 tree V8QI_pointer_node;
19464 tree V4HI_pointer_node;
19465 tree V2SI_pointer_node;
19466 tree V2SF_pointer_node;
19467 tree V16QI_pointer_node;
19468 tree V8HI_pointer_node;
19469 tree V4SI_pointer_node;
19470 tree V4SF_pointer_node;
19471 tree V2DI_pointer_node;
19473 tree void_ftype_pv8qi_v8qi_v8qi;
19474 tree void_ftype_pv4hi_v4hi_v4hi;
19475 tree void_ftype_pv2si_v2si_v2si;
19476 tree void_ftype_pv2sf_v2sf_v2sf;
19477 tree void_ftype_pdi_di_di;
19478 tree void_ftype_pv16qi_v16qi_v16qi;
19479 tree void_ftype_pv8hi_v8hi_v8hi;
19480 tree void_ftype_pv4si_v4si_v4si;
19481 tree void_ftype_pv4sf_v4sf_v4sf;
19482 tree void_ftype_pv2di_v2di_v2di;
19484 tree reinterp_ftype_dreg[5][5];
19485 tree reinterp_ftype_qreg[5][5];
19486 tree dreg_types[5], qreg_types[5];
19488 /* Create distinguished type nodes for NEON vector element types,
19489 and pointers to values of such types, so we can detect them later. */
19490 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19491 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19492 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19493 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19494 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19495 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19496 neon_float_type_node = make_node (REAL_TYPE);
19497 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19498 layout_type (neon_float_type_node);
19500 /* Define typedefs which exactly correspond to the modes we are basing vector
19501 types on. If you change these names you'll need to change
19502 the table used by arm_mangle_type too. */
19503 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19504 "__builtin_neon_qi");
19505 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19506 "__builtin_neon_hi");
19507 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19508 "__builtin_neon_si");
19509 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19510 "__builtin_neon_sf");
19511 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19512 "__builtin_neon_di");
19513 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19514 "__builtin_neon_poly8");
19515 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19516 "__builtin_neon_poly16");
19518 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19519 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19520 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19521 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19522 float_pointer_node = build_pointer_type (neon_float_type_node);
19524 /* Next create constant-qualified versions of the above types. */
19525 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19527 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19529 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19531 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19533 const_float_node = build_qualified_type (neon_float_type_node,
19536 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19537 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19538 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19539 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19540 const_float_pointer_node = build_pointer_type (const_float_node);
19542 /* Now create vector types based on our NEON element types. */
19543 /* 64-bit vectors. */
19545 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19547 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19549 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19551 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19552 /* 128-bit vectors. */
19554 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19556 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19558 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19560 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19562 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19564 /* Unsigned integer types for various mode sizes. */
19565 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19566 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19567 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19568 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19570 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19571 "__builtin_neon_uqi");
19572 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19573 "__builtin_neon_uhi");
19574 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19575 "__builtin_neon_usi");
19576 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19577 "__builtin_neon_udi");
19579 /* Opaque integer types for structures of vectors. */
19580 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19581 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19582 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19583 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19585 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19586 "__builtin_neon_ti");
19587 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19588 "__builtin_neon_ei");
19589 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19590 "__builtin_neon_oi");
19591 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19592 "__builtin_neon_ci");
19593 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19594 "__builtin_neon_xi");
19596 /* Pointers to vector types. */
19597 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19598 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19599 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19600 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19601 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19602 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19603 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19604 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19605 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19607 /* Operations which return results as pairs. */
19608 void_ftype_pv8qi_v8qi_v8qi =
19609 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19610 V8QI_type_node, NULL);
19611 void_ftype_pv4hi_v4hi_v4hi =
19612 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19613 V4HI_type_node, NULL);
19614 void_ftype_pv2si_v2si_v2si =
19615 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19616 V2SI_type_node, NULL);
19617 void_ftype_pv2sf_v2sf_v2sf =
19618 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19619 V2SF_type_node, NULL);
19620 void_ftype_pdi_di_di =
19621 build_function_type_list (void_type_node, intDI_pointer_node,
19622 neon_intDI_type_node, neon_intDI_type_node, NULL);
19623 void_ftype_pv16qi_v16qi_v16qi =
19624 build_function_type_list (void_type_node, V16QI_pointer_node,
19625 V16QI_type_node, V16QI_type_node, NULL);
19626 void_ftype_pv8hi_v8hi_v8hi =
19627 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19628 V8HI_type_node, NULL);
19629 void_ftype_pv4si_v4si_v4si =
19630 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19631 V4SI_type_node, NULL);
19632 void_ftype_pv4sf_v4sf_v4sf =
19633 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19634 V4SF_type_node, NULL);
19635 void_ftype_pv2di_v2di_v2di =
19636 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19637 V2DI_type_node, NULL);
19639 dreg_types[0] = V8QI_type_node;
19640 dreg_types[1] = V4HI_type_node;
19641 dreg_types[2] = V2SI_type_node;
19642 dreg_types[3] = V2SF_type_node;
19643 dreg_types[4] = neon_intDI_type_node;
19645 qreg_types[0] = V16QI_type_node;
19646 qreg_types[1] = V8HI_type_node;
19647 qreg_types[2] = V4SI_type_node;
19648 qreg_types[3] = V4SF_type_node;
19649 qreg_types[4] = V2DI_type_node;
19651 for (i = 0; i < 5; i++)
19654 for (j = 0; j < 5; j++)
19656 reinterp_ftype_dreg[i][j]
19657 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19658 reinterp_ftype_qreg[i][j]
19659 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19663 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19664 i < ARRAY_SIZE (neon_builtin_data);
19665 i++, fcode++)
19667 neon_builtin_datum *d = &neon_builtin_data[i];
19669 const char* const modenames[] = {
19670 "v8qi", "v4hi", "v2si", "v2sf", "di",
19671 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19676 int is_load = 0, is_store = 0;
19678 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19685 case NEON_LOAD1LANE:
19686 case NEON_LOADSTRUCT:
19687 case NEON_LOADSTRUCTLANE:
19689 /* Fall through. */
19691 case NEON_STORE1LANE:
19692 case NEON_STORESTRUCT:
19693 case NEON_STORESTRUCTLANE:
19696 /* Fall through. */
19699 case NEON_LOGICBINOP:
19700 case NEON_SHIFTINSERT:
19707 case NEON_SHIFTIMM:
19708 case NEON_SHIFTACC:
19714 case NEON_LANEMULL:
19715 case NEON_LANEMULH:
19717 case NEON_SCALARMUL:
19718 case NEON_SCALARMULL:
19719 case NEON_SCALARMULH:
19720 case NEON_SCALARMAC:
19726 tree return_type = void_type_node, args = void_list_node;
19728 /* Build a function type directly from the insn_data for
19729 this builtin. The build_function_type() function takes
19730 care of removing duplicates for us. */
19731 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19735 if (is_load && k == 1)
19737 /* Neon load patterns always have the memory
19738 operand in the operand 1 position. */
19739 gcc_assert (insn_data[d->code].operand[k].predicate
19740 == neon_struct_operand);
19746 eltype = const_intQI_pointer_node;
19751 eltype = const_intHI_pointer_node;
19756 eltype = const_intSI_pointer_node;
19761 eltype = const_float_pointer_node;
19766 eltype = const_intDI_pointer_node;
19769 default: gcc_unreachable ();
19772 else if (is_store && k == 0)
19774 /* Similarly, Neon store patterns use operand 0 as
19775 the memory location to store to. */
19776 gcc_assert (insn_data[d->code].operand[k].predicate
19777 == neon_struct_operand);
19783 eltype = intQI_pointer_node;
19788 eltype = intHI_pointer_node;
19793 eltype = intSI_pointer_node;
19798 eltype = float_pointer_node;
19803 eltype = intDI_pointer_node;
19806 default: gcc_unreachable ();
19811 switch (insn_data[d->code].operand[k].mode)
19813 case VOIDmode: eltype = void_type_node; break;
19815 case QImode: eltype = neon_intQI_type_node; break;
19816 case HImode: eltype = neon_intHI_type_node; break;
19817 case SImode: eltype = neon_intSI_type_node; break;
19818 case SFmode: eltype = neon_float_type_node; break;
19819 case DImode: eltype = neon_intDI_type_node; break;
19820 case TImode: eltype = intTI_type_node; break;
19821 case EImode: eltype = intEI_type_node; break;
19822 case OImode: eltype = intOI_type_node; break;
19823 case CImode: eltype = intCI_type_node; break;
19824 case XImode: eltype = intXI_type_node; break;
19825 /* 64-bit vectors. */
19826 case V8QImode: eltype = V8QI_type_node; break;
19827 case V4HImode: eltype = V4HI_type_node; break;
19828 case V2SImode: eltype = V2SI_type_node; break;
19829 case V2SFmode: eltype = V2SF_type_node; break;
19830 /* 128-bit vectors. */
19831 case V16QImode: eltype = V16QI_type_node; break;
19832 case V8HImode: eltype = V8HI_type_node; break;
19833 case V4SImode: eltype = V4SI_type_node; break;
19834 case V4SFmode: eltype = V4SF_type_node; break;
19835 case V2DImode: eltype = V2DI_type_node; break;
19836 default: gcc_unreachable ();
19840 if (k == 0 && !is_store)
19841 return_type = eltype;
19843 args = tree_cons (NULL_TREE, eltype, args);
19846 ftype = build_function_type (return_type, args);
19850 case NEON_RESULTPAIR:
19852 switch (insn_data[d->code].operand[1].mode)
19854 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19855 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19856 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19857 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19858 case DImode: ftype = void_ftype_pdi_di_di; break;
19859 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19860 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19861 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19862 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19863 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19864 default: gcc_unreachable ();
19869 case NEON_REINTERP:
19871 /* We iterate over 5 doubleword types, then 5 quadword
19872 types. */
19873 int rhs = d->mode % 5;
19874 switch (insn_data[d->code].operand[0].mode)
19876 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19877 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19878 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19879 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19880 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19881 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19882 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19883 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19884 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19885 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19886 default: gcc_unreachable ();
19892 gcc_unreachable ();
19895 gcc_assert (ftype != NULL);
19897 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
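/* Illustrative example, assumed rather than taken from the original
   file: an entry named "vadd" in V8QI mode would produce the built-in
   name "__builtin_neon_vaddv8qi".  */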
19899 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, NULL_TREE);
19901 arm_builtin_decls[fcode] = decl;
19905 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19908 if ((MASK) & insn_flags) \
19911 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19912 BUILT_IN_MD, NULL, NULL_TREE); \
19913 arm_builtin_decls[CODE] = bdecl; \
19918 struct builtin_description
19920 const unsigned int mask;
19921 const enum insn_code icode;
19922 const char * const name;
19923 const enum arm_builtins code;
19924 const enum rtx_code comparison;
19925 const unsigned int flag;
19928 static const struct builtin_description bdesc_2arg[] =
19930 #define IWMMXT_BUILTIN(code, string, builtin) \
19931 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19932 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19934 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19935 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19936 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19937 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19938 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19939 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19940 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19941 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19942 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19943 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19944 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19945 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19946 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19947 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19948 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19949 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19950 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19951 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19952 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19953 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19954 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19955 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19956 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19957 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19958 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19959 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19960 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19961 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19962 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19963 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19964 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19965 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19966 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19967 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19968 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19969 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19970 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19971 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19972 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19973 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19974 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19975 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19976 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19977 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19978 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19979 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19980 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19981 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19982 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19983 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19984 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19985 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19986 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19987 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19988 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19989 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19990 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
19991 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
19993 #define IWMMXT_BUILTIN2(code, builtin) \
19994 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19996 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19997 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19998 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19999 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20000 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20001 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20002 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
20003 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
20004 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
20005 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
20006 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
20007 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
20008 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
20009 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
20010 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
20011 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
20012 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
20013 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
20014 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
20015 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
20016 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
20017 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
20018 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
20019 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
20020 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
20021 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
20022 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
20023 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
20024 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
20025 IWMMXT_BUILTIN2 (rordi3, WRORDI)
20026 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
20027 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
20030 static const struct builtin_description bdesc_1arg[] =
20032 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20033 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20034 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20035 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20036 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20037 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20038 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20039 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20040 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20041 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20042 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20043 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20044 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20045 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20046 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20047 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20048 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20049 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20052 /* Set up all the iWMMXt builtins. This is not called if
20053 TARGET_IWMMXT is zero. */
20056 arm_init_iwmmxt_builtins (void)
20058 const struct builtin_description * d;
20061 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20062 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20063 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20066 tree int_ftype_int = build_function_type_list (integer_type_node,
20067 integer_type_node, NULL_TREE);
20068 tree v8qi_ftype_v8qi_v8qi_int
20069 = build_function_type_list (V8QI_type_node,
20070 V8QI_type_node, V8QI_type_node,
20071 integer_type_node, NULL_TREE);
20072 tree v4hi_ftype_v4hi_int
20073 = build_function_type_list (V4HI_type_node,
20074 V4HI_type_node, integer_type_node, NULL_TREE);
20075 tree v2si_ftype_v2si_int
20076 = build_function_type_list (V2SI_type_node,
20077 V2SI_type_node, integer_type_node, NULL_TREE);
20078 tree v2si_ftype_di_di
20079 = build_function_type_list (V2SI_type_node,
20080 long_long_integer_type_node,
20081 long_long_integer_type_node, NULL_TREE);
20083 tree di_ftype_di_int
20084 = build_function_type_list (long_long_integer_type_node,
20085 long_long_integer_type_node,
20086 integer_type_node, NULL_TREE);
20087 tree di_ftype_di_int_int
20088 = build_function_type_list (long_long_integer_type_node,
20089 long_long_integer_type_node,
20091 integer_type_node, integer_type_node, NULL_TREE);
20092 tree int_ftype_v8qi
20093 = build_function_type_list (integer_type_node,
20094 V8QI_type_node, NULL_TREE);
20095 tree int_ftype_v4hi
20096 = build_function_type_list (integer_type_node,
20097 V4HI_type_node, NULL_TREE);
20098 tree int_ftype_v2si
20099 = build_function_type_list (integer_type_node,
20100 V2SI_type_node, NULL_TREE);
20101 tree int_ftype_v8qi_int
20102 = build_function_type_list (integer_type_node,
20103 V8QI_type_node, integer_type_node, NULL_TREE);
20104 tree int_ftype_v4hi_int
20105 = build_function_type_list (integer_type_node,
20106 V4HI_type_node, integer_type_node, NULL_TREE);
20107 tree int_ftype_v2si_int
20108 = build_function_type_list (integer_type_node,
20109 V2SI_type_node, integer_type_node, NULL_TREE);
20110 tree v8qi_ftype_v8qi_int_int
20111 = build_function_type_list (V8QI_type_node,
20112 V8QI_type_node, integer_type_node,
20113 integer_type_node, NULL_TREE);
20114 tree v4hi_ftype_v4hi_int_int
20115 = build_function_type_list (V4HI_type_node,
20116 V4HI_type_node, integer_type_node,
20117 integer_type_node, NULL_TREE);
20118 tree v2si_ftype_v2si_int_int
20119 = build_function_type_list (V2SI_type_node,
20120 V2SI_type_node, integer_type_node,
20121 integer_type_node, NULL_TREE);
20122 /* Miscellaneous. */
20123 tree v8qi_ftype_v4hi_v4hi
20124 = build_function_type_list (V8QI_type_node,
20125 V4HI_type_node, V4HI_type_node, NULL_TREE);
20126 tree v4hi_ftype_v2si_v2si
20127 = build_function_type_list (V4HI_type_node,
20128 V2SI_type_node, V2SI_type_node, NULL_TREE);
20129 tree v2si_ftype_v4hi_v4hi
20130 = build_function_type_list (V2SI_type_node,
20131 V4HI_type_node, V4HI_type_node, NULL_TREE);
20132 tree v2si_ftype_v8qi_v8qi
20133 = build_function_type_list (V2SI_type_node,
20134 V8QI_type_node, V8QI_type_node, NULL_TREE);
20135 tree v4hi_ftype_v4hi_di
20136 = build_function_type_list (V4HI_type_node,
20137 V4HI_type_node, long_long_integer_type_node, NULL_TREE);
20139 tree v2si_ftype_v2si_di
20140 = build_function_type_list (V2SI_type_node,
20141 V2SI_type_node, long_long_integer_type_node, NULL_TREE);
20143 tree void_ftype_int_int
20144 = build_function_type_list (void_type_node,
20145 integer_type_node, integer_type_node, NULL_TREE);
20148 tree di_ftype_void = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20150 tree di_ftype_v8qi = build_function_type_list (long_long_integer_type_node,
20151 V8QI_type_node, NULL_TREE);
20153 tree di_ftype_v4hi = build_function_type_list (long_long_integer_type_node,
20154 V4HI_type_node, NULL_TREE);
20156 tree di_ftype_v2si = build_function_type_list (long_long_integer_type_node,
20157 V2SI_type_node, NULL_TREE);
20158 tree v2si_ftype_v4hi
20159 = build_function_type_list (V2SI_type_node,
20160 V4HI_type_node, NULL_TREE);
20161 tree v4hi_ftype_v8qi
20162 = build_function_type_list (V4HI_type_node,
20163 V8QI_type_node, NULL_TREE);
20165 tree di_ftype_di_v4hi_v4hi
20166 = build_function_type_list (long_long_unsigned_type_node,
20167 long_long_unsigned_type_node,
20168 V4HI_type_node, V4HI_type_node, NULL_TREE);
20171 tree di_ftype_v4hi_v4hi
20172 = build_function_type_list (long_long_unsigned_type_node,
20173 V4HI_type_node, V4HI_type_node, NULL_TREE);
20176 /* Normal vector binops. */
20177 tree v8qi_ftype_v8qi_v8qi
20178 = build_function_type_list (V8QI_type_node,
20179 V8QI_type_node, V8QI_type_node, NULL_TREE);
20180 tree v4hi_ftype_v4hi_v4hi
20181 = build_function_type_list (V4HI_type_node,
20182 V4HI_type_node, V4HI_type_node, NULL_TREE);
20183 tree v2si_ftype_v2si_v2si
20184 = build_function_type_list (V2SI_type_node,
20185 V2SI_type_node, V2SI_type_node, NULL_TREE);
20186 tree di_ftype_di_di
20187 = build_function_type_list (long_long_unsigned_type_node,
20188 long_long_unsigned_type_node,
20189 long_long_unsigned_type_node, NULL_TREE);
20192 /* Add all builtins that are more or less simple operations on two operands. */
20194 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20196 /* Use one of the operands; the target can have a different mode for
20197 mask-generating compares. */
20198 enum machine_mode mode;
20204 mode = insn_data[d->icode].operand[1].mode;
switch (mode)
{
20209 case V8QImode: type = v8qi_ftype_v8qi_v8qi; break;
20212 case V4HImode: type = v4hi_ftype_v4hi_v4hi; break;
20215 case V2SImode: type = v2si_ftype_v2si_v2si; break;
20218 case DImode: type = di_ftype_di_di; break;
20222 default: gcc_unreachable ();
}
20225 def_mbuiltin (d->mask, d->name, type, d->code);
20228 /* Add the remaining MMX insns with somewhat more complicated types. */
20229 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20230 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20231 ARM_BUILTIN_ ## CODE)
20233 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20234 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
20235 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
20237 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20238 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20239 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20240 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20241 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20242 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20244 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20245 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20246 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20247 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20248 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20249 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20251 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20252 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20253 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20254 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20255 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20256 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20258 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20259 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20260 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20261 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20262 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20263 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20265 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20267 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
20268 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
20269 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20270 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20272 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20273 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20274 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20275 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20276 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20277 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20278 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20279 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20280 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20282 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20283 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20284 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20286 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20287 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20288 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20290 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20291 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20292 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20293 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20294 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20295 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20297 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20298 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20299 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20300 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20301 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20302 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20303 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20304 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20305 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20306 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20307 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20308 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20310 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20311 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20312 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20313 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20315 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
20316 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20317 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20318 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20319 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20320 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20321 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20323 #undef iwmmx_mbuiltin
20327 arm_init_tls_builtins (void)
20331 ftype = build_function_type (ptr_type_node, void_list_node);
20332 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20333 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD, NULL, NULL_TREE);
20335 TREE_NOTHROW (decl) = 1;
20336 TREE_READONLY (decl) = 1;
20337 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20341 arm_init_fp16_builtins (void)
20343 tree fp16_type = make_node (REAL_TYPE);
20344 TYPE_PRECISION (fp16_type) = 16;
20345 layout_type (fp16_type);
20346 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20350 arm_init_builtins (void)
20352 arm_init_tls_builtins ();
20354 if (TARGET_REALLY_IWMMXT)
20355 arm_init_iwmmxt_builtins ();
20358 arm_init_neon_builtins ();
20360 if (arm_fp16_format)
20361 arm_init_fp16_builtins ();
20364 /* Return the ARM builtin for CODE. */
20367 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20369 if (code >= ARM_BUILTIN_MAX)
20370 return error_mark_node;
20372 return arm_builtin_decls[code];
20375 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20377 static const char *
20378 arm_invalid_parameter_type (const_tree t)
20380 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20381 return N_("function parameters cannot have __fp16 type");
20385 /* Implement TARGET_INVALID_RETURN_TYPE. */
20387 static const char *
20388 arm_invalid_return_type (const_tree t)
20390 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20391 return N_("functions cannot return __fp16 type");
20395 /* Implement TARGET_PROMOTED_TYPE. */
20398 arm_promoted_type (const_tree t)
20400 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20401 return float_type_node;
20405 /* Implement TARGET_CONVERT_TO_TYPE.
20406 Specifically, this hook implements the peculiarity of the ARM
20407 half-precision floating-point C semantics that requires conversions between
20408 __fp16 and double to go through an intermediate conversion to float. */
20411 arm_convert_to_type (tree type, tree expr)
20413 tree fromtype = TREE_TYPE (expr);
20414 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20416 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20417 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20418 return convert (type, convert (float_type_node, expr));
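/* Illustrative note, not part of the original file: under this hook a
   source-level conversion such as

     double d = 1.0;
     __fp16 h = (__fp16) d;

   is expanded as (__fp16) (float) d, i.e. the value is rounded to
   float first and then to half precision.  */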
20422 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20423 This simply adds HFmode as a supported mode; even though we don't
20424 implement arithmetic on this type directly, it's supported by
20425 optabs conversions, much the way the double-word arithmetic is
20426 special-cased in the default hook. */
20429 arm_scalar_mode_supported_p (enum machine_mode mode)
20431 if (mode == HFmode)
20432 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20433 else if (ALL_FIXED_POINT_MODE_P (mode))
return true;
20436 return default_scalar_mode_supported_p (mode);
20439 /* Errors in the source file can cause expand_expr to return const0_rtx
20440 where we expect a vector. To avoid crashing, use one of the vector
20441 clear instructions. */
20444 safe_vector_operand (rtx x, enum machine_mode mode)
20446 if (x != const0_rtx)
return x;
20448 x = gen_reg_rtx (mode);
20450 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20451 : gen_rtx_SUBREG (DImode, x, 0)));
20455 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20458 arm_expand_binop_builtin (enum insn_code icode,
20459 tree exp, rtx target)
20462 tree arg0 = CALL_EXPR_ARG (exp, 0);
20463 tree arg1 = CALL_EXPR_ARG (exp, 1);
20464 rtx op0 = expand_normal (arg0);
20465 rtx op1 = expand_normal (arg1);
20466 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20467 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20468 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20470 if (VECTOR_MODE_P (mode0))
20471 op0 = safe_vector_operand (op0, mode0);
20472 if (VECTOR_MODE_P (mode1))
20473 op1 = safe_vector_operand (op1, mode1);
20476 if (target == 0 || GET_MODE (target) != tmode
20477 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20478 target = gen_reg_rtx (tmode);
20480 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
20482 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20483 op0 = copy_to_mode_reg (mode0, op0);
20484 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20485 op1 = copy_to_mode_reg (mode1, op1);
20487 pat = GEN_FCN (icode) (target, op0, op1);
20494 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20497 arm_expand_unop_builtin (enum insn_code icode,
20498 tree exp, rtx target, int do_load)
20501 tree arg0 = CALL_EXPR_ARG (exp, 0);
20502 rtx op0 = expand_normal (arg0);
20503 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20504 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20507 if (target == 0 || GET_MODE (target) != tmode
20508 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20509 target = gen_reg_rtx (tmode);
20511 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20514 if (VECTOR_MODE_P (mode0))
20515 op0 = safe_vector_operand (op0, mode0);
20517 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20518 op0 = copy_to_mode_reg (mode0, op0);
20521 pat = GEN_FCN (icode) (target, op0);
20529 typedef enum { NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_MEMORY, NEON_ARG_STOP } builtin_arg;
20535 #define NEON_MAX_BUILTIN_ARGS 5
20537 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20538 and return an expression for the accessed memory.
20540 The intrinsic function operates on a block of registers that has
20541 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20542 The function references the memory at EXP in mode MEM_MODE;
20543 this mode may be BLKmode if no more suitable mode is available. */
20546 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20547 enum machine_mode reg_mode,
20548 neon_builtin_type_mode type_mode)
20550 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20551 tree elem_type, upper_bound, array_type;
20553 /* Work out the size of the register block in bytes. */
20554 reg_size = GET_MODE_SIZE (reg_mode);
20556 /* Work out the size of each vector in bytes. */
20557 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20558 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20560 /* Work out how many vectors there are. */
20561 gcc_assert (reg_size % vector_size == 0);
20562 nvectors = reg_size / vector_size;
20564 /* Work out how many elements are being loaded or stored.
20565 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20566 and memory elements; anything else implies a lane load or store. */
20567 if (mem_mode == reg_mode)
20568 nelems = vector_size * nvectors;
20572 /* Work out the type of each element. */
20573 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20574 elem_type = TREE_TYPE (TREE_TYPE (exp));
20576 /* Create a type that describes the full access. */
20577 upper_bound = build_int_cst (size_type_node, nelems - 1);
20578 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20580 /* Dereference EXP using that type. */
20581 exp = convert (build_pointer_type (array_type), exp);
20582 return fold_build2 (MEM_REF, array_type, exp,
20583 build_int_cst (TREE_TYPE (exp), 0));
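/* A minimal sketch, not part of the original file, of the block
   arithmetic above in plain C: a quadword (TB_QREG) vector is 16 bytes
   and a doubleword (TB_DREG) vector is 8, so e.g. a 32-byte register
   block of quadword vectors holds 32 / 16 == 2 vectors.  */
static long
illustrative_nvectors (long reg_size_bytes, int is_quadword)
{
  long vector_size = is_quadword ? 16 : 8;
  return reg_size_bytes / vector_size;	/* assumed an exact multiple */
}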
20586 /* Expand a Neon builtin. */
20588 arm_expand_neon_args (rtx target, int icode, int have_retval,
20589 neon_builtin_type_mode type_mode,
20594 tree arg[NEON_MAX_BUILTIN_ARGS];
20595 rtx op[NEON_MAX_BUILTIN_ARGS];
20596 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20597 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20598 enum machine_mode other_mode;
20604 if (have_retval && (target == 0 || GET_MODE (target) != tmode
20605 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20606 target = gen_reg_rtx (tmode);
20608 va_start (ap, exp);
20612 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20614 if (thisarg == NEON_ARG_STOP)
20618 opno = argc + have_retval;
20619 mode[argc] = insn_data[icode].operand[opno].mode;
20620 arg[argc] = CALL_EXPR_ARG (exp, argc);
20621 if (thisarg == NEON_ARG_MEMORY)
20623 other_mode = insn_data[icode].operand[1 - opno].mode;
20624 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20625 other_mode, type_mode);
20627 op[argc] = expand_normal (arg[argc]);
20631 case NEON_ARG_COPY_TO_REG:
20632 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20633 if (!(*insn_data[icode].operand[opno].predicate)
20634 (op[argc], mode[argc]))
20635 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20638 case NEON_ARG_CONSTANT:
20639 /* FIXME: This error message is somewhat unhelpful. */
20640 if (!(*insn_data[icode].operand[opno].predicate)
20641 (op[argc], mode[argc]))
20642 error ("argument must be a constant");
20645 case NEON_ARG_MEMORY:
20646 gcc_assert (MEM_P (op[argc]));
20647 PUT_MODE (op[argc], mode[argc]);
20648 /* ??? arm_neon.h uses the same built-in functions for signed
20649 and unsigned accesses, casting where necessary. This isn't alias safe. */
20651 set_mem_alias_set (op[argc], 0);
20652 if (!(*insn_data[icode].operand[opno].predicate)
20653 (op[argc], mode[argc]))
20654 op[argc] = (replace_equiv_address
20655 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20658 case NEON_ARG_STOP:
20659 gcc_unreachable ();
20672 pat = GEN_FCN (icode) (target, op[0]);
20676 pat = GEN_FCN (icode) (target, op[0], op[1]);
20680 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20684 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20688 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20692 gcc_unreachable ();
20698 pat = GEN_FCN (icode) (op[0]);
20702 pat = GEN_FCN (icode) (op[0], op[1]);
20706 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20710 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20714 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20718 gcc_unreachable ();
20729 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20730 constants defined per-instruction or per instruction-variant. Instead, the
20731 required info is looked up in the table neon_builtin_data. */
20733 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20735 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20736 neon_itype itype = d->itype;
20737 enum insn_code icode = d->code;
20738 neon_builtin_type_mode type_mode = d->mode;
20745 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20746 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20750 case NEON_SCALARMUL:
20751 case NEON_SCALARMULL:
20752 case NEON_SCALARMULH:
20753 case NEON_SHIFTINSERT:
20754 case NEON_LOGICBINOP:
20755 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20756 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20760 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20761 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20762 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20766 case NEON_SHIFTIMM:
20767 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20768 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20772 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20773 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20777 case NEON_REINTERP:
20778 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20779 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20783 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20784 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20786 case NEON_RESULTPAIR:
20787 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20788 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20792 case NEON_LANEMULL:
20793 case NEON_LANEMULH:
20794 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20795 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20796 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20799 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20800 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20801 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20803 case NEON_SHIFTACC:
20804 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20805 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20806 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20808 case NEON_SCALARMAC:
20809 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20810 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20811 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20815 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20816 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20820 case NEON_LOADSTRUCT:
20821 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20822 NEON_ARG_MEMORY, NEON_ARG_STOP);
20824 case NEON_LOAD1LANE:
20825 case NEON_LOADSTRUCTLANE:
20826 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20827 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20831 case NEON_STORESTRUCT:
20832 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20833 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20835 case NEON_STORE1LANE:
20836 case NEON_STORESTRUCTLANE:
20837 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20838 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20842 gcc_unreachable ();
20845 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20847 neon_reinterpret (rtx dest, rtx src)
20849 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20852 /* Emit code to place a Neon pair result in memory locations (with equal registers). */
20855 neon_emit_pair_result_insn (enum machine_mode mode,
20856 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20859 rtx mem = gen_rtx_MEM (mode, destaddr);
20860 rtx tmp1 = gen_reg_rtx (mode);
20861 rtx tmp2 = gen_reg_rtx (mode);
20863 emit_insn (intfn (tmp1, op1, op2, tmp2));
20865 emit_move_insn (mem, tmp1);
20866 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20867 emit_move_insn (mem, tmp2);
20870 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20871 not to early-clobber SRC registers in the process.
20873 We assume that the operands described by SRC and DEST represent a
20874 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20875 number of components into which the copy has been decomposed. */
20877 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20881 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20882 || REGNO (operands[0]) < REGNO (operands[1]))
20884 for (i = 0; i < count; i++)
20886 operands[2 * i] = dest[i];
20887 operands[2 * i + 1] = src[i];
20892 for (i = 0; i < count; i++)
20894 operands[2 * i] = dest[count - i - 1];
20895 operands[2 * i + 1] = src[count - i - 1];
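/* A minimal sketch, not part of the original file: the same
   overlap-safe ordering applied to a plain array copy.  When the
   destination starts above the source, copying backwards keeps each
   source element from being overwritten before it is read.  */
static void
illustrative_overlap_copy (int *dest, const int *src, unsigned int count)
{
  unsigned int i;
  if (dest <= src)
    for (i = 0; i < count; i++)
      dest[i] = src[i];
  else
    for (i = count; i-- > 0;)
      dest[i] = src[i];
}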
20900 /* Expand an expression EXP that calls a built-in function,
20901 with result going to TARGET if that's convenient
20902 (and in mode MODE if that's convenient).
20903 SUBTARGET may be used as the target for computing one of EXP's operands.
20904 IGNORE is nonzero if the value is to be ignored. */
20907 arm_expand_builtin (tree exp,
20909 rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20910 enum machine_mode mode ATTRIBUTE_UNUSED,
20911 int ignore ATTRIBUTE_UNUSED)
20913 const struct builtin_description * d;
20914 enum insn_code icode;
20915 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20923 int fcode = DECL_FUNCTION_CODE (fndecl);
20925 enum machine_mode tmode;
20926 enum machine_mode mode0;
20927 enum machine_mode mode1;
20928 enum machine_mode mode2;
20930 if (fcode >= ARM_BUILTIN_NEON_BASE)
20931 return arm_expand_neon_builtin (fcode, exp, target);
20935 case ARM_BUILTIN_TEXTRMSB:
20936 case ARM_BUILTIN_TEXTRMUB:
20937 case ARM_BUILTIN_TEXTRMSH:
20938 case ARM_BUILTIN_TEXTRMUH:
20939 case ARM_BUILTIN_TEXTRMSW:
20940 case ARM_BUILTIN_TEXTRMUW:
20941 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20942 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20943 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20944 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20945 : CODE_FOR_iwmmxt_textrmw);
20947 arg0 = CALL_EXPR_ARG (exp, 0);
20948 arg1 = CALL_EXPR_ARG (exp, 1);
20949 op0 = expand_normal (arg0);
20950 op1 = expand_normal (arg1);
20951 tmode = insn_data[icode].operand[0].mode;
20952 mode0 = insn_data[icode].operand[1].mode;
20953 mode1 = insn_data[icode].operand[2].mode;
20955 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20956 op0 = copy_to_mode_reg (mode0, op0);
20957 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20959 /* @@@ better error message */
20960 error ("selector must be an immediate");
20961 return gen_reg_rtx (tmode);
20964 if (target == 0 || GET_MODE (target) != tmode
20965 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20966 target = gen_reg_rtx (tmode);
20967 pat = GEN_FCN (icode) (target, op0, op1);
20973 case ARM_BUILTIN_TINSRB:
20974 case ARM_BUILTIN_TINSRH:
20975 case ARM_BUILTIN_TINSRW:
20976 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20977 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20978 : CODE_FOR_iwmmxt_tinsrw);
20979 arg0 = CALL_EXPR_ARG (exp, 0);
20980 arg1 = CALL_EXPR_ARG (exp, 1);
20981 arg2 = CALL_EXPR_ARG (exp, 2);
20982 op0 = expand_normal (arg0);
20983 op1 = expand_normal (arg1);
20984 op2 = expand_normal (arg2);
20985 tmode = insn_data[icode].operand[0].mode;
20986 mode0 = insn_data[icode].operand[1].mode;
20987 mode1 = insn_data[icode].operand[2].mode;
20988 mode2 = insn_data[icode].operand[3].mode;
20990 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20991 op0 = copy_to_mode_reg (mode0, op0);
20992 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20993 op1 = copy_to_mode_reg (mode1, op1);
20994 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20996 /* @@@ better error message */
20997 error ("selector must be an immediate");
21001 if (target == 0 || GET_MODE (target) != tmode
21002 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21003 target = gen_reg_rtx (tmode);
21004 pat = GEN_FCN (icode) (target, op0, op1, op2);
21010 case ARM_BUILTIN_SETWCX:
21011 arg0 = CALL_EXPR_ARG (exp, 0);
21012 arg1 = CALL_EXPR_ARG (exp, 1);
21013 op0 = force_reg (SImode, expand_normal (arg0));
21014 op1 = expand_normal (arg1);
21015 emit_insn (gen_iwmmxt_tmcr (op1, op0));
21018 case ARM_BUILTIN_GETWCX:
21019 arg0 = CALL_EXPR_ARG (exp, 0);
21020 op0 = expand_normal (arg0);
21021 target = gen_reg_rtx (SImode);
21022 emit_insn (gen_iwmmxt_tmrc (target, op0));
21025 case ARM_BUILTIN_WSHUFH:
21026 icode = CODE_FOR_iwmmxt_wshufh;
21027 arg0 = CALL_EXPR_ARG (exp, 0);
21028 arg1 = CALL_EXPR_ARG (exp, 1);
21029 op0 = expand_normal (arg0);
21030 op1 = expand_normal (arg1);
21031 tmode = insn_data[icode].operand[0].mode;
21032 mode1 = insn_data[icode].operand[1].mode;
21033 mode2 = insn_data[icode].operand[2].mode;
21035 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21036 op0 = copy_to_mode_reg (mode1, op0);
21037 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21039 /* @@@ better error message */
21040 error ("mask must be an immediate");
21044 if (target == 0 || GET_MODE (target) != tmode
21045 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21046 target = gen_reg_rtx (tmode);
21047 pat = GEN_FCN (icode) (target, op0, op1);
21053 case ARM_BUILTIN_WSADB:
21054 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
21055 case ARM_BUILTIN_WSADH:
21056 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
21057 case ARM_BUILTIN_WSADBZ:
21058 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21059 case ARM_BUILTIN_WSADHZ:
21060 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21062 /* Several three-argument builtins. */
21063 case ARM_BUILTIN_WMACS:
21064 case ARM_BUILTIN_WMACU:
21065 case ARM_BUILTIN_WALIGN:
21066 case ARM_BUILTIN_TMIA:
21067 case ARM_BUILTIN_TMIAPH:
21068 case ARM_BUILTIN_TMIATT:
21069 case ARM_BUILTIN_TMIATB:
21070 case ARM_BUILTIN_TMIABT:
21071 case ARM_BUILTIN_TMIABB:
21072 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21073 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21074 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21075 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21076 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21077 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21078 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21079 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21080 : CODE_FOR_iwmmxt_walign);
21081 arg0 = CALL_EXPR_ARG (exp, 0);
21082 arg1 = CALL_EXPR_ARG (exp, 1);
21083 arg2 = CALL_EXPR_ARG (exp, 2);
21084 op0 = expand_normal (arg0);
21085 op1 = expand_normal (arg1);
21086 op2 = expand_normal (arg2);
21087 tmode = insn_data[icode].operand[0].mode;
21088 mode0 = insn_data[icode].operand[1].mode;
21089 mode1 = insn_data[icode].operand[2].mode;
21090 mode2 = insn_data[icode].operand[3].mode;
21092 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21093 op0 = copy_to_mode_reg (mode0, op0);
21094 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21095 op1 = copy_to_mode_reg (mode1, op1);
21096 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21097 op2 = copy_to_mode_reg (mode2, op2);
21099 if (target == 0 || GET_MODE (target) != tmode
21100 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21101 target = gen_reg_rtx (tmode);
21102 pat = GEN_FCN (icode) (target, op0, op1, op2);
21108 case ARM_BUILTIN_WZERO:
21109 target = gen_reg_rtx (DImode);
21110 emit_insn (gen_iwmmxt_clrdi (target));
21113 case ARM_BUILTIN_THREAD_POINTER:
21114 return arm_load_tp (target);
21120 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21121 if (d->code == (const enum arm_builtins) fcode)
21122 return arm_expand_binop_builtin (d->icode, exp, target);
21124 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21125 if (d->code == (const enum arm_builtins) fcode)
21126 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21128 /* @@@ Should really do something sensible here. */
21132 /* Return the number (counting from 0) of
21133 the least significant set bit in MASK. */
21136 number_of_first_bit_set (unsigned mask)
21138 return ctz_hwi (mask);
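/* A minimal sketch, not part of the original file: a portable
   equivalent of the ctz_hwi call above, assuming MASK is nonzero.  */
static int
illustrative_ctz (unsigned long mask)
{
  int n = 0;
  while ((mask & 1) == 0)
    {
      mask >>= 1;
      n++;
    }
  return n;
}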
21141 /* Like emit_multi_reg_push, but allowing for a different set of
21142 registers to be described as saved. MASK is the set of registers
21143 to be saved; REAL_REGS is the set of registers to be described as
21144 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21147 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21149 unsigned long regno;
21150 rtx par[10], tmp, reg, insn;
21153 /* Build the parallel of the registers actually being stored. */
21154 for (i = 0; mask; ++i, mask &= mask - 1)
21156 regno = ctz_hwi (mask);
21157 reg = gen_rtx_REG (SImode, regno);
21160 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21162 tmp = gen_rtx_USE (VOIDmode, reg);
21167 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21168 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21169 tmp = gen_frame_mem (BLKmode, tmp);
21170 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21173 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21174 insn = emit_insn (tmp);
21176 /* Always build the stack adjustment note for unwind info. */
21177 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21178 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21181 /* Build the parallel of the registers recorded as saved for unwind. */
21182 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21184 regno = ctz_hwi (real_regs);
21185 reg = gen_rtx_REG (SImode, regno);
21187 tmp = plus_constant (stack_pointer_rtx, j * 4);
21188 tmp = gen_frame_mem (SImode, tmp);
21189 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21190 RTX_FRAME_RELATED_P (tmp) = 1;
21198 RTX_FRAME_RELATED_P (par[0]) = 1;
21199 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21202 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
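/* A minimal sketch, not part of the original file, of the MASK
   iteration idiom used above: "mask &= mask - 1" clears the lowest set
   bit, so each set register number is visited exactly once, lowest
   first.  (illustrative_ctz is the sketch defined earlier.)  */
static void
illustrative_visit_regs (unsigned long mask, void (*visit) (int))
{
  while (mask != 0)
    {
      visit (illustrative_ctz (mask));	/* index of the lowest set bit */
      mask &= mask - 1;			/* clear that bit */
    }
}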
21207 /* Emit code to pop registers from the stack. F is the
21208 assembly file. MASK is the registers to pop. */
21210 thumb_pop (FILE *f, unsigned long mask)
21213 int lo_mask = mask & 0xFF;
21214 int pushed_words = 0;
21218 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21220 /* Special case. Do not generate a POP PC statement here; do it in thumb_exit (). */
21222 thumb_exit (f, -1);
21226 fprintf (f, "\tpop\t{");
21228 /* Look at the low registers first. */
21229 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21233 asm_fprintf (f, "%r", regno);
21235 if ((lo_mask & ~1) != 0)
21242 if (mask & (1 << PC_REGNUM))
21244 /* Catch popping the PC. */
21245 if (TARGET_INTERWORK || TARGET_BACKTRACE
21246 || crtl->calls_eh_return)
21248 /* The PC is never popped directly; instead
21249 it is popped into r3 and then BX is used. */
21250 fprintf (f, "}\n");
21252 thumb_exit (f, -1);
21261 asm_fprintf (f, "%r", PC_REGNUM);
21265 fprintf (f, "}\n");
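/* Illustrative output, assumed rather than taken from the original
   file: for a MASK covering r4, r5 and the PC, in a function where the
   PC may be popped directly, the code above emits

	pop	{r4, r5, pc}

   whereas with interworking, backtracing or an EH return the PC is
   instead returned through thumb_exit.  */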
21268 /* Generate code to return from a thumb function.
21269 If 'reg_containing_return_addr' is -1, then the return address is
21270 actually on the stack, at the stack pointer. */
21272 thumb_exit (FILE *f, int reg_containing_return_addr)
21274 unsigned regs_available_for_popping;
21275 unsigned regs_to_pop;
21277 unsigned available;
21281 int restore_a4 = FALSE;
21283 /* Compute the registers we need to pop. */
21287 if (reg_containing_return_addr == -1)
21289 regs_to_pop |= 1 << LR_REGNUM;
21293 if (TARGET_BACKTRACE)
21295 /* Restore the (ARM) frame pointer and stack pointer. */
21296 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21300 /* If there is nothing to pop then just emit the BX instruction and
21302 if (pops_needed == 0)
21304 if (crtl->calls_eh_return)
21305 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21307 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21310 /* Otherwise if we are not supporting interworking and we have not created
21311 a backtrace structure and the function was not entered in ARM mode then
21312 just pop the return address straight into the PC. */
21313 else if (!TARGET_INTERWORK
21314 && !TARGET_BACKTRACE
21315 && !is_called_in_ARM_mode (current_function_decl)
21316 && !crtl->calls_eh_return)
21318 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21322 /* Find out how many of the (return) argument registers we can corrupt. */
21323 regs_available_for_popping = 0;
21325 /* If returning via __builtin_eh_return, the bottom three registers
21326 all contain information needed for the return. */
21327 if (crtl->calls_eh_return)
21331 /* Deduce the registers used from the function's
21332 return value. This is more reliable than examining
21333 df_regs_ever_live_p () because that will be set if the register is
21334 ever used in the function, not just if the register is used
21335 to hold a return value. */
21337 if (crtl->return_rtx != 0)
21338 mode = GET_MODE (crtl->return_rtx);
else
21340 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21342 size = GET_MODE_SIZE (mode);
21346 /* In a void function we can use any argument register.
21347 In a function that returns a structure on the stack
21348 we can use the second and third argument registers. */
21349 if (mode == VOIDmode)
21350 regs_available_for_popping =
21351 (1 << ARG_REGISTER (1))
21352 | (1 << ARG_REGISTER (2))
21353 | (1 << ARG_REGISTER (3));
21355 regs_available_for_popping =
21356 (1 << ARG_REGISTER (2))
21357 | (1 << ARG_REGISTER (3));
21359 else if (size <= 4)
21360 regs_available_for_popping =
21361 (1 << ARG_REGISTER (2))
21362 | (1 << ARG_REGISTER (3));
21363 else if (size <= 8)
21364 regs_available_for_popping =
21365 (1 << ARG_REGISTER (3));
21368 /* Match registers to be popped with registers into which we pop them. */
21369 for (available = regs_available_for_popping,
21370 required = regs_to_pop;
21371 required != 0 && available != 0;
21372 available &= ~(available & - available),
21373 required &= ~(required & - required))
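/* Illustrative note, not part of the original file: X & -X isolates
   the lowest set bit of X (e.g. 0x0c & -0x0c == 0x04), so each
   iteration of the loop above retires the lowest remaining register
   in both masks in lockstep.  */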
21376 /* If we have any popping registers left over, remove them. */
if (available)
21378 regs_available_for_popping &= ~available;
21380 /* Otherwise if we need another popping register we can use
21381 the fourth argument register. */
21382 else if (pops_needed)
21384 /* If we have not found any free argument registers and
21385 reg a4 contains the return address, we must move it. */
21386 if (regs_available_for_popping == 0
21387 && reg_containing_return_addr == LAST_ARG_REGNUM)
21389 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21390 reg_containing_return_addr = LR_REGNUM;
21392 else if (size > 12)
21394 /* Register a4 is being used to hold part of the return value,
21395 but we have dire need of a free, low register. */
21398 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21401 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21403 /* The fourth argument register is available. */
21404 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21410 /* Pop as many registers as we can. */
21411 thumb_pop (f, regs_available_for_popping);
21413 /* Process the registers we popped. */
21414 if (reg_containing_return_addr == -1)
21416 /* The return address was popped into the lowest numbered register. */
21417 regs_to_pop &= ~(1 << LR_REGNUM);
21419 reg_containing_return_addr =
21420 number_of_first_bit_set (regs_available_for_popping);
21422 /* Remove this register from the mask of available registers, so that
21423 the return address will not be corrupted by further pops. */
21424 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21427 /* If we popped other registers then handle them here. */
21428 if (regs_available_for_popping)
21432 /* Work out which register currently contains the frame pointer. */
21433 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21435 /* Move it into the correct place. */
21436 asm_fprintf (f, "\tmov\t%r, %r\n",
21437 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21439 /* (Temporarily) remove it from the mask of popped registers. */
21440 regs_available_for_popping &= ~(1 << frame_pointer);
21441 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21443 if (regs_available_for_popping)
21447 /* We popped the stack pointer as well,
21448 find the register that contains it. */
21449 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21451 /* Move it into the stack register. */
21452 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21454 /* At this point we have popped all necessary registers, so
21455 do not worry about restoring regs_available_for_popping
21456 to its correct value:
21458 assert (pops_needed == 0)
21459 assert (regs_available_for_popping == (1 << frame_pointer))
21460 assert (regs_to_pop == (1 << STACK_POINTER)) */
21464 /* Since we have just moved the popped value into the frame
21465 pointer, the popping register is available for reuse, and
21466 we know that we still have the stack pointer left to pop. */
21467 regs_available_for_popping |= (1 << frame_pointer);
21471 /* If we still have registers left on the stack, but we no longer have
21472 any registers into which we can pop them, then we must move the return
21473 address into the link register and make available the register that we used to hold the return address. */
21475 if (regs_available_for_popping == 0 && pops_needed > 0)
21477 regs_available_for_popping |= 1 << reg_containing_return_addr;
21479 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21480 reg_containing_return_addr);
21482 reg_containing_return_addr = LR_REGNUM;
21485 /* If we have registers left on the stack then pop some more.
21486 We know that at most we will want to pop FP and SP. */
21487 if (pops_needed > 0)
21492 thumb_pop (f, regs_available_for_popping);
21494 /* We have popped either FP or SP.
21495 Move whichever one it is into the correct register. */
21496 popped_into = number_of_first_bit_set (regs_available_for_popping);
21497 move_to = number_of_first_bit_set (regs_to_pop);
21499 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21501 regs_to_pop &= ~(1 << move_to);
21506 /* If we still have not popped everything then we must have only
21507 had one register available to us and we are now popping the SP. */
21508 if (pops_needed > 0)
21512 thumb_pop (f, regs_available_for_popping);
21514 popped_into = number_of_first_bit_set (regs_available_for_popping);
21516 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21518 assert (regs_to_pop == (1 << STACK_POINTER))
21519 assert (pops_needed == 1)
21523 /* If necessary restore the a4 register. */
21526 if (reg_containing_return_addr != LR_REGNUM)
21528 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21529 reg_containing_return_addr = LR_REGNUM;
21532 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21535 if (crtl->calls_eh_return)
21536 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21538 /* Return to caller. */
21539 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21542 /* Scan INSN just before assembler is output for it.
21543 For Thumb-1, we track the status of the condition codes; this
21544 information is used in the cbranchsi4_insn pattern. */
21546 thumb1_final_prescan_insn (rtx insn)
21548 if (flag_print_asm_name)
21549 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21550 INSN_ADDRESSES (INSN_UID (insn)));
21551 /* Don't overwrite the previous setter when we get to a cbranch. */
21552 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21554 enum attr_conds conds;
21556 if (cfun->machine->thumb1_cc_insn)
21558 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21559 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21562 conds = get_attr_conds (insn);
21563 if (conds == CONDS_SET)
21565 rtx set = single_set (insn);
21566 cfun->machine->thumb1_cc_insn = insn;
21567 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21568 cfun->machine->thumb1_cc_op1 = const0_rtx;
21569 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21570 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21572 rtx src1 = XEXP (SET_SRC (set), 1);
21573 if (src1 == const0_rtx)
21574 cfun->machine->thumb1_cc_mode = CCmode;
21577 else if (conds != CONDS_NOCOND)
21578 cfun->machine->thumb1_cc_insn = NULL_RTX;
21583 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21585 unsigned HOST_WIDE_INT mask = 0xff;
21588 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21589 if (val == 0) /* XXX */
21592 for (i = 0; i < 25; i++)
21593 if ((val & (mask << i)) == val)
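/* Illustrative note, not part of the original file: the loop above
   accepts exactly those 32-bit constants whose set bits fit within one
   contiguous 8-bit window, i.e. VAL == X << I for some 0 <= X <= 0xff
   and 0 <= I < 25.  For example 0x00ff0000 (0xff << 16) qualifies,
   while 0x101 does not, because its set bits span nine bit positions.  */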
21599 /* Returns nonzero if the current function contains,
21600 or might contain a far jump. */
21602 thumb_far_jump_used_p (void)
21606 /* This test is only important for leaf functions. */
21607 /* assert (!leaf_function_p ()); */
21609 /* If we have already decided that far jumps may be used,
21610 do not bother checking again, and always return true even if
21611 it turns out that they are not being used. Once we have made
21612 the decision that far jumps are present (and that hence the link
21613 register will be pushed onto the stack) we cannot go back on it. */
21614 if (cfun->machine->far_jump_used)
21617 /* If this function is not being called from the prologue/epilogue
21618 generation code then it must be being called from the
21619 INITIAL_ELIMINATION_OFFSET macro. */
21620 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21622 /* In this case we know that we are being asked about the elimination
21623 of the arg pointer register. If that register is not being used,
21624 then there are no arguments on the stack, and we do not have to
21625 worry that a far jump might force the prologue to push the link
21626 register, changing the stack offsets. In this case we can just
21627 return false, since the presence of far jumps in the function will
21628 not affect stack offsets.
21630 If the arg pointer is live (or if it was live, but has now been
21631 eliminated and so set to dead) then we do have to test to see if
21632 the function might contain a far jump. This test can lead to some
21633 false negatives, since before reload is completed the length of
21634 branch instructions is not known, so gcc defaults to returning their
21635 longest length, which in turn sets the far jump attribute to true.
21637 A false negative will not result in bad code being generated, but it
21638 will result in a needless push and pop of the link register. We
21639 hope that this does not occur too often.
21641 If we need doubleword stack alignment this could affect the other
21642 elimination offsets so we can't risk getting it wrong. */
21643 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21644 cfun->machine->arg_pointer_live = 1;
21645 else if (!cfun->machine->arg_pointer_live)
21649 /* Check to see if the function contains a branch
21650 insn with the far jump attribute set. */
21651 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21653 if (GET_CODE (insn) == JUMP_INSN
21654 /* Ignore tablejump patterns. */
21655 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21656 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21657 && get_attr_far_jump (insn) == FAR_JUMP_YES
21660 /* Record the fact that we have decided that
21661 the function does use far jumps. */
21662 cfun->machine->far_jump_used = 1;
21670 /* Return nonzero if FUNC must be entered in ARM mode. */
21672 is_called_in_ARM_mode (tree func)
21674 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21676 /* Ignore the problem about functions whose address is taken. */
21677 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21681 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21687 /* Given the stack offsets and register mask in OFFSETS, decide how
21688 many additional registers to push instead of subtracting a constant
21689 from SP. For epilogues the principle is the same except we use pop.
21690 FOR_PROLOGUE indicates which we're generating. */
21692 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21694 HOST_WIDE_INT amount;
21695 unsigned long live_regs_mask = offsets->saved_regs_mask;
21696 /* Extract a mask of the ones we can give to the Thumb's push/pop instruction. */
21698 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21699 /* Then count how many other high registers will need to be pushed. */
21700 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21701 int n_free, reg_base;
21703 if (!for_prologue && frame_pointer_needed)
21704 amount = offsets->locals_base - offsets->saved_regs;
21706 amount = offsets->outgoing_args - offsets->saved_regs;
21708 /* If the stack frame size is 512 exactly, we can save one load
21709 instruction, which should make this a win even when optimizing
21710 for speed. */
21711 if (!optimize_size && amount != 512)
21714 /* Can't do this if there are high registers to push. */
21715 if (high_regs_pushed != 0)
21718 /* Shouldn't do it in the prologue if no registers would normally
21719 be pushed at all. In the epilogue, also allow it if we'll have
21720 a pop insn for the PC. */
21723 || TARGET_BACKTRACE
21724 || (live_regs_mask & 1 << LR_REGNUM) == 0
21725 || TARGET_INTERWORK
21726 || crtl->args.pretend_args_size != 0))
21729 /* Don't do this if thumb_expand_prologue wants to emit instructions
21730 between the push and the stack frame allocation. */
21732 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21733 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21740 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21741 live_regs_mask >>= reg_base;
21744 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21745 && (for_prologue || call_used_regs[reg_base + n_free]))
21747 live_regs_mask >>= 1;
21753 gcc_assert (amount / 4 * 4 == amount);
21755 if (amount >= 512 && (amount - n_free * 4) < 512)
21756 return (amount - 508) / 4;
21757 if (amount <= n_free * 4)
21762 /* The bits which aren't usefully expanded as rtl. */
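/* As a sketch (not verbatim compiler output), a function with a
   pushed LR and pretend args might get the following from the code
   below:
	pop	{r4, r7}	@ restore saved low registers
	pop	{r3}		@ return address into a temporary
	mov	lr, r3
	add	sp, sp, #16	@ discard the pretend args
	bx	lr
   */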
21764 thumb_unexpanded_epilogue (void)
21766 arm_stack_offsets *offsets;
21768 unsigned long live_regs_mask = 0;
21769 int high_regs_pushed = 0;
21771 int had_to_push_lr;
21774 if (cfun->machine->return_used_this_function != 0)
21777 if (IS_NAKED (arm_current_func_type ()))
21780 offsets = arm_get_frame_offsets ();
21781 live_regs_mask = offsets->saved_regs_mask;
21782 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21784 /* We can deduce the registers used from the function's return value.
21785 This is more reliable than examining df_regs_ever_live_p () because that
21786 will be set if the register is ever used in the function, not just if
21787 the register is used to hold a return value. */
21788 size = arm_size_return_regs ();
21790 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21793 unsigned long extra_mask = (1 << extra_pop) - 1;
21794 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
21798 /* The prologue may have pushed some high registers to use as
21799 work registers. E.g. the testsuite file:
21800 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21801 compiles to produce:
21802 push {r4, r5, r6, r7, lr}
21803 mov r7, r9
21804 mov r6, r8
21805 push {r6, r7}
21806 as part of the prologue. We have to undo that pushing here. */
21808 if (high_regs_pushed)
21810 unsigned long mask = live_regs_mask & 0xff;
21813 /* The available low registers depend on the size of the value we are
21814 returning. */
21821 /* Oh dear! We have no low registers into which we can pop
21822 high registers! */
21823 internal_error
21824 ("no low registers available for popping high registers");
21826 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21827 if (live_regs_mask & (1 << next_hi_reg))
21830 while (high_regs_pushed)
21832 /* Find lo register(s) into which the high register(s) can
21833 be popped. */
21834 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21836 if (mask & (1 << regno))
21837 high_regs_pushed--;
21838 if (high_regs_pushed == 0)
21842 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21844 /* Pop the values into the low register(s). */
21845 thumb_pop (asm_out_file, mask);
21847 /* Move the value(s) into the high registers. */
21848 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21850 if (mask & (1 << regno))
21852 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21855 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21856 if (live_regs_mask & (1 << next_hi_reg))
21861 live_regs_mask &= ~0x0f00;
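/* By way of illustration (register choices invented): if r8 and r9
   were saved and r4/r5 are available, the loop above emits roughly:
	pop	{r4, r5}
	mov	r8, r4
	mov	r9, r5
   before falling through to pop the low registers themselves.  */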
21864 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21865 live_regs_mask &= 0xff;
21867 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21869 /* Pop the return address into the PC. */
21870 if (had_to_push_lr)
21871 live_regs_mask |= 1 << PC_REGNUM;
21873 /* Either no argument registers were pushed or a backtrace
21874 structure was created which includes an adjusted stack
21875 pointer, so just pop everything. */
21876 if (live_regs_mask)
21877 thumb_pop (asm_out_file, live_regs_mask);
21879 /* We have either just popped the return address into the
21880 PC or it was kept in LR for the entire function.
21881 Note that thumb_pop has already called thumb_exit if the
21882 PC was in the list. */
21883 if (!had_to_push_lr)
21884 thumb_exit (asm_out_file, LR_REGNUM);
21888 /* Pop everything but the return address. */
21889 if (live_regs_mask)
21890 thumb_pop (asm_out_file, live_regs_mask);
21892 if (had_to_push_lr)
21896 /* We have no free low regs, so save one. */
21897 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21901 /* Get the return address into a temporary register. */
21902 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21906 /* Move the return address to lr. */
21907 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21909 /* Restore the low register. */
21910 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21915 regno = LAST_ARG_REGNUM;
21920 /* Remove the argument registers that were pushed onto the stack. */
21921 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21922 SP_REGNUM, SP_REGNUM,
21923 crtl->args.pretend_args_size);
21925 thumb_exit (asm_out_file, regno);
21931 /* Functions to save and restore machine-specific function data. */
21932 static struct machine_function *
21933 arm_init_machine_status (void)
21935 struct machine_function *machine;
21936 machine = ggc_alloc_cleared_machine_function ();
21938 #if ARM_FT_UNKNOWN != 0
21939 machine->func_type = ARM_FT_UNKNOWN;
21944 /* Return an RTX indicating where the return address to the
21945 calling function can be found. */
21947 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
21952 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
21955 /* Do anything needed before RTL is emitted for each function. */
21957 arm_init_expanders (void)
21959 /* Arrange to initialize and mark the machine per-function status. */
21960 init_machine_status = arm_init_machine_status;
21962 /* This is to stop the combine pass optimizing away the alignment
21963 adjustment of va_arg. */
21964 /* ??? It is claimed that this should not be necessary. */
21966 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
21970 /* Like arm_compute_initial_elimination_offset. Simpler because there
21971 isn't an ABI specified frame pointer for Thumb. Instead, we set it
21972 to point at the base of the local variables after static stack
21973 space for a function has been allocated. */
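/* For instance, with invented offsets saved_args == 0,
   saved_regs == 8, soft_frame == 8, locals_base == 24 and
   outgoing_args == 40, eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM below yields 40, and FRAME_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields 40 - 8 == 32.  */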
21976 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21978 arm_stack_offsets *offsets;
21980 offsets = arm_get_frame_offsets ();
21984 case ARG_POINTER_REGNUM:
21987 case STACK_POINTER_REGNUM:
21988 return offsets->outgoing_args - offsets->saved_args;
21990 case FRAME_POINTER_REGNUM:
21991 return offsets->soft_frame - offsets->saved_args;
21993 case ARM_HARD_FRAME_POINTER_REGNUM:
21994 return offsets->saved_regs - offsets->saved_args;
21996 case THUMB_HARD_FRAME_POINTER_REGNUM:
21997 return offsets->locals_base - offsets->saved_args;
22000 gcc_unreachable ();
22004 case FRAME_POINTER_REGNUM:
22007 case STACK_POINTER_REGNUM:
22008 return offsets->outgoing_args - offsets->soft_frame;
22010 case ARM_HARD_FRAME_POINTER_REGNUM:
22011 return offsets->saved_regs - offsets->soft_frame;
22013 case THUMB_HARD_FRAME_POINTER_REGNUM:
22014 return offsets->locals_base - offsets->soft_frame;
22017 gcc_unreachable ();
22022 gcc_unreachable ();
22026 /* Generate the function's prologue. */
22029 thumb1_expand_prologue (void)
22033 HOST_WIDE_INT amount;
22034 arm_stack_offsets *offsets;
22035 unsigned long func_type;
22037 unsigned long live_regs_mask;
22038 unsigned long l_mask;
22039 unsigned high_regs_pushed = 0;
22041 func_type = arm_current_func_type ();
22043 /* Naked functions don't have prologues. */
22044 if (IS_NAKED (func_type))
22047 if (IS_INTERRUPT (func_type))
22049 error ("interrupt service routines cannot be coded in Thumb mode");
22053 if (is_called_in_ARM_mode (current_function_decl))
22054 emit_insn (gen_prologue_thumb1_interwork ());
22056 offsets = arm_get_frame_offsets ();
22057 live_regs_mask = offsets->saved_regs_mask;
22059 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22060 l_mask = live_regs_mask & 0x40ff;
22061 /* Then count how many other high registers will need to be pushed. */
22062 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22064 if (crtl->args.pretend_args_size)
22066 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22068 if (cfun->machine->uses_anonymous_args)
22070 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22071 unsigned long mask;
22073 mask = 1ul << (LAST_ARG_REGNUM + 1);
22074 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22076 insn = thumb1_emit_multi_reg_push (mask, 0);
22080 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22081 stack_pointer_rtx, x));
22083 RTX_FRAME_RELATED_P (insn) = 1;
22086 if (TARGET_BACKTRACE)
22088 HOST_WIDE_INT offset = 0;
22089 unsigned work_register;
22090 rtx work_reg, x, arm_hfp_rtx;
22092 /* We have been asked to create a stack backtrace structure.
22093 The code looks like this:
22097 0 sub SP, #16 Reserve space for 4 registers.
22098 2 push {R7} Push low registers.
22099 4 add R7, SP, #20 Get the stack pointer before the push.
22100 6 str R7, [SP, #8] Store the stack pointer
22101 (before reserving the space).
22102 8 mov R7, PC Get hold of the start of this code + 12.
22103 10 str R7, [SP, #16] Store it.
22104 12 mov R7, FP Get hold of the current frame pointer.
22105 14 str R7, [SP, #4] Store it.
22106 16 mov R7, LR Get hold of the current return address.
22107 18 str R7, [SP, #12] Store it.
22108 20 add R7, SP, #16 Point at the start of the
22109 backtrace structure.
22110 22 mov FP, R7 Put this value into the frame pointer. */
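/* Our reading of the structure this sequence builds, relative to the
   new frame pointer (for reference):
	[FP, #0]	saved PC (start of this code + 12)
	[FP, #-4]	saved LR
	[FP, #-8]	previous SP (before the 16-byte reservation)
	[FP, #-12]	previous FP
   */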
22112 work_register = thumb_find_work_register (live_regs_mask);
22113 work_reg = gen_rtx_REG (SImode, work_register);
22114 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22116 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22117 stack_pointer_rtx, GEN_INT (-16)));
22118 RTX_FRAME_RELATED_P (insn) = 1;
22122 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22123 RTX_FRAME_RELATED_P (insn) = 1;
22125 offset = bit_count (l_mask) * UNITS_PER_WORD;
22128 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22129 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22131 x = plus_constant (stack_pointer_rtx, offset + 4);
22132 x = gen_frame_mem (SImode, x);
22133 emit_move_insn (x, work_reg);
22135 /* Make sure that the instruction fetching the PC is in the right place
22136 to calculate "start of backtrace creation code + 12". */
22137 /* ??? The stores using the common WORK_REG ought to be enough to
22138 prevent the scheduler from doing anything weird. Failing that
22139 we could always move all of the following into an UNSPEC_VOLATILE. */
22142 x = gen_rtx_REG (SImode, PC_REGNUM);
22143 emit_move_insn (work_reg, x);
22145 x = plus_constant (stack_pointer_rtx, offset + 12);
22146 x = gen_frame_mem (SImode, x);
22147 emit_move_insn (x, work_reg);
22149 emit_move_insn (work_reg, arm_hfp_rtx);
22151 x = plus_constant (stack_pointer_rtx, offset);
22152 x = gen_frame_mem (SImode, x);
22153 emit_move_insn (x, work_reg);
22157 emit_move_insn (work_reg, arm_hfp_rtx);
22159 x = plus_constant (stack_pointer_rtx, offset);
22160 x = gen_frame_mem (SImode, x);
22161 emit_move_insn (x, work_reg);
22163 x = gen_rtx_REG (SImode, PC_REGNUM);
22164 emit_move_insn (work_reg, x);
22166 x = plus_constant (stack_pointer_rtx, offset + 12);
22167 x = gen_frame_mem (SImode, x);
22168 emit_move_insn (x, work_reg);
22171 x = gen_rtx_REG (SImode, LR_REGNUM);
22172 emit_move_insn (work_reg, x);
22174 x = plus_constant (stack_pointer_rtx, offset + 8);
22175 x = gen_frame_mem (SImode, x);
22176 emit_move_insn (x, work_reg);
22178 x = GEN_INT (offset + 12);
22179 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22181 emit_move_insn (arm_hfp_rtx, work_reg);
22183 /* Optimization: If we are not pushing any low registers but we are going
22184 to push some high registers then delay our first push. This will just
22185 be a push of LR and we can combine it with the push of the first high
22186 register. */
22187 else if ((l_mask & 0xff) != 0
22188 || (high_regs_pushed == 0 && l_mask))
22190 unsigned long mask = l_mask;
22191 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22192 insn = thumb1_emit_multi_reg_push (mask, mask);
22193 RTX_FRAME_RELATED_P (insn) = 1;
22196 if (high_regs_pushed)
22198 unsigned pushable_regs;
22199 unsigned next_hi_reg;
22201 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22202 if (live_regs_mask & (1 << next_hi_reg))
22205 pushable_regs = l_mask & 0xff;
22207 if (pushable_regs == 0)
22208 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22210 while (high_regs_pushed > 0)
22212 unsigned long real_regs_mask = 0;
22214 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22216 if (pushable_regs & (1 << regno))
22218 emit_move_insn (gen_rtx_REG (SImode, regno),
22219 gen_rtx_REG (SImode, next_hi_reg));
22221 high_regs_pushed --;
22222 real_regs_mask |= (1 << next_hi_reg);
22224 if (high_regs_pushed)
22226 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22228 if (live_regs_mask & (1 << next_hi_reg))
22233 pushable_regs &= ~((1 << regno) - 1);
22239 /* If we had to find a work register and we have not yet
22240 saved the LR then add it to the list of regs to push. */
22241 if (l_mask == (1 << LR_REGNUM))
22243 pushable_regs |= l_mask;
22244 real_regs_mask |= l_mask;
22248 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22249 RTX_FRAME_RELATED_P (insn) = 1;
22253 /* Load the pic register before setting the frame pointer,
22254 so we can use r7 as a temporary work register. */
22255 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22256 arm_load_pic_register (live_regs_mask);
22258 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22259 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22260 stack_pointer_rtx);
22262 if (flag_stack_usage_info)
22263 current_function_static_stack_size
22264 = offsets->outgoing_args - offsets->saved_args;
22266 amount = offsets->outgoing_args - offsets->saved_regs;
22267 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22272 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22273 GEN_INT (- amount)));
22274 RTX_FRAME_RELATED_P (insn) = 1;
22280 /* The stack decrement is too big for an immediate value in a single
22281 insn. In theory we could issue multiple subtracts, but after
22282 three of them it becomes more space efficient to place the full
22283 value in the constant pool and load into a register. (Also the
22284 ARM debugger really likes to see only one stack decrement per
22285 function). So instead we look for a scratch register into which
22286 we can load the decrement, and then we subtract this from the
22287 stack pointer. Unfortunately on the thumb the only available
22288 scratch registers are the argument registers, and we cannot use
22289 these as they may hold arguments to the function. Instead we
22290 attempt to locate a call preserved register which is used by this
22291 function. If we can find one, then we know that it will have
22292 been pushed at the start of the prologue and so we can corrupt
22293 it now. */
22294 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22295 if (live_regs_mask & (1 << regno))
22298 gcc_assert(regno <= LAST_LO_REGNUM);
22300 reg = gen_rtx_REG (SImode, regno);
22302 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22304 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22305 stack_pointer_rtx, reg));
22307 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22308 plus_constant (stack_pointer_rtx,
22310 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22311 RTX_FRAME_RELATED_P (insn) = 1;
22315 if (frame_pointer_needed)
22316 thumb_set_frame_pointer (offsets);
22318 /* If we are profiling, make sure no instructions are scheduled before
22319 the call to mcount. Similarly if the user has requested no
22320 scheduling in the prolog. Similarly if we want non-call exceptions
22321 using the EABI unwinder, to prevent faulting instructions from being
22322 swapped with a stack adjustment. */
22323 if (crtl->profile || !TARGET_SCHED_PROLOG
22324 || (arm_except_unwind_info (&global_options) == UI_TARGET
22325 && cfun->can_throw_non_call_exceptions))
22326 emit_insn (gen_blockage ());
22328 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22329 if (live_regs_mask & 0xff)
22330 cfun->machine->lr_save_eliminated = 0;
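/* Taking the steps above together, a small function typically gets a
   prologue along the lines of (sketch only):
	push	{r4, r5, r7, lr}
	sub	sp, sp, #24
	add	r7, sp, #0
   with the frame-pointer set emitted only when frame_pointer_needed.  */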
22335 thumb1_expand_epilogue (void)
22337 HOST_WIDE_INT amount;
22338 arm_stack_offsets *offsets;
22341 /* Naked functions don't have epilogues. */
22342 if (IS_NAKED (arm_current_func_type ()))
22345 offsets = arm_get_frame_offsets ();
22346 amount = offsets->outgoing_args - offsets->saved_regs;
22348 if (frame_pointer_needed)
22350 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22351 amount = offsets->locals_base - offsets->saved_regs;
22353 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22355 gcc_assert (amount >= 0);
22358 emit_insn (gen_blockage ());
22361 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22362 GEN_INT (amount)));
22365 /* r3 is always free in the epilogue. */
22366 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22368 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22369 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22373 /* Emit a USE (stack_pointer_rtx), so that
22374 the stack adjustment will not be deleted. */
22375 emit_insn (gen_prologue_use (stack_pointer_rtx));
22377 if (crtl->profile || !TARGET_SCHED_PROLOG)
22378 emit_insn (gen_blockage ());
22380 /* Emit a clobber for each insn that will be restored in the epilogue,
22381 so that flow2 will get register lifetimes correct. */
22382 for (regno = 0; regno < 13; regno++)
22383 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22384 emit_clobber (gen_rtx_REG (SImode, regno));
22386 if (! df_regs_ever_live_p (LR_REGNUM))
22387 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22390 /* Implementation of insn prologue_thumb1_interwork. This is the first
22391 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22394 thumb1_output_interwork (void)
22397 FILE *f = asm_out_file;
22399 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22400 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22402 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22404 /* Generate code sequence to switch us into Thumb mode. */
22405 /* The .code 32 directive has already been emitted by
22406 ASM_DECLARE_FUNCTION_NAME. */
22407 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22408 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22410 /* Generate a label, so that the debugger will notice the
22411 change in instruction sets. This label is also used by
22412 the assembler to bypass the ARM code when this function
22413 is called from a Thumb encoded function elsewhere in the
22414 same file. Hence the definition of STUB_NAME here must
22415 agree with the definition in gas/config/tc-arm.c. */
22417 #define STUB_NAME ".real_start_of"
22419 fprintf (f, "\t.code\t16\n");
22421 if (arm_dllexport_name_p (name))
22422 name = arm_strip_name_encoding (name);
22424 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22425 fprintf (f, "\t.thumb_func\n");
22426 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
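/* Schematically, for a function <name> the emitted stub is:
	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of<name>
	.thumb_func
   .real_start_of<name>:
   where the exact label spelling is STUB_NAME immediately followed
   by the (possibly prefixed) function name.  */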
22431 /* Handle the case of a double word load into a low register from
22432 a computed memory address. The computed address may involve a
22433 register which is overwritten by the load. */
22435 thumb_load_double_from_address (rtx *operands)
22443 gcc_assert (GET_CODE (operands[0]) == REG);
22444 gcc_assert (GET_CODE (operands[1]) == MEM);
22446 /* Get the memory address. */
22447 addr = XEXP (operands[1], 0);
22449 /* Work out how the memory address is computed. */
22450 switch (GET_CODE (addr))
22453 operands[2] = adjust_address (operands[1], SImode, 4);
22455 if (REGNO (operands[0]) == REGNO (addr))
22457 output_asm_insn ("ldr\t%H0, %2", operands);
22458 output_asm_insn ("ldr\t%0, %1", operands);
22462 output_asm_insn ("ldr\t%0, %1", operands);
22463 output_asm_insn ("ldr\t%H0, %2", operands);
22468 /* Compute <address> + 4 for the high order load. */
22469 operands[2] = adjust_address (operands[1], SImode, 4);
22471 output_asm_insn ("ldr\t%0, %1", operands);
22472 output_asm_insn ("ldr\t%H0, %2", operands);
22476 arg1 = XEXP (addr, 0);
22477 arg2 = XEXP (addr, 1);
22479 if (CONSTANT_P (arg1))
22480 base = arg2, offset = arg1;
22482 base = arg1, offset = arg2;
22484 gcc_assert (GET_CODE (base) == REG);
22486 /* Catch the case of <address> = <reg> + <reg> */
22487 if (GET_CODE (offset) == REG)
22489 int reg_offset = REGNO (offset);
22490 int reg_base = REGNO (base);
22491 int reg_dest = REGNO (operands[0]);
22493 /* Add the base and offset registers together into the
22494 higher destination register. */
22495 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22496 reg_dest + 1, reg_base, reg_offset);
22498 /* Load the lower destination register from the address in
22499 the higher destination register. */
22500 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22501 reg_dest, reg_dest + 1);
22503 /* Load the higher destination register from its own address
22504 plus 4. */
22505 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22506 reg_dest + 1, reg_dest + 1);
22510 /* Compute <address> + 4 for the high order load. */
22511 operands[2] = adjust_address (operands[1], SImode, 4);
22513 /* If the computed address is held in the low order register
22514 then load the high order register first, otherwise always
22515 load the low order register first. */
22516 if (REGNO (operands[0]) == REGNO (base))
22518 output_asm_insn ("ldr\t%H0, %2", operands);
22519 output_asm_insn ("ldr\t%0, %1", operands);
22523 output_asm_insn ("ldr\t%0, %1", operands);
22524 output_asm_insn ("ldr\t%H0, %2", operands);
22530 /* With no registers to worry about we can just load the value
22531 directly. */
22532 operands[2] = adjust_address (operands[1], SImode, 4);
22534 output_asm_insn ("ldr\t%H0, %2", operands);
22535 output_asm_insn ("ldr\t%0, %1", operands);
22539 gcc_unreachable ();
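/* E.g. for a doubleword load of r0:r1 from [r2 + r3], the PLUS/REG
   case above prints:
	add	r1, r2, r3
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]
   computing the address in the high half of the destination so that
   the low-half load cannot clobber it.  */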
22546 thumb_output_move_mem_multiple (int n, rtx *operands)
22553 if (REGNO (operands[4]) > REGNO (operands[5]))
22556 operands[4] = operands[5];
22559 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22560 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22564 if (REGNO (operands[4]) > REGNO (operands[5]))
22567 operands[4] = operands[5];
22570 if (REGNO (operands[5]) > REGNO (operands[6]))
22573 operands[5] = operands[6];
22576 if (REGNO (operands[4]) > REGNO (operands[5]))
22579 operands[4] = operands[5];
22583 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22584 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22588 gcc_unreachable ();
22594 /* Output a call-via instruction for thumb state. */
22596 thumb_call_via_reg (rtx reg)
22598 int regno = REGNO (reg);
22601 gcc_assert (regno < LR_REGNUM);
22603 /* If we are in the normal text section we can use a single instance
22604 per compilation unit. If we are doing function sections, then we need
22605 an entry per section, since we can't rely on reachability. */
22606 if (in_section == text_section)
22608 thumb_call_reg_needed = 1;
22610 if (thumb_call_via_label[regno] == NULL)
22611 thumb_call_via_label[regno] = gen_label_rtx ();
22612 labelp = thumb_call_via_label + regno;
22616 if (cfun->machine->call_via[regno] == NULL)
22617 cfun->machine->call_via[regno] = gen_label_rtx ();
22618 labelp = cfun->machine->call_via + regno;
22621 output_asm_insn ("bl\t%a0", labelp);
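/* The effect (label name illustrative): a call through r3 becomes
	bl	.Lcall_via_r3
   where the shared stub, emitted later by arm_file_end, is just
   .Lcall_via_r3:
	bx	r3
   */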
22625 /* Routines for generating rtl. */
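/* For the block-move expander below, e.g. a 15-byte copy is emitted
   as one 12-byte load/store-multiple pair, then a halfword move,
   then a byte move for the tail (a sketch of the dispatch, nothing
   more).  */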
22627 thumb_expand_movmemqi (rtx *operands)
22629 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22630 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22631 HOST_WIDE_INT len = INTVAL (operands[2]);
22632 HOST_WIDE_INT offset = 0;
22636 emit_insn (gen_movmem12b (out, in, out, in));
22642 emit_insn (gen_movmem8b (out, in, out, in));
22648 rtx reg = gen_reg_rtx (SImode);
22649 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22650 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22657 rtx reg = gen_reg_rtx (HImode);
22658 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22659 plus_constant (in, offset))));
22660 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22668 rtx reg = gen_reg_rtx (QImode);
22669 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22670 plus_constant (in, offset))));
22671 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
22677 thumb_reload_out_hi (rtx *operands)
22679 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22682 /* Handle reading a half-word from memory during reload. */
22684 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22686 gcc_unreachable ();
22689 /* Return the length of a function name prefix
22690 that starts with the character 'c'. */
22692 arm_get_strip_length (int c)
22696 ARM_NAME_ENCODING_LENGTHS
22701 /* Return a pointer to a function's name with any
22702 and all prefix encodings stripped from it. */
22704 arm_strip_name_encoding (const char *name)
22708 while ((skip = arm_get_strip_length (* name)))
22714 /* If there is a '*' anywhere in the name's prefix, then
22715 emit the stripped name verbatim, otherwise prepend an
22716 underscore if leading underscores are being used. */
22718 arm_asm_output_labelref (FILE *stream, const char *name)
22723 while ((skip = arm_get_strip_length (* name)))
22725 verbatim |= (*name == '*');
22730 fputs (name, stream);
22732 asm_fprintf (stream, "%U%s", name);
22736 arm_file_start (void)
22740 if (TARGET_UNIFIED_ASM)
22741 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22745 const char *fpu_name;
22746 if (arm_selected_arch)
22747 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22748 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22749 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22751 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22753 if (TARGET_SOFT_FLOAT)
22756 fpu_name = "softvfp";
22758 fpu_name = "softfpa";
22762 fpu_name = arm_fpu_desc->name;
22763 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22765 if (TARGET_HARD_FLOAT)
22766 EMIT_EABI_ATTRIBUTE (Tag_ABI_HardFP_use, 27, 3);
22767 if (TARGET_HARD_FLOAT_ABI)
22768 EMIT_EABI_ATTRIBUTE (Tag_ABI_VFP_args, 28, 1);
22771 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22773 /* Some of these attributes only apply when the corresponding features
22774 are used. However we don't have any easy way of figuring this out.
22775 Conservatively record the setting that would have been used. */
22777 if (flag_rounding_math)
22778 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_rounding, 19, 1);
22780 if (!flag_unsafe_math_optimizations)
22782 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_denormal, 20, 1);
22783 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_exceptions, 21, 1);
22785 if (flag_signaling_nans)
22786 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_user_exceptions, 22, 1);
22788 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_number_model, 23,
22789 flag_finite_math_only ? 1 : 3);
22791 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_needed, 24, 1);
22792 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_preserved, 25, 1);
22793 EMIT_EABI_ATTRIBUTE (Tag_ABI_enum_size, 26, flag_short_enums ? 1 : 2);
22795 /* Tag_ABI_optimization_goals. */
22798 else if (optimize >= 2)
22804 EMIT_EABI_ATTRIBUTE (Tag_ABI_optimization_goals, 30, val);
22806 EMIT_EABI_ATTRIBUTE (Tag_CPU_unaligned_access, 34, unaligned_access);
22808 if (arm_fp16_format)
22809 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_16bit_format, 38, (int) arm_fp16_format);
22811 if (arm_lang_output_object_attributes_hook)
22812 arm_lang_output_object_attributes_hook();
22815 default_file_start ();
22819 arm_file_end (void)
22823 if (NEED_INDICATE_EXEC_STACK)
22824 /* Add .note.GNU-stack. */
22825 file_end_indicate_exec_stack ();
22827 if (! thumb_call_reg_needed)
22830 switch_to_section (text_section);
22831 asm_fprintf (asm_out_file, "\t.code 16\n");
22832 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22834 for (regno = 0; regno < LR_REGNUM; regno++)
22836 rtx label = thumb_call_via_label[regno];
22840 targetm.asm_out.internal_label (asm_out_file, "L",
22841 CODE_LABEL_NUMBER (label));
22842 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22848 /* Symbols in the text segment can be accessed without indirecting via the
22849 constant pool; it may take an extra binary operation, but this is still
22850 faster than indirecting via memory. Don't do this when not optimizing,
22851 since we won't be calculating all of the offsets necessary to do this
22852 simplification. */
22855 arm_encode_section_info (tree decl, rtx rtl, int first)
22857 if (optimize > 0 && TREE_CONSTANT (decl))
22858 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22860 default_encode_section_info (decl, rtl, first);
22862 #endif /* !ARM_PE */
22865 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22867 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22868 && !strcmp (prefix, "L"))
22870 arm_ccfsm_state = 0;
22871 arm_target_insn = NULL;
22873 default_internal_label (stream, prefix, labelno);
22876 /* Output code to add DELTA to the first argument, and then jump
22877 to FUNCTION. Used for C++ multiple inheritance. */
22879 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22880 HOST_WIDE_INT delta,
22881 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22884 static int thunk_label = 0;
22887 int mi_delta = delta;
22888 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22890 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
22893 mi_delta = - mi_delta;
22897 int labelno = thunk_label++;
22898 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22899 /* Thunks are entered in ARM mode when available. */
22900 if (TARGET_THUMB1_ONLY)
22902 /* push r3 so we can use it as a temporary. */
22903 /* TODO: Omit this save if r3 is not used. */
22904 fputs ("\tpush {r3}\n", file);
22905 fputs ("\tldr\tr3, ", file);
22909 fputs ("\tldr\tr12, ", file);
22911 assemble_name (file, label);
22912 fputc ('\n', file);
22915 /* If we are generating PIC, the ldr instruction below loads
22916 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22917 the address of the add + 8, so we have:
22919 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
22920 = target + 1.
22922 Note that we have "+ 1" because some versions of GNU ld
22923 don't set the low bit of the result for R_ARM_REL32
22924 relocations against thumb function symbols.
22925 On ARMv6M this is +4, not +8. */
22926 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
22927 assemble_name (file, labelpc);
22928 fputs (":\n", file);
22929 if (TARGET_THUMB1_ONLY)
22931 /* This is 2 insns after the start of the thunk, so we know it
22932 is 4-byte aligned. */
22933 fputs ("\tadd\tr3, pc, r3\n", file);
22934 fputs ("\tmov r12, r3\n", file);
22937 fputs ("\tadd\tr12, pc, r12\n", file);
22939 else if (TARGET_THUMB1_ONLY)
22940 fputs ("\tmov r12, r3\n", file);
22942 if (TARGET_THUMB1_ONLY)
22944 if (mi_delta > 255)
22946 fputs ("\tldr\tr3, ", file);
22947 assemble_name (file, label);
22948 fputs ("+4\n", file);
22949 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
22950 mi_op, this_regno, this_regno);
22952 else if (mi_delta != 0)
22954 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22955 mi_op, this_regno, this_regno,
22961 /* TODO: Use movw/movt for large constants when available. */
22962 while (mi_delta != 0)
22964 if ((mi_delta & (3 << shift)) == 0)
22968 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22969 mi_op, this_regno, this_regno,
22970 mi_delta & (0xff << shift));
22971 mi_delta &= ~(0xff << shift);
22978 if (TARGET_THUMB1_ONLY)
22979 fputs ("\tpop\t{r3}\n", file);
22981 fprintf (file, "\tbx\tr12\n");
22982 ASM_OUTPUT_ALIGN (file, 2);
22983 assemble_name (file, label);
22984 fputs (":\n", file);
22987 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
22988 rtx tem = XEXP (DECL_RTL (function), 0);
22989 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
22990 tem = gen_rtx_MINUS (GET_MODE (tem),
22992 gen_rtx_SYMBOL_REF (Pmode,
22993 ggc_strdup (labelpc)));
22994 assemble_integer (tem, 4, BITS_PER_WORD, 1);
22997 /* Output ".word .LTHUNKn". */
22998 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23000 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23001 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23005 fputs ("\tb\t", file);
23006 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23007 if (NEED_PLT_RELOC)
23008 fputs ("(PLT)", file);
23009 fputc ('\n', file);
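/* A non-PIC, non-Thumb-1 thunk with delta == 4 therefore comes out
   roughly as (sketch):
	add	r0, r0, #4
	b	<function>(PLT)
   while the TARGET_THUMB1_ONLY paths route the adjustment through r3
   and the literal words emitted above.  */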
23014 arm_emit_vector_const (FILE *file, rtx x)
23017 const char * pattern;
23019 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23021 switch (GET_MODE (x))
23023 case V2SImode: pattern = "%08x"; break;
23024 case V4HImode: pattern = "%04x"; break;
23025 case V8QImode: pattern = "%02x"; break;
23026 default: gcc_unreachable ();
23029 fprintf (file, "0x");
23030 for (i = CONST_VECTOR_NUNITS (x); i--;)
23034 element = CONST_VECTOR_ELT (x, i);
23035 fprintf (file, pattern, INTVAL (element));
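/* Worked example: a V4HImode constant with elements {1, 2, 3, 4}
   (element 0 first) prints as "0x0004000300020001"; the loop above
   emits the highest-numbered element first.  */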
23041 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
23042 HFmode constant pool entries are actually loaded with ldr. */
23044 arm_emit_fp16_const (rtx c)
23049 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23050 bits = real_to_target (NULL, &r, HFmode);
23051 if (WORDS_BIG_ENDIAN)
23052 assemble_zeros (2);
23053 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23054 if (!WORDS_BIG_ENDIAN)
23055 assemble_zeros (2);
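/* Worked example: HFmode 1.0 has the bit pattern 0x3c00, so on a
   little-endian target this emits the two bytes of 0x3c00 followed
   by two bytes of zero padding, filling one 4-byte word.  */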
23059 arm_output_load_gr (rtx *operands)
23066 if (GET_CODE (operands [1]) != MEM
23067 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23068 || GET_CODE (reg = XEXP (sum, 0)) != REG
23069 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23070 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23071 return "wldrw%?\t%0, %1";
23073 /* Fix up an out-of-range load of a GR register. */
23074 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23075 wcgr = operands[0];
23077 output_asm_insn ("ldr%?\t%0, %1", operands);
23079 operands[0] = wcgr;
23081 output_asm_insn ("tmcr%?\t%0, %1", operands);
23082 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23087 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23089 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23090 named arg and all anonymous args onto the stack.
23091 XXX I know the prologue shouldn't be pushing registers, but it is faster
23092 that way. */
23095 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23096 enum machine_mode mode,
23099 int second_time ATTRIBUTE_UNUSED)
23101 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23104 cfun->machine->uses_anonymous_args = 1;
23105 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23107 nregs = pcum->aapcs_ncrn;
23108 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23112 nregs = pcum->nregs;
23114 if (nregs < NUM_ARG_REGS)
23115 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23118 /* Return nonzero if the CONSUMER instruction (a store) does not need
23119 PRODUCER's value to calculate the address. */
23122 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23124 rtx value = PATTERN (producer);
23125 rtx addr = PATTERN (consumer);
23127 if (GET_CODE (value) == COND_EXEC)
23128 value = COND_EXEC_CODE (value);
23129 if (GET_CODE (value) == PARALLEL)
23130 value = XVECEXP (value, 0, 0);
23131 value = XEXP (value, 0);
23132 if (GET_CODE (addr) == COND_EXEC)
23133 addr = COND_EXEC_CODE (addr);
23134 if (GET_CODE (addr) == PARALLEL)
23135 addr = XVECEXP (addr, 0, 0);
23136 addr = XEXP (addr, 0);
23138 return !reg_overlap_mentioned_p (value, addr);
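/* For example, with a PRODUCER that sets r0 and a CONSUMER
   "str r0, [r1, #4]", the stored value depends on r0 but the address
   does not, so this returns nonzero.  */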
23141 /* Return nonzero if the CONSUMER instruction (a store) does need
23142 PRODUCER's value to calculate the address. */
23145 arm_early_store_addr_dep (rtx producer, rtx consumer)
23147 return !arm_no_early_store_addr_dep (producer, consumer);
23150 /* Return nonzero if the CONSUMER instruction (a load) does need
23151 PRODUCER's value to calculate the address. */
23154 arm_early_load_addr_dep (rtx producer, rtx consumer)
23156 rtx value = PATTERN (producer);
23157 rtx addr = PATTERN (consumer);
23159 if (GET_CODE (value) == COND_EXEC)
23160 value = COND_EXEC_CODE (value);
23161 if (GET_CODE (value) == PARALLEL)
23162 value = XVECEXP (value, 0, 0);
23163 value = XEXP (value, 0);
23164 if (GET_CODE (addr) == COND_EXEC)
23165 addr = COND_EXEC_CODE (addr);
23166 if (GET_CODE (addr) == PARALLEL)
23167 addr = XVECEXP (addr, 0, 0);
23168 addr = XEXP (addr, 1);
23170 return reg_overlap_mentioned_p (value, addr);
23173 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23174 have an early register shift value or amount dependency on the
23175 result of PRODUCER. */
23178 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23180 rtx value = PATTERN (producer);
23181 rtx op = PATTERN (consumer);
23184 if (GET_CODE (value) == COND_EXEC)
23185 value = COND_EXEC_CODE (value);
23186 if (GET_CODE (value) == PARALLEL)
23187 value = XVECEXP (value, 0, 0);
23188 value = XEXP (value, 0);
23189 if (GET_CODE (op) == COND_EXEC)
23190 op = COND_EXEC_CODE (op);
23191 if (GET_CODE (op) == PARALLEL)
23192 op = XVECEXP (op, 0, 0);
23195 early_op = XEXP (op, 0);
23196 /* This is either an actual independent shift, or a shift applied to
23197 the first operand of another operation. We want the whole shift
23198 operation. */
23199 if (GET_CODE (early_op) == REG)
23202 return !reg_overlap_mentioned_p (value, early_op);
23205 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23206 have an early register shift value dependency on the result of
23210 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23212 rtx value = PATTERN (producer);
23213 rtx op = PATTERN (consumer);
23216 if (GET_CODE (value) == COND_EXEC)
23217 value = COND_EXEC_CODE (value);
23218 if (GET_CODE (value) == PARALLEL)
23219 value = XVECEXP (value, 0, 0);
23220 value = XEXP (value, 0);
23221 if (GET_CODE (op) == COND_EXEC)
23222 op = COND_EXEC_CODE (op);
23223 if (GET_CODE (op) == PARALLEL)
23224 op = XVECEXP (op, 0, 0);
23227 early_op = XEXP (op, 0);
23229 /* This is either an actual independent shift, or a shift applied to
23230 the first operand of another operation. We want the value being
23231 shifted, in either case. */
23232 if (GET_CODE (early_op) != REG)
23233 early_op = XEXP (early_op, 0);
23235 return !reg_overlap_mentioned_p (value, early_op);
23238 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23239 have an early register mult dependency on the result of
23243 arm_no_early_mul_dep (rtx producer, rtx consumer)
23245 rtx value = PATTERN (producer);
23246 rtx op = PATTERN (consumer);
23248 if (GET_CODE (value) == COND_EXEC)
23249 value = COND_EXEC_CODE (value);
23250 if (GET_CODE (value) == PARALLEL)
23251 value = XVECEXP (value, 0, 0);
23252 value = XEXP (value, 0);
23253 if (GET_CODE (op) == COND_EXEC)
23254 op = COND_EXEC_CODE (op);
23255 if (GET_CODE (op) == PARALLEL)
23256 op = XVECEXP (op, 0, 0);
23259 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23261 if (GET_CODE (XEXP (op, 0)) == MULT)
23262 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23264 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23270 /* We can't rely on the caller doing the proper promotion when
23271 using APCS or ATPCS. */
23274 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23276 return !TARGET_AAPCS_BASED;
23279 static enum machine_mode
23280 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23281 enum machine_mode mode,
23282 int *punsignedp ATTRIBUTE_UNUSED,
23283 const_tree fntype ATTRIBUTE_UNUSED,
23284 int for_return ATTRIBUTE_UNUSED)
23286 if (GET_MODE_CLASS (mode) == MODE_INT
23287 && GET_MODE_SIZE (mode) < 4)
23293 /* AAPCS based ABIs use short enums by default. */
23296 arm_default_short_enums (void)
23298 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23302 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23305 arm_align_anon_bitfield (void)
23307 return TARGET_AAPCS_BASED;
23311 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23314 arm_cxx_guard_type (void)
23316 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23319 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23320 has an accumulator dependency on the result of the producer (a
23321 multiplication instruction) and no other dependency on that result. */
23323 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23325 rtx mul = PATTERN (producer);
23326 rtx mac = PATTERN (consumer);
23328 rtx mac_op0, mac_op1, mac_acc;
23330 if (GET_CODE (mul) == COND_EXEC)
23331 mul = COND_EXEC_CODE (mul);
23332 if (GET_CODE (mac) == COND_EXEC)
23333 mac = COND_EXEC_CODE (mac);
23335 /* Check that mul is of the form (set (...) (mult ...))
23336 and mla is of the form (set (...) (plus (mult ...) (...))). */
23337 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23338 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23339 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23342 mul_result = XEXP (mul, 0);
23343 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23344 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23345 mac_acc = XEXP (XEXP (mac, 1), 1);
23347 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23348 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23349 && !reg_overlap_mentioned_p (mul_result, mac_op1));
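/* E.g. "mul r0, r1, r2" followed by "mla r3, r4, r5, r0" satisfies
   this test, since r0 feeds only the accumulator operand; by
   contrast "mla r3, r0, r5, r0" fails it, because r0 is also a
   multiplicand.  */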
23353 /* The EABI says test the least significant bit of a guard variable. */
23356 arm_cxx_guard_mask_bit (void)
23358 return TARGET_AAPCS_BASED;
23362 /* The EABI specifies that all array cookies are 8 bytes long. */
23365 arm_get_cookie_size (tree type)
23369 if (!TARGET_AAPCS_BASED)
23370 return default_cxx_get_cookie_size (type);
23372 size = build_int_cst (sizetype, 8);
23377 /* The EABI says that array cookies should also contain the element size. */
23380 arm_cookie_has_size (void)
23382 return TARGET_AAPCS_BASED;
23386 /* The EABI says constructors and destructors should return a pointer to
23387 the object constructed/destroyed. */
23390 arm_cxx_cdtor_returns_this (void)
23392 return TARGET_AAPCS_BASED;
23395 /* The EABI says that an inline function may never be the key
23396 method. */
23399 arm_cxx_key_method_may_be_inline (void)
23401 return !TARGET_AAPCS_BASED;
23405 arm_cxx_determine_class_data_visibility (tree decl)
23407 if (!TARGET_AAPCS_BASED
23408 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23411 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23412 is exported. However, on systems without dynamic vague linkage,
23413 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23414 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23415 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23417 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23418 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23422 arm_cxx_class_data_always_comdat (void)
23424 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23425 vague linkage if the class has no key function. */
23426 return !TARGET_AAPCS_BASED;
23430 /* The EABI says __aeabi_atexit should be used to register static
23431 destructors. */
23434 arm_cxx_use_aeabi_atexit (void)
23436 return TARGET_AAPCS_BASED;
23441 arm_set_return_address (rtx source, rtx scratch)
23443 arm_stack_offsets *offsets;
23444 HOST_WIDE_INT delta;
23446 unsigned long saved_regs;
23448 offsets = arm_get_frame_offsets ();
23449 saved_regs = offsets->saved_regs_mask;
23451 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23452 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23455 if (frame_pointer_needed)
23456 addr = plus_constant(hard_frame_pointer_rtx, -4);
23459 /* LR will be the first saved register. */
23460 delta = offsets->outgoing_args - (offsets->frame + 4);
23465 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23466 GEN_INT (delta & ~4095)));
23471 addr = stack_pointer_rtx;
23473 addr = plus_constant (addr, delta);
23475 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23481 thumb_set_return_address (rtx source, rtx scratch)
23483 arm_stack_offsets *offsets;
23484 HOST_WIDE_INT delta;
23485 HOST_WIDE_INT limit;
23488 unsigned long mask;
23492 offsets = arm_get_frame_offsets ();
23493 mask = offsets->saved_regs_mask;
23494 if (mask & (1 << LR_REGNUM))
23497 /* Find the saved regs. */
23498 if (frame_pointer_needed)
23500 delta = offsets->soft_frame - offsets->saved_args;
23501 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23507 delta = offsets->outgoing_args - offsets->saved_args;
23510 /* Allow for the stack frame. */
23511 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23513 /* The link register is always the first saved register. */
23516 /* Construct the address. */
23517 addr = gen_rtx_REG (SImode, reg);
23520 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23521 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23525 addr = plus_constant (addr, delta);
23527 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23530 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23533 /* Implements target hook vector_mode_supported_p. */
23535 arm_vector_mode_supported_p (enum machine_mode mode)
23537 /* Neon also supports V2SImode, etc. listed in the clause below. */
23538 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23539 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23542 if ((TARGET_NEON || TARGET_IWMMXT)
23543 && ((mode == V2SImode)
23544 || (mode == V4HImode)
23545 || (mode == V8QImode)))
23548 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23549 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23550 || mode == V2HAmode))
23556 /* Implements target hook array_mode_supported_p. */
23559 arm_array_mode_supported_p (enum machine_mode mode,
23560 unsigned HOST_WIDE_INT nelems)
23563 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23564 && (nelems >= 2 && nelems <= 4))
23570 /* Use the option -mvectorize-with-neon-double to override the use of quadword
23571 registers when autovectorizing for Neon, at least until multiple vector
23572 widths are supported properly by the middle-end. */
23574 static enum machine_mode
23575 arm_preferred_simd_mode (enum machine_mode mode)
23581 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23583 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23585 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23587 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23589 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23596 if (TARGET_REALLY_IWMMXT)
23612 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23614 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23615 using r0-r4 for function arguments, r7 for the stack frame and don't have
23616 enough left over to do doubleword arithmetic. For Thumb-2 all the
23617 potentially problematic instructions accept high registers so this is not
23618 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23619 that require many low registers. */
23621 arm_class_likely_spilled_p (reg_class_t rclass)
23623 if ((TARGET_THUMB1 && rclass == LO_REGS)
23624 || rclass == CC_REG)
23630 /* Implements target hook small_register_classes_for_mode_p. */
23632 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23634 return TARGET_THUMB1;
23637 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23638 ARM insns and therefore guarantee that the shift count is modulo 256.
23639 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
23640 guarantee no particular behavior for out-of-range counts. */
23642 static unsigned HOST_WIDE_INT
23643 arm_shift_truncation_mask (enum machine_mode mode)
23645 return mode == SImode ? 255 : 0;
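/* So an SImode variable shift by 260 behaves exactly like a shift by
   260 & 255 == 4, letting the middle-end delete explicit masking of
   the count; for DImode we return 0 and promise nothing.  */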
23649 /* Map internal gcc register numbers to DWARF2 register numbers. */
23652 arm_dbx_register_number (unsigned int regno)
23657 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23658 compatibility. The EABI defines them as registers 96-103. */
23659 if (IS_FPA_REGNUM (regno))
23660 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23662 if (IS_VFP_REGNUM (regno))
23664 /* See comment in arm_dwarf_register_span. */
23665 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23666 return 64 + regno - FIRST_VFP_REGNUM;
23668 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23671 if (IS_IWMMXT_GR_REGNUM (regno))
23672 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23674 if (IS_IWMMXT_REGNUM (regno))
23675 return 112 + regno - FIRST_IWMMXT_REGNUM;
23677 gcc_unreachable ();
23680 /* Dwarf models VFPv3 registers as 32 64-bit registers.
23681 GCC models them as 64 32-bit registers, so we need to describe this to
23682 the DWARF generation code. Other registers can use the default. */
23684 arm_dwarf_register_span (rtx rtl)
23691 regno = REGNO (rtl);
23692 if (!IS_VFP_REGNUM (regno))
23695 /* XXX FIXME: The EABI defines two VFP register ranges:
23696 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
23697 256-287: D0-D31
23698 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23699 corresponding D register. Until GDB supports this, we shall use the
23700 legacy encodings. We also use these encodings for D0-D15 for
23701 compatibility with older debuggers. */
23702 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23705 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23706 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23707 regno = (regno - FIRST_VFP_REGNUM) / 2;
23708 for (i = 0; i < nregs; i++)
23709 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23714 #if ARM_UNWIND_INFO
23715 /* Emit unwind directives for a store-multiple instruction or stack pointer
23716 push during alignment.
23717 These should only ever be generated by the function prologue code, so
23718 expect them to have a particular form. */
23721 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23724 HOST_WIDE_INT offset;
23725 HOST_WIDE_INT nregs;
23731 e = XVECEXP (p, 0, 0);
23732 if (GET_CODE (e) != SET)
23735 /* First insn will adjust the stack pointer. */
23736 if (GET_CODE (e) != SET
23737 || GET_CODE (XEXP (e, 0)) != REG
23738 || REGNO (XEXP (e, 0)) != SP_REGNUM
23739 || GET_CODE (XEXP (e, 1)) != PLUS)
23742 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23743 nregs = XVECLEN (p, 0) - 1;
23745 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23748 /* The function prologue may also push pc, but not annotate it as it is
23749 never restored. We turn this into a stack pointer adjustment. */
23750 if (nregs * 4 == offset - 4)
23752 fprintf (asm_out_file, "\t.pad #4\n");
23756 fprintf (asm_out_file, "\t.save {");
23758 else if (IS_VFP_REGNUM (reg))
23761 fprintf (asm_out_file, "\t.vsave {");
23763 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23765 /* FPA registers are done differently. */
23766 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23770 /* Unknown register type. */
23773 /* If the stack increment doesn't match the size of the saved registers,
23774 something has gone horribly wrong. */
23775 if (offset != nregs * reg_size)
23780 /* The remaining insns will describe the stores. */
23781 for (i = 1; i <= nregs; i++)
23783 /* Expect (set (mem <addr>) (reg)).
23784 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23785 e = XVECEXP (p, 0, i);
23786 if (GET_CODE (e) != SET
23787 || GET_CODE (XEXP (e, 0)) != MEM
23788 || GET_CODE (XEXP (e, 1)) != REG)
23791 reg = REGNO (XEXP (e, 1));
23796 fprintf (asm_out_file, ", ");
23797 /* We can't use %r for vfp because we need to use the
23798 double precision register names. */
23799 if (IS_VFP_REGNUM (reg))
23800 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23802 asm_fprintf (asm_out_file, "%r", reg);
23804 #ifdef ENABLE_CHECKING
23805 /* Check that the addresses are consecutive. */
23806 e = XEXP (XEXP (e, 0), 0);
23807 if (GET_CODE (e) == PLUS)
23809 offset += reg_size;
23810 if (GET_CODE (XEXP (e, 0)) != REG
23811 || REGNO (XEXP (e, 0)) != SP_REGNUM
23812 || GET_CODE (XEXP (e, 1)) != CONST_INT
23813 || offset != INTVAL (XEXP (e, 1)))
23817 || GET_CODE (e) != REG
23818 || REGNO (e) != SP_REGNUM)
23822 fprintf (asm_out_file, "}\n");
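/* Examples of the directives emitted above (illustrative):
   a prologue "push {r4, r5, lr}" is annotated as
	.save {r4, r5, lr}
   and a VFP store-multiple of d8-d9 as
	.vsave {d8, d9}
   */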
23825 /* Emit unwind directives for a SET. */
23828 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23836 switch (GET_CODE (e0))
23839 /* Pushing a single register. */
23840 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23841 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23842 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23845 asm_fprintf (asm_out_file, "\t.save ");
23846 if (IS_VFP_REGNUM (REGNO (e1)))
23847 asm_fprintf(asm_out_file, "{d%d}\n",
23848 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23850 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23854 if (REGNO (e0) == SP_REGNUM)
23856 /* A stack increment. */
23857 if (GET_CODE (e1) != PLUS
23858 || GET_CODE (XEXP (e1, 0)) != REG
23859 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23860 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23863 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23864 -INTVAL (XEXP (e1, 1)));
23866 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23868 HOST_WIDE_INT offset;
23870 if (GET_CODE (e1) == PLUS)
23872 if (GET_CODE (XEXP (e1, 0)) != REG
23873 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23875 reg = REGNO (XEXP (e1, 0));
23876 offset = INTVAL (XEXP (e1, 1));
23877 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23878 HARD_FRAME_POINTER_REGNUM, reg,
23881 else if (GET_CODE (e1) == REG)
23884 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23885 HARD_FRAME_POINTER_REGNUM, reg);
23890 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23892 /* Move from sp to reg. */
23893 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23895 else if (GET_CODE (e1) == PLUS
23896 && GET_CODE (XEXP (e1, 0)) == REG
23897 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23898 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23900 /* Set reg to offset from sp. */
23901 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23902 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
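/* Illustrative mappings for the cases handled above:
	sub	sp, sp, #16	->	.pad #16
	add	fp, sp, #8	->	.setfp fp, sp, #8
	mov	r4, sp		->	.movsp r4
   */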
23914 /* Emit unwind directives for the given insn. */
23917 arm_unwind_emit (FILE * asm_out_file, rtx insn)
23920 bool handled_one = false;
23922 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23925 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23926 && (TREE_NOTHROW (current_function_decl)
23927 || crtl->all_throwers_are_sibcalls))
23930 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
23933 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
23935 pat = XEXP (note, 0);
23936 switch (REG_NOTE_KIND (note))
23938 case REG_FRAME_RELATED_EXPR:
23941 case REG_CFA_REGISTER:
23944 pat = PATTERN (insn);
23945 if (GET_CODE (pat) == PARALLEL)
23946 pat = XVECEXP (pat, 0, 0);
23949 /* Only emitted for IS_STACKALIGN re-alignment. */
23954 src = SET_SRC (pat);
23955 dest = SET_DEST (pat);
23957 gcc_assert (src == stack_pointer_rtx);
23958 reg = REGNO (dest);
23959 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
23962 handled_one = true;
23965 case REG_CFA_DEF_CFA:
23966 case REG_CFA_EXPRESSION:
23967 case REG_CFA_ADJUST_CFA:
23968 case REG_CFA_OFFSET:
23969 /* ??? Only handling here what we actually emit. */
23970 gcc_unreachable ();
23978 pat = PATTERN (insn);
23981 switch (GET_CODE (pat))
23984 arm_unwind_emit_set (asm_out_file, pat);
23988 /* Store multiple. */
23989 arm_unwind_emit_sequence (asm_out_file, pat);
23998 /* Output a reference from a function exception table to the type_info
23999 object X. The EABI specifies that the symbol should be relocated by
24000 an R_ARM_TARGET2 relocation. */
24003 arm_output_ttype (rtx x)
24005 fputs ("\t.word\t", asm_out_file);
24006 output_addr_const (asm_out_file, x);
24007 /* Use special relocations for symbol references. */
24008 if (GET_CODE (x) != CONST_INT)
24009 fputs ("(TARGET2)", asm_out_file);
24010 fputc ('\n', asm_out_file);
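/* E.g. a handler for "int" emits, assuming the usual mangled
   type_info name:
	.word	_ZTIi(TARGET2)
   */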
24015 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24018 arm_asm_emit_except_personality (rtx personality)
24020 fputs ("\t.personality\t", asm_out_file);
24021 output_addr_const (asm_out_file, personality);
24022 fputc ('\n', asm_out_file);
24025 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24028 arm_asm_init_sections (void)
24030 exception_section = get_unnamed_section (0, output_section_asm_op,
24033 #endif /* ARM_UNWIND_INFO */
24035 /* Output unwind directives for the start/end of a function. */
24038 arm_output_fn_unwind (FILE * f, bool prologue)
24040 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24044 fputs ("\t.fnstart\n", f);
24045 else
24047 /* If this function will never be unwound, then mark it as such.
24048 The same condition is used in arm_unwind_emit to suppress
24049 the frame annotations. */
24050 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24051 && (TREE_NOTHROW (current_function_decl)
24052 || crtl->all_throwers_are_sibcalls))
24053 fputs ("\t.cantunwind\n", f);
24055 fputs ("\t.fnend\n", f);
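/* Sketch of the resulting bracketing of a function:

     .fnstart             @ start of prologue
     ...
     .cantunwind          @ epilogue, only if the function never unwinds
     .fnend               @ end of epilogue  */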
24060 arm_emit_tls_decoration (FILE *fp, rtx x)
24062 enum tls_reloc reloc;
24065 val = XVECEXP (x, 0, 0);
24066 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24068 output_addr_const (fp, val);
24070 switch (reloc)
24072 case TLS_GD32:
24073 fputs ("(tlsgd)", fp);
24074 break;
24075 case TLS_LDM32:
24076 fputs ("(tlsldm)", fp);
24077 break;
24078 case TLS_LDO32:
24079 fputs ("(tlsldo)", fp);
24080 break;
24081 case TLS_IE32:
24082 fputs ("(gottpoff)", fp);
24083 break;
24084 case TLS_LE32:
24085 fputs ("(tpoff)", fp);
24086 break;
24087 case TLS_DESCSEQ:
24088 fputs ("(tlsdesc)", fp);
24089 break;
24090 default:
24091 gcc_unreachable ();
24100 fputs (" + (. - ", fp);
24101 output_addr_const (fp, XVECEXP (x, 0, 2));
24102 /* For DESCSEQ the 3rd operand encodes thumbness and is added. */
24103 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24104 output_addr_const (fp, XVECEXP (x, 0, 3));
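/* For instance, a global-dynamic access to TLS symbol "x" typically
   produces a literal-pool entry of roughly the form

     .word   x(tlsgd) + (. - .LPIC0 - 8)

   where the label and addend (assumed values here) come from operands
   2 and 3 of the UNSPEC_TLS.  */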
24114 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24117 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24119 gcc_assert (size == 4);
24120 fputs ("\t.word\t", file);
24121 output_addr_const (file, x);
24122 fputs ("(tlsldo)", file);
24125 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24128 arm_output_addr_const_extra (FILE *fp, rtx x)
24130 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24131 return arm_emit_tls_decoration (fp, x);
24132 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24135 int labelno = INTVAL (XVECEXP (x, 0, 0));
24137 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24138 assemble_name_raw (fp, label);
24142 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24144 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24148 output_addr_const (fp, XVECEXP (x, 0, 0));
24152 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24154 output_addr_const (fp, XVECEXP (x, 0, 0));
24158 output_addr_const (fp, XVECEXP (x, 0, 1));
24162 else if (GET_CODE (x) == CONST_VECTOR)
24163 return arm_emit_vector_const (fp, x);
24168 /* Output assembly for a shift instruction.
24169 SET_FLAGS determines how the instruction modifies the condition codes.
24170 0 - Do not set condition codes.
24171 1 - Set condition codes.
24172 2 - Use smallest instruction. */
24174 arm_output_shift (rtx *operands, int set_flags)
24177 static const char flag_chars[3] = {'?', '.', '!'};
24182 c = flag_chars[set_flags];
24183 if (TARGET_UNIFIED_ASM)
24185 shift = shift_op (operands[3], &val);
24189 operands[2] = GEN_INT (val);
24190 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24193 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24196 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24197 output_asm_insn (pattern, operands);
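/* For example, for "x = y << 2" with SET_FLAGS == 0 this builds the
   pattern "lsl%?\t%0, %1, %2" under unified asm, printing roughly as

     lsl     r0, r1, #2

   while the divided-syntax fallback prints

     mov     r0, r1, lsl #2

   (register numbers assumed).  */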
24201 /* Output a Thumb-1 casesi dispatch sequence. */
24203 thumb1_output_casesi (rtx *operands)
24205 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24207 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24209 switch (GET_MODE (diff_vec))
24212 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24213 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24215 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24216 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24218 return "bl\t%___gnu_thumb1_case_si";
24220 gcc_unreachable ();
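/* The __gnu_thumb1_case_* helpers in libgcc locate the dispatch table
   through their return address, so the bl must immediately precede the
   table; %_ expands to the user label prefix.  */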
24224 /* Output a Thumb-2 casesi instruction. */
24226 thumb2_output_casesi (rtx *operands)
24228 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24230 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24232 output_asm_insn ("cmp\t%0, %1", operands);
24233 output_asm_insn ("bhi\t%l3", operands);
24234 switch (GET_MODE (diff_vec))
24237 return "tbb\t[%|pc, %0]";
24239 return "tbh\t[%|pc, %0, lsl #1]";
24243 output_asm_insn ("adr\t%4, %l2", operands);
24244 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24245 output_asm_insn ("add\t%4, %4, %5", operands);
24250 output_asm_insn ("adr\t%4, %l2", operands);
24251 return "ldr\t%|pc, [%4, %0, lsl #2]";
24254 gcc_unreachable ();
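/* Sketch of the dispatch emitted for a byte-offset (QImode) table,
   with assumed operands:

     cmp     r0, #9          @ index against number of cases
     bhi     .Ldefault
     tbb     [pc, r0]        @ pc-relative table branch  */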
24258 /* Most ARM cores are single issue, but some newer ones can dual issue.
24259 The scheduler descriptions rely on this being correct. */
24261 arm_issue_rate (void)
24283 /* A table and a function to perform ARM-specific name mangling for
24284 NEON vector types in order to conform to the AAPCS (see "Procedure
24285 Call Standard for the ARM Architecture", Appendix A). To qualify
24286 for emission with the mangled names defined in that document, a
24287 vector type must not only be of the correct mode but also be
24288 composed of NEON vector element types (e.g. __builtin_neon_qi). */
24291 enum machine_mode mode;
24292 const char *element_type_name;
24293 const char *aapcs_name;
24294 } arm_mangle_map_entry;
24296 static arm_mangle_map_entry arm_mangle_map[] = {
24297 /* 64-bit containerized types. */
24298 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
24299 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
24300 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
24301 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
24302 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
24303 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
24304 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
24305 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
24306 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
24307 /* 128-bit containerized types. */
24308 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
24309 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
24310 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
24311 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
24312 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
24313 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
24314 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
24315 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
24316 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
24317 { VOIDmode, NULL, NULL }
24321 arm_mangle_type (const_tree type)
24323 arm_mangle_map_entry *pos = arm_mangle_map;
24325 /* The ARM ABI documents (10th October 2008) say that "__va_list"
24326 has to be mangled as if it is in the "std" namespace. */
24327 if (TARGET_AAPCS_BASED
24328 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24330 static bool warned;
24331 if (!warned && warn_psabi && !in_system_header)
24333 warned = true;
24334 inform (input_location,
24335 "the mangling of %<va_list%> has changed in GCC 4.4");
24337 return "St9__va_list";
24340 /* Half-precision float. */
24341 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24342 return "Dh";
24344 if (TREE_CODE (type) != VECTOR_TYPE)
24345 return NULL;
24347 /* Check the mode of the vector type, and the name of the vector
24348 element type, against the table. */
24349 while (pos->mode != VOIDmode)
24351 tree elt_type = TREE_TYPE (type);
24353 if (pos->mode == TYPE_MODE (type)
24354 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
24355 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
24356 pos->element_type_name))
24357 return pos->aapcs_name;
24359 pos++;
24362 /* Use the default mangling for unrecognized (possibly user-defined)
24363 vector types. */
24364 return NULL;
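/* Worked example: "int8x8_t" from arm_neon.h has mode V8QImode and
   element type __builtin_neon_qi, so by the table above

     void f (int8x8_t);

   mangles as _Z1f15__simd64_int8_t rather than via the generic vector
   mangling.  */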
24367 /* Order of allocation of core registers for Thumb: this allocation is
24368 written over the corresponding initial entries of the array
24369 initialized with REG_ALLOC_ORDER. We allocate all low registers
24370 first. Saving and restoring a low register is usually cheaper than
24371 using a call-clobbered high register. */
24373 static const int thumb_core_reg_alloc_order[] =
24375 3, 2, 1, 0, 4, 5, 6, 7,
24376 14, 12, 8, 9, 10, 11, 13, 15
24379 /* Adjust register allocation order when compiling for Thumb. */
24382 arm_order_regs_for_local_alloc (void)
24384 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
24385 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
24387 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
24388 sizeof (thumb_core_reg_alloc_order));
24391 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
24394 arm_frame_pointer_required (void)
24396 return (cfun->has_nonlocal_label
24397 || SUBTARGET_FRAME_POINTER_REQUIRED
24398 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
24401 /* Only thumb1 can't support conditional execution, so return true if
24402 the target is not thumb1. */
24404 arm_have_conditional_execution (void)
24406 return !TARGET_THUMB1;
24409 /* Legitimize a memory reference for sync primitive implemented using
24410 ldrex / strex. We currently force the form of the reference to be
24411 indirect without offset. We do not yet support the indirect offset
24412 addressing supported by some ARM targets for these
24413 semantics. */
24415 arm_legitimize_sync_memory (rtx memory)
24417 rtx addr = force_reg (Pmode, XEXP (memory, 0));
24418 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
24420 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
24421 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
24422 return legitimate_memory;
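/* E.g. a reference such as [r3, #8] is rewritten so that the address
   is first computed into a fresh register and the access becomes a
   plain [rN], the only form handled here, while preserving volatility
   and the memory-barrier alias set.  */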
24425 /* An instruction emitter. */
24426 typedef void (* emit_f) (int label, const char *, rtx *);
24428 /* An instruction emitter that emits via the conventional
24429 output_asm_insn. */
24431 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
24433 output_asm_insn (pattern, operands);
24436 /* Count the number of emitted synchronization instructions. */
24437 static unsigned arm_insn_count;
24439 /* An emitter that counts emitted instructions but does not actually
24440 emit instructions into the instruction stream. */
24442 arm_count (int label,
24443 const char *pattern ATTRIBUTE_UNUSED,
24444 rtx *operands ATTRIBUTE_UNUSED)
24446 if (! label)
24447 ++ arm_insn_count;
24450 /* Construct a pattern using conventional output formatting and feed
24451 it to output_asm_insn. Provides a mechanism to construct the
24452 output pattern on the fly. Note the hard limit on the pattern
24453 buffer size. */
24454 static void ATTRIBUTE_PRINTF_4
24455 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
24456 const char *pattern, ...)
24458 char buffer[256];
24459 va_list ap;
24461 va_start (ap, pattern);
24462 vsprintf (buffer, pattern, ap);
24463 va_end (ap);
24464 emit (label, buffer, operands);
24467 /* Emit the memory barrier instruction, if any, provided by this
24468 target to a specified emitter. */
24470 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
24472 if (TARGET_HAVE_DMB)
24474 /* Note we issue a system level barrier. We should consider
24475 issuing an inner shareability zone barrier here instead, i.e.
24476 "DMB ISH". */
24477 emit (0, "dmb\tsy", operands);
24481 if (TARGET_HAVE_DMB_MCR)
24483 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
24487 gcc_unreachable ();
24490 /* Emit the memory barrier instruction, if any, provided by this
24491 target. */
24493 arm_output_memory_barrier (rtx *operands)
24495 arm_process_output_memory_barrier (arm_emit, operands);
24499 /* Helper to figure out the instruction suffix required on ldrex/strex
24500 for operations on an object of the specified mode. */
24501 static const char *
24502 arm_ldrex_suffix (enum machine_mode mode)
24506 case QImode: return "b";
24507 case HImode: return "h";
24508 case SImode: return "";
24509 case DImode: return "d";
24511 gcc_unreachable ();
24516 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
24517 memory location. */
24519 arm_output_ldrex (emit_f emit,
24520 enum machine_mode mode,
24526 operands[0] = target;
24527 if (mode != DImode)
24529 const char *suffix = arm_ldrex_suffix (mode);
24530 operands[1] = memory;
24531 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
24535 /* The restrictions on target registers in ARM mode are that the two
24536 registers are consecutive and the first one is even; Thumb is
24537 actually more flexible, but DI should give us this anyway.
24538 Note that the 1st register always gets the lowest word in memory. */
24539 gcc_assert ((REGNO (target) & 1) == 0);
24540 operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1);
24541 operands[2] = memory;
24542 arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2");
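/* For DImode this prints e.g. "ldrexd  r4, r5, [r0]" (registers
   assumed); the even/odd pair assertion above matches the ARM-mode
   encoding restriction on ldrexd.  */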
24546 /* Emit a strex{b,h,d, } instruction appropriate for the specified
24547 memory location. */
24549 arm_output_strex (emit_f emit,
24550 enum machine_mode mode,
24558 operands[0] = result;
24559 operands[1] = value;
24560 if (mode != DImode)
24562 const char *suffix = arm_ldrex_suffix (mode);
24563 operands[2] = memory;
24564 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
24569 /* The restrictions on target registers in ARM mode are that the two
24570 registers are consecutive and the first one is even; Thumb is
24571 actually more flexible, but DI should give us this anyway.
24572 Note that the 1st register always gets the lowest word in memory. */
24573 gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
24574 operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
24575 operands[3] = memory;
24576 arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3",
24581 /* Helper to emit an it instruction in Thumb2 mode only; although the
24582 assembler will ignore it in ARM mode, emitting it there would mess up
24583 the instruction counts we sometimes keep. FLAGS is the extra t's and
24584 e's appended when more than one following instruction is conditional. */
24586 arm_output_it (emit_f emit, const char *flags, const char *cond)
24588 rtx operands[1]; /* Don't actually use the operand. */
24590 arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond);
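/* E.g. arm_output_it (emit, "", "eq") prints "it eq"; FLAGS of "t"
   would print "itt eq", covering two following conditional
   instructions.  */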
24593 /* Helper to emit a two operand instruction. */
24595 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
24601 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
24604 /* Helper to emit a three operand instruction. */
24606 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
24613 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
24616 /* Emit a load store exclusive synchronization loop.
24617
24618 do
24619 old_value = [mem]
24620 if old_value != required_value
24621 break;
24622 t1 = sync_op (old_value, new_value)
24623 [mem] = t1, t2 = [0|1]
24624 while ! t2
24625
24626 Note:
24627 t1 == t2 is not permitted
24628 t1 == old_value is permitted
24629
24630 required_value:
24631
24632 RTX register representing the required old_value for
24633 the modify to continue; if NULL no comparison is performed. */
24635 arm_output_sync_loop (emit_f emit,
24636 enum machine_mode mode,
24639 rtx required_value,
24643 enum attr_sync_op sync_op,
24644 int early_barrier_required)
24647 /* We'll use the lo for the normal rtx in the non-DI case
24648 as well as the least-sig word in the DI case. */
24649 rtx old_value_lo, required_value_lo, new_value_lo, t1_lo;
24650 rtx old_value_hi, required_value_hi, new_value_hi, t1_hi;
24652 bool is_di = mode == DImode;
24654 gcc_assert (t1 != t2);
24656 if (early_barrier_required)
24657 arm_process_output_memory_barrier (emit, NULL);
24659 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
24661 arm_output_ldrex (emit, mode, old_value, memory);
24665 old_value_lo = gen_lowpart (SImode, old_value);
24666 old_value_hi = gen_highpart (SImode, old_value);
24667 if (required_value)
24669 required_value_lo = gen_lowpart (SImode, required_value);
24670 required_value_hi = gen_highpart (SImode, required_value);
24674 /* Silence a false potentially-unused warning. */
24675 required_value_lo = NULL_RTX;
24676 required_value_hi = NULL_RTX;
24678 new_value_lo = gen_lowpart (SImode, new_value);
24679 new_value_hi = gen_highpart (SImode, new_value);
24680 t1_lo = gen_lowpart (SImode, t1);
24681 t1_hi = gen_highpart (SImode, t1);
24685 old_value_lo = old_value;
24686 new_value_lo = new_value;
24687 required_value_lo = required_value;
24690 /* Silence a false potentially-unused warning. */
24692 new_value_hi = NULL_RTX;
24693 required_value_hi = NULL_RTX;
24694 old_value_hi = NULL_RTX;
24697 if (required_value)
24699 operands[0] = old_value_lo;
24700 operands[1] = required_value_lo;
24702 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
24705 arm_output_it (emit, "", "eq");
24706 arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi);
24708 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
24711 switch (sync_op)
24713 case SYNC_OP_ADD:
24714 arm_output_op3 (emit, is_di ? "adds" : "add",
24715 t1_lo, old_value_lo, new_value_lo);
24716 if (is_di)
24717 arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi);
24718 break;
24720 case SYNC_OP_SUB:
24721 arm_output_op3 (emit, is_di ? "subs" : "sub",
24722 t1_lo, old_value_lo, new_value_lo);
24723 if (is_di)
24724 arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi);
24725 break;
24727 case SYNC_OP_IOR:
24728 arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo);
24729 if (is_di)
24730 arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi);
24731 break;
24733 case SYNC_OP_XOR:
24734 arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo);
24735 if (is_di)
24736 arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi);
24737 break;
24739 case SYNC_OP_AND:
24740 arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
24741 if (is_di)
24742 arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
24743 break;
24745 case SYNC_OP_NAND:
24746 arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
24747 if (is_di)
24748 arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
24749 arm_output_op2 (emit, "mvn", t1_lo, t1_lo);
24750 if (is_di)
24751 arm_output_op2 (emit, "mvn", t1_hi, t1_hi);
24752 break;
24754 case SYNC_OP_NONE:
24756 t1_lo = new_value_lo;
24757 if (is_di)
24758 t1_hi = new_value_hi;
24759 break;
24762 /* Note that the result of strex is a 0/1 flag that always occupies a single register. */
24765 arm_output_strex (emit, mode, "", t2, t1, memory);
24767 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
24768 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
24769 LOCAL_LABEL_PREFIX);
24773 /* Use old_value for the return value because for some operations
24774 the old_value can easily be restored. This saves one register. */
24775 arm_output_strex (emit, mode, "", old_value_lo, t1, memory);
24776 operands[0] = old_value_lo;
24777 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
24778 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
24779 LOCAL_LABEL_PREFIX);
24781 /* Note that we only used the _lo half of old_value as a temporary
24782 so in DI we don't have to restore the _hi part. */
24783 switch (sync_op)
24785 case SYNC_OP_ADD:
24786 arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo);
24787 break;
24789 case SYNC_OP_SUB:
24790 arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo);
24791 break;
24793 case SYNC_OP_XOR:
24794 arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo);
24795 break;
24797 case SYNC_OP_NONE:
24798 arm_output_op2 (emit, "mov", old_value_lo, required_value_lo);
24799 break;
24801 default:
24802 gcc_unreachable ();
24806 /* Note: the label is placed before the barrier so that in the cmp
24807 failure case we still get a barrier to stop subsequent loads floating
24808 upwards past the ldrex; see PR target/48126. */
24809 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
24810 arm_process_output_memory_barrier (emit, NULL);
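/* Illustrative expansion for an SImode fetch-and-add (registers and
   label numbers assumed; REQUIRED_VALUE is NULL, so no compare or
   failure branch appears):

     dmb     sy              @ early barrier
   .LSYT6:
     ldrex   r0, [r2]        @ old_value = [mem]
     add     r1, r0, r3      @ t1 = sync_op (old_value, new_value)
     strex   r4, r1, [r2]    @ t2 = 0 iff the store succeeded
     teq     r4, #0
     bne     .LSYT6
   .LSYB6:
     dmb     sy              @ final barrier  */

/* Return operand INDEX - 1 from OPERANDS, or DEFAULT_VALUE when INDEX
   is zero; the sync_* insn attributes fetched below use zero to mean
   "no such operand".  */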
24814 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
24817 default_value = operands[index - 1];
24819 return default_value;
24822 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
24823 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
24825 /* Extract the operands for a synchronization instruction from the
24826 instruction's attributes and emit the instruction. */
24828 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
24830 rtx result, memory, required_value, new_value, t1, t2;
24831 bool early_barrier;
24832 enum machine_mode mode;
24833 enum attr_sync_op sync_op;
24835 result = FETCH_SYNC_OPERAND(result, 0);
24836 memory = FETCH_SYNC_OPERAND(memory, 0);
24837 required_value = FETCH_SYNC_OPERAND(required_value, 0);
24838 new_value = FETCH_SYNC_OPERAND(new_value, 0);
24839 t1 = FETCH_SYNC_OPERAND(t1, 0);
24840 t2 = FETCH_SYNC_OPERAND(t2, 0);
24841 early_barrier =
24842 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
24843 sync_op = get_attr_sync_op (insn);
24844 mode = GET_MODE (memory);
24846 arm_output_sync_loop (emit, mode, result, memory, required_value,
24847 new_value, t1, t2, sync_op, early_barrier);
24850 /* Emit a synchronization instruction loop. */
24852 arm_output_sync_insn (rtx insn, rtx *operands)
24854 arm_process_output_sync_insn (arm_emit, insn, operands);
24858 /* Count the number of machine instructions that will be emitted for a
24859 synchronization instruction. Note that the emitter used does not
24860 emit instructions; it just counts them, being careful not
24861 to count labels. */
24863 arm_sync_loop_insns (rtx insn, rtx *operands)
24865 arm_insn_count = 0;
24866 arm_process_output_sync_insn (arm_count, insn, operands);
24867 return arm_insn_count;
24870 /* Helper to call a target sync instruction generator, dealing with
24871 the variation in operands required by the different generators. */
24873 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
24874 rtx memory, rtx required_value, rtx new_value)
24876 switch (generator->op)
24878 case arm_sync_generator_omn:
24879 gcc_assert (! required_value);
24880 return generator->u.omn (old_value, memory, new_value);
24882 case arm_sync_generator_omrn:
24883 gcc_assert (required_value);
24884 return generator->u.omrn (old_value, memory, required_value, new_value);
24890 /* Expand a synchronization loop. The synchronization loop is expanded
24891 as an opaque block of instructions in order to ensure that we do
24892 not subsequently get extraneous memory accesses inserted within the
24893 critical region. The exclusive access property of ldrex/strex is
24894 only guaranteed if there are no intervening memory accesses. */
24896 arm_expand_sync (enum machine_mode mode,
24897 struct arm_sync_generator *generator,
24898 rtx target, rtx memory, rtx required_value, rtx new_value)
24900 if (target == NULL)
24901 target = gen_reg_rtx (mode);
24903 memory = arm_legitimize_sync_memory (memory);
24904 if (mode != SImode && mode != DImode)
24906 rtx load_temp = gen_reg_rtx (SImode);
24908 if (required_value)
24909 required_value = convert_modes (SImode, mode, required_value, true);
24911 new_value = convert_modes (SImode, mode, new_value, true);
24912 emit_insn (arm_call_generator (generator, load_temp, memory,
24913 required_value, new_value));
24914 emit_move_insn (target, gen_lowpart (mode, load_temp));
24918 emit_insn (arm_call_generator (generator, target, memory, required_value,
24923 static unsigned int
24924 arm_autovectorize_vector_sizes (void)
24926 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
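/* The value is a bit mask of candidate vector sizes in bytes: 16 | 8
   lets the vectorizer try 128-bit NEON vectors and then fall back to
   64-bit ones, while 0 means only the preferred SIMD mode is tried.  */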
24930 arm_vector_alignment_reachable (const_tree type, bool is_packed)
24932 /* Vectors which aren't in packed structures will not be less aligned than
24933 the natural alignment of their element type, so this is safe. */
24934 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24937 return default_builtin_vector_alignment_reachable (type, is_packed);
24941 arm_builtin_support_vector_misalignment (enum machine_mode mode,
24942 const_tree type, int misalignment,
24945 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24947 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
24952 /* If the misalignment is unknown, we should be able to handle the access
24953 so long as it is not to a member of a packed data structure. */
24954 if (misalignment == -1)
24957 /* Return true if the misalignment is a multiple of the natural alignment
24958 of the vector's element type. This is probably always going to be
24959 true in practice, since we've already established that this isn't a
24960 packed access. */
24961 return ((misalignment % align) == 0);
24964 return default_builtin_support_vector_misalignment (mode, type, misalignment,
24969 arm_conditional_register_usage (void)
24971 int regno;
24973 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
24975 for (regno = FIRST_FPA_REGNUM;
24976 regno <= LAST_FPA_REGNUM; ++regno)
24977 fixed_regs[regno] = call_used_regs[regno] = 1;
24980 if (TARGET_THUMB1 && optimize_size)
24982 /* When optimizing for size on Thumb-1, it's better not
24983 to use the HI regs, because of the overhead of
24984 stacking them. */
24985 for (regno = FIRST_HI_REGNUM;
24986 regno <= LAST_HI_REGNUM; ++regno)
24987 fixed_regs[regno] = call_used_regs[regno] = 1;
24990 /* The link register can be clobbered by any branch insn,
24991 but we have no way to track that at present, so mark
24992 it as unavailable. */
24994 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
24996 if (TARGET_32BIT && TARGET_HARD_FLOAT)
24998 if (TARGET_MAVERICK)
25000 for (regno = FIRST_FPA_REGNUM;
25001 regno <= LAST_FPA_REGNUM; ++ regno)
25002 fixed_regs[regno] = call_used_regs[regno] = 1;
25003 for (regno = FIRST_CIRRUS_FP_REGNUM;
25004 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
25006 fixed_regs[regno] = 0;
25007 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
25012 /* VFPv3 registers are disabled when earlier VFP
25013 versions are selected due to the definition of
25014 LAST_VFP_REGNUM. */
25015 for (regno = FIRST_VFP_REGNUM;
25016 regno <= LAST_VFP_REGNUM; ++ regno)
25018 fixed_regs[regno] = 0;
25019 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25020 || regno >= FIRST_VFP_REGNUM + 32;
25025 if (TARGET_REALLY_IWMMXT)
25027 regno = FIRST_IWMMXT_GR_REGNUM;
25028 /* The 2002/10/09 revision of the XScale ABI has wCG0
25029 and wCG1 as call-preserved registers. The 2002/11/21
25030 revision changed this so that all wCG registers are
25031 scratch registers. */
25032 for (regno = FIRST_IWMMXT_GR_REGNUM;
25033 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25034 fixed_regs[regno] = 0;
25035 /* The XScale ABI has wR0 - wR9 as scratch registers,
25036 the rest as call-preserved registers. */
25037 for (regno = FIRST_IWMMXT_REGNUM;
25038 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25040 fixed_regs[regno] = 0;
25041 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25045 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25047 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25048 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25050 else if (TARGET_APCS_STACK)
25052 fixed_regs[10] = 1;
25053 call_used_regs[10] = 1;
25055 /* -mcaller-super-interworking reserves r11 for calls to
25056 _interwork_r11_call_via_rN(). Making the register global
25057 is an easy way of ensuring that it remains valid for all
25058 calls. */
25059 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25060 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25062 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25063 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25064 if (TARGET_CALLER_INTERWORKING)
25065 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25067 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25071 arm_preferred_rename_class (reg_class_t rclass)
25073 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25074 using GENERIC_REGS. During the register rename pass we prefer
25075 LO_REGS so that code size is more likely to be reduced. */
25076 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25077 return LO_REGS;
25078 else
25079 return NO_REGS;
25082 /* Compute the attribute "length" of insn "*push_multi".
25083 So this function MUST be kept in sync with that insn pattern. */
25085 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
25087 int i, regno, hi_reg;
25088 int num_saves = XVECLEN (parallel_op, 0);
25098 regno = REGNO (first_op);
25099 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25100 for (i = 1; i < num_saves && !hi_reg; i++)
25102 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25103 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25111 /* Compute the number of instructions emitted by output_move_double. */
25113 arm_count_output_move_double_insns (rtx *operands)
25115 int count;
25116 output_move_double (operands, false, &count);
25118 return count;
25121 vfp3_const_double_for_fract_bits (rtx operand)
25123 REAL_VALUE_TYPE r0;
25125 if (GET_CODE (operand) != CONST_DOUBLE)
25126 return 0;
25128 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25129 if (exact_real_inverse (DFmode, &r0))
25131 if (exact_real_truncate (DFmode, &r0))
25133 HOST_WIDE_INT value = real_to_integer (&r0);
25134 value = value & 0xffffffff;
25135 if ((value != 0) && ((value & (value - 1)) == 0))
25136 return int_log2 (value);
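/* Worked example: 0.0625 is exactly 2**-4, so its inverse is 16 and
   int_log2 returns 4, the fraction-bit count that a NEON fixed-point
   conversion such as

     vcvt.f32.s32    d0, d0, #4

   would use (instruction shown is illustrative).  */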
25142 #include "gt-arm.h"