/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
				      const_tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help
#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef  TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
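/* (Arithmetic check of the comment above: an anchor range of
   [-4088, 4095] spans 4088 + 1 + 4095 = 8184 = 8 * 1023 bytes, so the
   block size is indeed divisible by eight.)  */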
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
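/* For example (illustrative only): a short guarded statement such as
   "if (x == 0) y = 1;" can be emitted as "cmp r0, #0; moveq r1, #1"
   instead of a compare and branch, provided the guarded body is no
   longer than max_insns_skipped insns.  */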
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse the -mstructure-size-boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)

#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
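/* As a worked example of the definitions above, FL_FOR_ARCH4T expands to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB, i.e. the
   capability set assumed for any ARMv4T part.  */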
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */
/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
static int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
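/* Worked example (illustrative, assuming the usual register numbering:
   Thumb hard frame pointer r7, SP r13, PC r15, PIC base r9): the SP, PC
   and PIC bits fall outside the low byte, so the mask reduces to
   0xff & ~(1 << 7) == 0x7f, i.e. the low registers r0-r6.  */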
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
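/* For example (illustrative entry only), an arm-cores.def line such as
   ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC, slowmul) expands to
   {"arm7tdmi", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T,
    &arm_slowmul_tune}.  */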
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4,  "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2", iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
  {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
  {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
  {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
  {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
  {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
  {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
  {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
  {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
  {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, false},
  {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, true},
  {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
  {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
  {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
  /* Compatibility aliases.  */
  {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft", ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard", ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none", ARM_FP16_FORMAT_NONE},
  {"ieee", ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu", ARM_ABI_APCS},
  {"atpcs", ARM_ABI_ATPCS},
  {"aapcs", ARM_ABI_AAPCS},
  {"iwmmxt", ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
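/* A typical (illustrative) use: emit_set_insn (reg, GEN_INT (0)) emits
   and returns the single insn (set (reg) (const_int 0)).  */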
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
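/* As an illustration of the loop above, for value == 0x29 (binary 101001)
   the iterations compute 0x29 & 0x28 == 0x28, 0x28 & 0x27 == 0x20 and
   0x20 & 0x1f == 0, so bit_count returns 3.  */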
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
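  /* As an (illustrative) example of the divmod arrangement described
     above: a function such as "int rem (int x, int y) { return x % y; }"
     compiles to a call to __aeabi_idivmod; the quotient comes back in r0,
     the remainder in r1, and the caller simply uses r1.  */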
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
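  /* So, for example (illustrative), an addition on two __fp16 values
     widens both operands to SFmode via the __gnu_h2f_* helper, adds in
     single precision, and narrows the result back with __gnu_f2h_*.  */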
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
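/* In C terms (illustrative equivalent only), the type built above behaves
   as if <stdarg.h> on an AAPCS target contained:

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;  */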
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Lookup NAME in SEL.  */
static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    {
      if (streq (name, sel->name))
	return sel;
    }

  error ("bad value (%s) for %s switch", name, desc);

  return NULL;
}
/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
      return true;

    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
	{
	  int value = atoi (p);

	  if (value > 0)
	    columns = value;
	}

      if (columns == 0)
	/* Use a reasonable default.  */
	columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;

  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_cores[i].name);
	  remaining = columns - (len + 4);
	}
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
	}
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int        sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;
      /* Now check to see if the user has specified some command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}
      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & (sought | insn_flags)) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	{
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	    {
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;
  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }
  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");
  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
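  /* (Arithmetic check: 248 + 1 + 4095 = 4344 = 8 * 543, so the Thumb-2
     anchor block size is indeed divisible by eight.)  */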
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
	target_fpu_name = "maverick";
      else
	target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
	{
	  arm_fpu_desc = &all_fpus[i];
	  break;
	}
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
	arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
	arm_fpu_attr = FPU_FPE3;
      else
	arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }
1748 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1749 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1750 will ever exist. GCC makes no attempt to support this combination. */
1751 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1752 sorry ("iWMMXt and hardware floating point");
1754 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1755 if (TARGET_THUMB2 && TARGET_IWMMXT)
1756 sorry ("Thumb-2 iWMMXt");
1758 /* __fp16 support currently assumes the core has ldrh. */
1759 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1760 sorry ("__fp16 and no ldrh");
1762 /* If soft-float is specified then don't use FPU. */
1763 if (TARGET_SOFT_FLOAT)
1764 arm_fpu_attr = FPU_NONE;
1766 if (TARGET_AAPCS_BASED)
1768 if (arm_abi == ARM_ABI_IWMMXT)
1769 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1770 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1771 && TARGET_HARD_FLOAT
1773 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1775 arm_pcs_default = ARM_PCS_AAPCS;
1779 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1780 sorry ("-mfloat-abi=hard and VFP");
1782 if (arm_abi == ARM_ABI_APCS)
1783 arm_pcs_default = ARM_PCS_APCS;
1785 arm_pcs_default = ARM_PCS_ATPCS;
1788 /* For arm2/3 there is no need to do any scheduling if there is only
1789 a floating point emulator, or we are doing software floating-point. */
1790 if ((TARGET_SOFT_FLOAT
1791 || (TARGET_FPA && arm_fpu_desc->rev))
1792 && (tune_flags & FL_MODE32) == 0)
1793 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1795 if (target_thread_switch)
1797 if (strcmp (target_thread_switch, "soft") == 0)
1798 target_thread_pointer = TP_SOFT;
1799 else if (strcmp (target_thread_switch, "auto") == 0)
1800 target_thread_pointer = TP_AUTO;
1801 else if (strcmp (target_thread_switch, "cp15") == 0)
1802 target_thread_pointer = TP_CP15;
1804 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1807 /* Use the cp15 method if it is available. */
1808 if (target_thread_pointer == TP_AUTO)
1810 if (arm_arch6k && !TARGET_THUMB1)
1811 target_thread_pointer = TP_CP15;
1813 target_thread_pointer = TP_SOFT;
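/* Illustrative note (not from the original source): with TP_CP15 the
   thread pointer is read from the CP15 user read-only register, e.g.

     mrc p15, 0, r0, c13, c0, 3   @ r0 = TPIDRURO

   whereas TP_SOFT calls the __aeabi_read_tp library helper instead.  */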
1816 if (TARGET_HARD_TP && TARGET_THUMB1)
1817 error ("can not use -mtp=cp15 with 16-bit Thumb");
1819 /* Override the default structure alignment for AAPCS ABI. */
1820 if (TARGET_AAPCS_BASED)
1821 arm_structure_size_boundary = 8;
1823 if (structure_size_string != NULL)
1825 int size = strtol (structure_size_string, NULL, 0);
1827 if (size == 8 || size == 32
1828 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1829 arm_structure_size_boundary = size;
1831 warning (0, "structure size boundary can only be set to %s",
1832 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
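/* For example (sketch): with -mstructure-size-boundary=32,
   "struct { char c; }" is padded to sizeof == 4 for compatibility with
   the old ARM SDT layout, while the AAPCS default of 8 leaves it at 1.  */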
1835 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1837 error ("RTP PIC is incompatible with Thumb");
1841 /* If stack checking is disabled, we can use r10 as the PIC register,
1842 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1843 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1845 if (TARGET_VXWORKS_RTP)
1846 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1847 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1850 if (flag_pic && TARGET_VXWORKS_RTP)
1851 arm_pic_register = 9;
1853 if (arm_pic_register_string != NULL)
1855 int pic_register = decode_reg_name (arm_pic_register_string);
1858 warning (0, "-mpic-register= is useless without -fpic");
1860 /* Prevent the user from choosing an obviously stupid PIC register. */
1861 else if (pic_register < 0 || call_used_regs[pic_register]
1862 || pic_register == HARD_FRAME_POINTER_REGNUM
1863 || pic_register == STACK_POINTER_REGNUM
1864 || pic_register >= PC_REGNUM
1865 || (TARGET_VXWORKS_RTP
1866 && (unsigned int) pic_register != arm_pic_register))
1867 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1869 arm_pic_register = pic_register;
1872 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1873 if (fix_cm3_ldrd == 2)
1875 if (arm_selected_cpu->core == cortexm3)
1881 if (TARGET_THUMB1 && flag_schedule_insns)
1883 /* Don't warn since it's on by default in -O2. */
1884 flag_schedule_insns = 0;
1889 /* If optimizing for size, bump the number of instructions that we
1890 are prepared to conditionally execute (even on a StrongARM). */
1891 max_insns_skipped = 6;
1895 /* StrongARM has early execution of branches, so a sequence
1896 that is worth skipping is shorter. */
1897 if (arm_tune_strongarm)
1898 max_insns_skipped = 3;
1901 /* Hot/Cold partitioning is not currently supported, since we can't
1902 handle literal pool placement in that case. */
1903 if (flag_reorder_blocks_and_partition)
1905 inform (input_location,
1906 "-freorder-blocks-and-partition not supported on this architecture");
1907 flag_reorder_blocks_and_partition = 0;
1908 flag_reorder_blocks = 1;
1911 if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST)
1913 /* Hoisting PIC address calculations more aggressively provides a small,
1914 but measurable, size reduction for PIC code. Therefore, we decrease
1915 the bar for unrestricted expression hoisting to the cost of PIC address
1916 calculation, which is 2 instructions. */
1917 set_param_value ("gcse-unrestricted-cost", 2);
1919 /* Register global variables with the garbage collector. */
1920 arm_add_gc_roots ();
1924 arm_add_gc_roots (void)
1926 gcc_obstack_init (&minipool_obstack);
1927 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1930 /* A table of known ARM exception types.
1931 For use with the interrupt function attribute. */
1935 const char *const arg;
1936 const unsigned long return_value;
1940 static const isr_attribute_arg isr_attribute_args [] =
1942 { "IRQ", ARM_FT_ISR },
1943 { "irq", ARM_FT_ISR },
1944 { "FIQ", ARM_FT_FIQ },
1945 { "fiq", ARM_FT_FIQ },
1946 { "ABORT", ARM_FT_ISR },
1947 { "abort", ARM_FT_ISR },
1948 { "ABORT", ARM_FT_ISR },
1949 { "abort", ARM_FT_ISR },
1950 { "UNDEF", ARM_FT_EXCEPTION },
1951 { "undef", ARM_FT_EXCEPTION },
1952 { "SWI", ARM_FT_EXCEPTION },
1953 { "swi", ARM_FT_EXCEPTION },
1954 { NULL, ARM_FT_NORMAL }
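/* Example usage (illustrative, not part of this file): the table above
   is consulted for declarations such as

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   which arm_isr_value() below maps to ARM_FT_FIQ.  */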
1957 /* Returns the (interrupt) function type of the current
1958 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1960 static unsigned long
1961 arm_isr_value (tree argument)
1963 const isr_attribute_arg * ptr;
1967 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1969 /* No argument - default to IRQ. */
1970 if (argument == NULL_TREE)
1973 /* Get the value of the argument. */
1974 if (TREE_VALUE (argument) == NULL_TREE
1975 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1976 return ARM_FT_UNKNOWN;
1978 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1980 /* Check it against the list of known arguments. */
1981 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1982 if (streq (arg, ptr->arg))
1983 return ptr->return_value;
1985 /* An unrecognized interrupt type. */
1986 return ARM_FT_UNKNOWN;
1989 /* Computes the type of the current function. */
1991 static unsigned long
1992 arm_compute_func_type (void)
1994 unsigned long type = ARM_FT_UNKNOWN;
1998 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2000 /* Decide if the current function is volatile. Such functions
2001 never return, and many memory cycles can be saved by not storing
2002 register values that will never be needed again. This optimization
2003 was added to speed up context switching in a kernel application. */
2005 && (TREE_NOTHROW (current_function_decl)
2006 || !(flag_unwind_tables
2007 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
2008 && TREE_THIS_VOLATILE (current_function_decl))
2009 type |= ARM_FT_VOLATILE;
2011 if (cfun->static_chain_decl != NULL)
2012 type |= ARM_FT_NESTED;
2014 attr = DECL_ATTRIBUTES (current_function_decl);
2016 a = lookup_attribute ("naked", attr);
2018 type |= ARM_FT_NAKED;
2020 a = lookup_attribute ("isr", attr);
2022 a = lookup_attribute ("interrupt", attr);
2025 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2027 type |= arm_isr_value (TREE_VALUE (a));
2032 /* Returns the type of the current function. */
2035 arm_current_func_type (void)
2037 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2038 cfun->machine->func_type = arm_compute_func_type ();
2040 return cfun->machine->func_type;
2044 arm_allocate_stack_slots_for_args (void)
2046 /* Naked functions should not allocate stack slots for arguments. */
2047 return !IS_NAKED (arm_current_func_type ());
2051 /* Output assembler code for a block containing the constant parts
2052 of a trampoline, leaving space for the variable parts.
2054 On the ARM (if r8 is the static chain regnum, and remembering that
2055 referencing pc adds an offset of 8), the trampoline looks like:
2058 .word static chain value
2059 .word function's address
2060 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
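/* As a concrete sketch, the ARM-mode template below assembles to:

     ldr  r8, [pc, #0]   @ pc reads as .+8, so this loads the first .word
     ldr  pc, [pc, #0]   @ and this jumps via the second .word
     .word <static chain value>
     .word <function address>  */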
2063 arm_asm_trampoline_template (FILE *f)
2067 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2068 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2070 else if (TARGET_THUMB2)
2072 /* The Thumb-2 trampoline is similar to the ARM implementation.
2073 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2074 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2075 STATIC_CHAIN_REGNUM, PC_REGNUM);
2076 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2080 ASM_OUTPUT_ALIGN (f, 2);
2081 fprintf (f, "\t.code\t16\n");
2082 fprintf (f, ".Ltrampoline_start:\n");
2083 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2084 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2085 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2086 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2087 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2088 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2090 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2091 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2094 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2097 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2099 rtx fnaddr, mem, a_tramp;
2101 emit_block_move (m_tramp, assemble_trampoline_template (),
2102 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2104 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2105 emit_move_insn (mem, chain_value);
2107 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2108 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2109 emit_move_insn (mem, fnaddr);
2111 a_tramp = XEXP (m_tramp, 0);
2112 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2113 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2114 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2117 /* Thumb trampolines should be entered in Thumb mode, so set
2118 the bottom bit of the address. */
2121 arm_trampoline_adjust_address (rtx addr)
2124 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2125 NULL, 0, OPTAB_LIB_WIDEN);
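/* For example, a trampoline placed at 0x20000 is entered through address
   0x20001 when it contains Thumb code; BX/BLX use the low bit to select
   the instruction set.  */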
2129 /* Return 1 if it is possible to return using a single instruction.
2130 If SIBLING is non-null, this is a test for a return before a sibling
2131 call. SIBLING is the call insn, so we can examine its register usage. */
2134 use_return_insn (int iscond, rtx sibling)
2137 unsigned int func_type;
2138 unsigned long saved_int_regs;
2139 unsigned HOST_WIDE_INT stack_adjust;
2140 arm_stack_offsets *offsets;
2142 /* Never use a return instruction before reload has run. */
2143 if (!reload_completed)
2146 func_type = arm_current_func_type ();
2148 /* Naked, volatile and stack alignment functions need special
2150 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2153 /* So do interrupt functions that use the frame pointer and Thumb
2154 interrupt functions. */
2155 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2158 offsets = arm_get_frame_offsets ();
2159 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2161 /* As do variadic functions. */
2162 if (crtl->args.pretend_args_size
2163 || cfun->machine->uses_anonymous_args
2164 /* Or if the function calls __builtin_eh_return () */
2165 || crtl->calls_eh_return
2166 /* Or if the function calls alloca */
2167 || cfun->calls_alloca
2168 /* Or if there is a stack adjustment. However, if the stack pointer
2169 is saved on the stack, we can use a pre-incrementing stack load. */
2170 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2171 && stack_adjust == 4)))
2174 saved_int_regs = offsets->saved_regs_mask;
2176 /* Unfortunately, the insn
2178 ldmib sp, {..., sp, ...}
2180 triggers a bug on most SA-110 based devices, such that the stack
2181 pointer won't be correctly restored if the instruction takes a
2182 page fault. We work around this problem by popping r3 along with
2183 the other registers, since that is never slower than executing
2184 another instruction.
2186 We test for !arm_arch5 here, because code for any architecture
2187 less than this could potentially be run on one of the buggy
2189 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2191 /* Validate that r3 is a call-clobbered register (always true in
2192 the default abi) ... */
2193 if (!call_used_regs[3])
2196 /* ... that it isn't being used for a return value ... */
2197 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2200 /* ... or for a tail-call argument ... */
2203 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2205 if (find_regno_fusage (sibling, USE, 3))
2209 /* ... and that there are no call-saved registers in r0-r2
2210 (always true in the default ABI). */
2211 if (saved_int_regs & 0x7)
2215 /* Can't be done if interworking with Thumb, and any registers have been
2217 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2220 /* On StrongARM, conditional returns are expensive if they aren't
2221 taken and multiple registers have been stacked. */
2222 if (iscond && arm_tune_strongarm)
2224 /* Conditional return when just the LR is stored is a simple
2225 conditional-load instruction; that's not expensive. */
2226 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2230 && arm_pic_register != INVALID_REGNUM
2231 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2235 /* If there are saved registers but the LR isn't saved, then we need
2236 two instructions for the return. */
2237 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2240 /* Can't be done if any of the FPA regs are pushed,
2241 since this also requires an insn. */
2242 if (TARGET_HARD_FLOAT && TARGET_FPA)
2243 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2244 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2247 /* Likewise VFP regs. */
2248 if (TARGET_HARD_FLOAT && TARGET_VFP)
2249 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2250 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2253 if (TARGET_REALLY_IWMMXT)
2254 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2255 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2261 /* Return TRUE if int I is a valid immediate ARM constant. */
2264 const_ok_for_arm (HOST_WIDE_INT i)
2268 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2269 be all zero, or all one. */
2270 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2271 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2272 != ((~(unsigned HOST_WIDE_INT) 0)
2273 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2276 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2278 /* Fast return for 0 and small values. We must do this for zero, since
2279 the code below can't handle that one case. */
2280 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2283 /* Get the number of trailing zeros. */
2284 lowbit = ffs((int) i) - 1;
2286 /* Only even shifts are allowed in ARM mode so round down to the
2287 nearest even number. */
2291 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2296 /* Allow rotated constants in ARM mode. */
2298 && ((i & ~0xc000003f) == 0
2299 || (i & ~0xf000000f) == 0
2300 || (i & ~0xfc000003) == 0))
2307 /* Allow repeated pattern. */
2310 if (i == v || i == (v | (v << 8)))
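/* Illustrative sketch (not part of GCC): the ARM-mode rule above is
   "an 8-bit value rotated right by an even amount".  A self-contained
   equivalent of that test:

     static int
     is_arm_immediate (unsigned int x)
     {
       int r;
       for (r = 0; r < 32; r += 2)
         {
           unsigned int v = r ? ((x << r) | (x >> (32 - r))) : x;
           if ((v & ~0xffu) == 0)   /* fits in the low 8 bits after rotation */
             return 1;
         }
       return 0;
     }

   e.g. 0xff000000 qualifies (0xff rotated right by 8) but 0x101 does not.  */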
2317 /* Return true if I is a valid constant for the operation CODE. */
2319 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2321 if (const_ok_for_arm (i))
2345 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2347 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2353 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2357 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2364 /* Emit a sequence of insns to handle a large constant.
2365 CODE is the code of the operation required, it can be any of SET, PLUS,
2366 IOR, AND, XOR, MINUS;
2367 MODE is the mode in which the operation is being performed;
2368 VAL is the integer to operate on;
2369 SOURCE is the other operand (a register, or a null-pointer for SET);
2370 SUBTARGETS means it is safe to create scratch registers if that will
2371 either produce a simpler sequence, or we will want to cse the values.
2372 Return value is the number of insns emitted. */
2374 /* ??? Tweak this for thumb2. */
2376 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2377 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2381 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2382 cond = COND_EXEC_TEST (PATTERN (insn));
2386 if (subtargets || code == SET
2387 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2388 && REGNO (target) != REGNO (source)))
2390 /* After arm_reorg has been called, we can't fix up expensive
2391 constants by pushing them into memory so we must synthesize
2392 them in-line, regardless of the cost. This is only likely to
2393 be more costly on chips that have load delay slots and we are
2394 compiling without running the scheduler (so no splitting
2395 occurred before the final instruction emission).
2397 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2399 if (!after_arm_reorg
2401 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2403 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2408 /* Currently SET is the only monadic value for CODE; all
2409 the rest are dyadic. */
2410 if (TARGET_USE_MOVT)
2411 arm_emit_movpair (target, GEN_INT (val));
2413 emit_set_insn (target, GEN_INT (val));
2419 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2421 if (TARGET_USE_MOVT)
2422 arm_emit_movpair (temp, GEN_INT (val));
2424 emit_set_insn (temp, GEN_INT (val));
2426 /* For MINUS, the constant is the minuend (VAL - SOURCE), since
2427 (SOURCE - VAL) is always rewritten as (SOURCE + -VAL). */
2429 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2431 emit_set_insn (target,
2432 gen_rtx_fmt_ee (code, mode, source, temp));
2438 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2442 /* Return the number of instructions required to synthesize the given
2443 constant, if we start emitting them from bit-position I. */
2445 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2447 HOST_WIDE_INT temp1;
2448 int step_size = TARGET_ARM ? 2 : 1;
2451 gcc_assert (TARGET_ARM || i == 0);
2459 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2464 temp1 = remainder & ((0x0ff << end)
2465 | ((i < end) ? (0xff >> (32 - end)) : 0));
2466 remainder &= ~temp1;
2471 } while (remainder);
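/* Worked example (sketch): for 0x00ff00ff starting at bit 0 the loop
   peels two 8-bit chunks, so the constant costs two insns, e.g.

     mov  rD, #0x000000ff
     orr  rD, rD, #0x00ff0000

   (rD is an arbitrary destination register used for illustration).  */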
2476 find_best_start (unsigned HOST_WIDE_INT remainder)
2478 int best_consecutive_zeros = 0;
2482 /* If we aren't targeting ARM, the best place to start is always at
2487 for (i = 0; i < 32; i += 2)
2489 int consecutive_zeros = 0;
2491 if (!(remainder & (3 << i)))
2493 while ((i < 32) && !(remainder & (3 << i)))
2495 consecutive_zeros += 2;
2498 if (consecutive_zeros > best_consecutive_zeros)
2500 best_consecutive_zeros = consecutive_zeros;
2501 best_start = i - consecutive_zeros;
2507 /* So long as it won't require any more insns to do so, it's
2508 desirable to emit a small constant (in bits 0...9) in the last
2509 insn. This way there is more chance that it can be combined with
2510 a later addressing insn to form a pre-indexed load or store
2511 operation. Consider:
2513 *((volatile int *)0xe0000100) = 1;
2514 *((volatile int *)0xe0000110) = 2;
2516 We want this to wind up as:
2520 str rB, [rA, #0x100]
2522 str rB, [rA, #0x110]
2524 rather than having to synthesize both large constants from scratch.
2526 Therefore, we calculate how many insns would be required to emit
2527 the constant starting from `best_start', and also starting from
2528 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2529 yield a shorter sequence, we may as well use zero. */
2531 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2532 && (count_insns_for_constant (remainder, 0) <=
2533 count_insns_for_constant (remainder, best_start)))
2539 /* Emit an instruction with the indicated PATTERN. If COND is
2540 non-NULL, conditionalize the execution of the instruction on COND
2544 emit_constant_insn (rtx cond, rtx pattern)
2547 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2548 emit_insn (pattern);
2551 /* As above, but extra parameter GENERATE which, if clear, suppresses
2553 /* ??? This needs more work for thumb2. */
2556 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2557 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2562 int final_invert = 0;
2563 int can_negate_initial = 0;
2565 int num_bits_set = 0;
2566 int set_sign_bit_copies = 0;
2567 int clear_sign_bit_copies = 0;
2568 int clear_zero_bit_copies = 0;
2569 int set_zero_bit_copies = 0;
2571 unsigned HOST_WIDE_INT temp1, temp2;
2572 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2573 int step_size = TARGET_ARM ? 2 : 1;
2575 /* Find out which operations are safe for a given CODE. Also do a quick
2576 check for degenerate cases; these can occur when DImode operations
2587 can_negate_initial = 1;
2591 if (remainder == 0xffffffff)
2594 emit_constant_insn (cond,
2595 gen_rtx_SET (VOIDmode, target,
2596 GEN_INT (ARM_SIGN_EXTEND (val))));
2602 if (reload_completed && rtx_equal_p (target, source))
2606 emit_constant_insn (cond,
2607 gen_rtx_SET (VOIDmode, target, source));
2619 emit_constant_insn (cond,
2620 gen_rtx_SET (VOIDmode, target, const0_rtx));
2623 if (remainder == 0xffffffff)
2625 if (reload_completed && rtx_equal_p (target, source))
2628 emit_constant_insn (cond,
2629 gen_rtx_SET (VOIDmode, target, source));
2638 if (reload_completed && rtx_equal_p (target, source))
2641 emit_constant_insn (cond,
2642 gen_rtx_SET (VOIDmode, target, source));
2646 if (remainder == 0xffffffff)
2649 emit_constant_insn (cond,
2650 gen_rtx_SET (VOIDmode, target,
2651 gen_rtx_NOT (mode, source)));
2657 /* We treat MINUS as (val - source), since (source - val) is always
2658 passed as (source + (-val)). */
2662 emit_constant_insn (cond,
2663 gen_rtx_SET (VOIDmode, target,
2664 gen_rtx_NEG (mode, source)));
2667 if (const_ok_for_arm (val))
2670 emit_constant_insn (cond,
2671 gen_rtx_SET (VOIDmode, target,
2672 gen_rtx_MINUS (mode, GEN_INT (val),
2684 /* If we can do it in one insn get out quickly. */
2685 if (const_ok_for_arm (val)
2686 || (can_negate_initial && const_ok_for_arm (-val))
2687 || (can_invert && const_ok_for_arm (~val)))
2690 emit_constant_insn (cond,
2691 gen_rtx_SET (VOIDmode, target,
2693 ? gen_rtx_fmt_ee (code, mode, source,
2699 /* Calculate a few attributes that may be useful for specific
2701 /* Count number of leading zeros. */
2702 for (i = 31; i >= 0; i--)
2704 if ((remainder & (1 << i)) == 0)
2705 clear_sign_bit_copies++;
2710 /* Count number of leading 1's. */
2711 for (i = 31; i >= 0; i--)
2713 if ((remainder & (1 << i)) != 0)
2714 set_sign_bit_copies++;
2719 /* Count number of trailing zeros. */
2720 for (i = 0; i <= 31; i++)
2722 if ((remainder & (1 << i)) == 0)
2723 clear_zero_bit_copies++;
2728 /* Count number of trailing 1's. */
2729 for (i = 0; i <= 31; i++)
2731 if ((remainder & (1 << i)) != 0)
2732 set_zero_bit_copies++;
2740 /* See if we can use movw. */
2741 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2744 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2749 /* See if we can do this by sign_extending a constant that is known
2750 to be negative. This is a good way of doing it, since the shift
2751 may well merge into a subsequent insn. */
2752 if (set_sign_bit_copies > 1)
2754 if (const_ok_for_arm
2755 (temp1 = ARM_SIGN_EXTEND (remainder
2756 << (set_sign_bit_copies - 1))))
2760 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2761 emit_constant_insn (cond,
2762 gen_rtx_SET (VOIDmode, new_src,
2764 emit_constant_insn (cond,
2765 gen_ashrsi3 (target, new_src,
2766 GEN_INT (set_sign_bit_copies - 1)));
2770 /* For an inverted constant, we will need to set the low bits;
2771 these will be shifted out of harm's way. */
2772 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2773 if (const_ok_for_arm (~temp1))
2777 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2778 emit_constant_insn (cond,
2779 gen_rtx_SET (VOIDmode, new_src,
2781 emit_constant_insn (cond,
2782 gen_ashrsi3 (target, new_src,
2783 GEN_INT (set_sign_bit_copies - 1)));
2789 /* See if we can calculate the value as the difference between two
2790 valid immediates. */
2791 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2793 int topshift = clear_sign_bit_copies & ~1;
2795 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2796 & (0xff000000 >> topshift));
2798 /* If temp1 is zero, then that means the 9 most significant
2799 bits of remainder were 1 and we've caused it to overflow.
2800 When topshift is 0 we don't need to do anything since we
2801 can borrow from 'bit 32'. */
2802 if (temp1 == 0 && topshift != 0)
2803 temp1 = 0x80000000 >> (topshift - 1);
2805 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2807 if (const_ok_for_arm (temp2))
2811 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2812 emit_constant_insn (cond,
2813 gen_rtx_SET (VOIDmode, new_src,
2815 emit_constant_insn (cond,
2816 gen_addsi3 (target, new_src,
2824 /* See if we can generate this by setting the bottom (or the top)
2825 16 bits, and then shifting these into the other half of the
2826 word. We only look for the simplest cases; to do more would cost
2827 too much. Be careful, however, not to generate this when the
2828 alternative would take fewer insns. */
2829 if (val & 0xffff0000)
2831 temp1 = remainder & 0xffff0000;
2832 temp2 = remainder & 0x0000ffff;
2834 /* Overlaps outside this range are best done using other methods. */
2835 for (i = 9; i < 24; i++)
2837 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2838 && !const_ok_for_arm (temp2))
2840 rtx new_src = (subtargets
2841 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2843 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2844 source, subtargets, generate);
2852 gen_rtx_ASHIFT (mode, source,
2859 /* Don't duplicate cases already considered. */
2860 for (i = 17; i < 24; i++)
2862 if (((temp1 | (temp1 >> i)) == remainder)
2863 && !const_ok_for_arm (temp1))
2865 rtx new_src = (subtargets
2866 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2868 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2869 source, subtargets, generate);
2874 gen_rtx_SET (VOIDmode, target,
2877 gen_rtx_LSHIFTRT (mode, source,
2888 /* If we have IOR or XOR, and the constant can be loaded in a
2889 single instruction, and we can find a temporary to put it in,
2890 then this can be done in two instructions instead of 3-4. */
2892 /* TARGET can't be NULL if SUBTARGETS is 0 */
2893 || (reload_completed && !reg_mentioned_p (target, source)))
2895 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2899 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2901 emit_constant_insn (cond,
2902 gen_rtx_SET (VOIDmode, sub,
2904 emit_constant_insn (cond,
2905 gen_rtx_SET (VOIDmode, target,
2906 gen_rtx_fmt_ee (code, mode,
2917 x = y | constant (which is composed of set_sign_bit_copies leading 1s
2918 followed by 0s, e.g. 0xfff00000)
2919 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2921 This can be done in 2 instructions by using shifts with mov or mvn.
2926 mvn r0, r0, lsr #12 */
2927 if (set_sign_bit_copies > 8
2928 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2932 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2933 rtx shift = GEN_INT (set_sign_bit_copies);
2937 gen_rtx_SET (VOIDmode, sub,
2939 gen_rtx_ASHIFT (mode,
2944 gen_rtx_SET (VOIDmode, target,
2946 gen_rtx_LSHIFTRT (mode, sub,
2953 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2955 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2957 For example, r0 = r0 | 0xfff
2962 if (set_zero_bit_copies > 8
2963 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2967 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2968 rtx shift = GEN_INT (set_zero_bit_copies);
2972 gen_rtx_SET (VOIDmode, sub,
2974 gen_rtx_LSHIFTRT (mode,
2979 gen_rtx_SET (VOIDmode, target,
2981 gen_rtx_ASHIFT (mode, sub,
2987 /* This will never be reached for Thumb-2 because orn is a valid
2988 instruction. This is for Thumb-1 and the 32-bit ARM case.
2990 x = y | constant (such that ~constant is a valid constant)
2992 x = ~(~y & ~constant).
2994 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2998 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2999 emit_constant_insn (cond,
3000 gen_rtx_SET (VOIDmode, sub,
3001 gen_rtx_NOT (mode, source)));
3004 sub = gen_reg_rtx (mode);
3005 emit_constant_insn (cond,
3006 gen_rtx_SET (VOIDmode, sub,
3007 gen_rtx_AND (mode, source,
3009 emit_constant_insn (cond,
3010 gen_rtx_SET (VOIDmode, target,
3011 gen_rtx_NOT (mode, sub)));
3018 /* See if two shifts will do 2 or more insns' worth of work. */
3019 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3021 HOST_WIDE_INT shift_mask = ((0xffffffff
3022 << (32 - clear_sign_bit_copies))
3025 if ((remainder | shift_mask) != 0xffffffff)
3029 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3030 insns = arm_gen_constant (AND, mode, cond,
3031 remainder | shift_mask,
3032 new_src, source, subtargets, 1);
3037 rtx targ = subtargets ? NULL_RTX : target;
3038 insns = arm_gen_constant (AND, mode, cond,
3039 remainder | shift_mask,
3040 targ, source, subtargets, 0);
3046 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3047 rtx shift = GEN_INT (clear_sign_bit_copies);
3049 emit_insn (gen_ashlsi3 (new_src, source, shift));
3050 emit_insn (gen_lshrsi3 (target, new_src, shift));
3056 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3058 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3060 if ((remainder | shift_mask) != 0xffffffff)
3064 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3066 insns = arm_gen_constant (AND, mode, cond,
3067 remainder | shift_mask,
3068 new_src, source, subtargets, 1);
3073 rtx targ = subtargets ? NULL_RTX : target;
3075 insns = arm_gen_constant (AND, mode, cond,
3076 remainder | shift_mask,
3077 targ, source, subtargets, 0);
3083 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3084 rtx shift = GEN_INT (clear_zero_bit_copies);
3086 emit_insn (gen_lshrsi3 (new_src, source, shift));
3087 emit_insn (gen_ashlsi3 (target, new_src, shift));
3099 for (i = 0; i < 32; i++)
3100 if (remainder & (1 << i))
3104 || (code != IOR && can_invert && num_bits_set > 16))
3105 remainder ^= 0xffffffff;
3106 else if (code == PLUS && num_bits_set > 16)
3107 remainder = (-remainder) & 0xffffffff;
3109 /* For XOR, if more than half the bits are set and there's a sequence
3110 of more than 8 consecutive ones in the pattern then we can XOR by the
3111 inverted constant and then invert the final result; this may save an
3112 instruction and might also lead to the final mvn being merged with
3113 some other operation. */
3114 else if (code == XOR && num_bits_set > 16
3115 && (count_insns_for_constant (remainder ^ 0xffffffff,
3117 (remainder ^ 0xffffffff))
3118 < count_insns_for_constant (remainder,
3119 find_best_start (remainder))))
3121 remainder ^= 0xffffffff;
3130 /* Now try to find a way of doing the job in either two or three
3132 We start by looking for the largest block of zeros that is aligned on
3133 a 2-bit boundary; we then fill up the temps, wrapping around to the
3134 top of the word when we drop off the bottom.
3135 In the worst case this code should produce no more than four insns.
3136 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3137 best place to start. */
3139 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3142 /* Now start emitting the insns. */
3143 i = find_best_start (remainder);
3150 if (remainder & (3 << (i - 2)))
3155 temp1 = remainder & ((0x0ff << end)
3156 | ((i < end) ? (0xff >> (32 - end)) : 0));
3157 remainder &= ~temp1;
3161 rtx new_src, temp1_rtx;
3163 if (code == SET || code == MINUS)
3165 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3166 if (can_invert && code != MINUS)
3171 if ((final_invert || remainder) && subtargets)
3172 new_src = gen_reg_rtx (mode);
3177 else if (can_negate)
3181 temp1 = trunc_int_for_mode (temp1, mode);
3182 temp1_rtx = GEN_INT (temp1);
3186 else if (code == MINUS)
3187 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3189 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3191 emit_constant_insn (cond,
3192 gen_rtx_SET (VOIDmode, new_src,
3202 else if (code == MINUS)
3208 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3218 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3219 gen_rtx_NOT (mode, source)));
3226 /* Canonicalize a comparison so that we are more likely to recognize it.
3227 This can be done for a few constant compares, where we can make the
3228 immediate value easier to load. */
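/* For example (sketch): "x > 0x1ff" cannot use 0x1ff directly, since
   nine consecutive ones do not fit an 8-bit rotated immediate, but it
   is equivalent to "x >= 0x200", and 0x200 loads in one insn.  */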
3231 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3233 enum machine_mode mode;
3234 unsigned HOST_WIDE_INT i, maxval;
3236 mode = GET_MODE (*op0);
3237 if (mode == VOIDmode)
3238 mode = GET_MODE (*op1);
3240 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3242 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3243 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3244 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3245 for GTU/LEU in Thumb mode. */
3250 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3252 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3255 if (code == GT || code == LE
3256 || (!TARGET_ARM && (code == GTU || code == LEU)))
3258 /* Missing comparison. First try to use an available
3260 if (GET_CODE (*op1) == CONST_INT)
3268 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3270 *op1 = GEN_INT (i + 1);
3271 return code == GT ? GE : LT;
3276 if (i != ~((unsigned HOST_WIDE_INT) 0)
3277 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3279 *op1 = GEN_INT (i + 1);
3280 return code == GTU ? GEU : LTU;
3288 /* If that did not work, reverse the condition. */
3292 return swap_condition (code);
3298 /* Comparisons smaller than DImode. Only adjust comparisons against
3299 an out-of-range constant. */
3300 if (GET_CODE (*op1) != CONST_INT
3301 || const_ok_for_arm (INTVAL (*op1))
3302 || const_ok_for_arm (- INTVAL (*op1)))
3316 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3318 *op1 = GEN_INT (i + 1);
3319 return code == GT ? GE : LT;
3326 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3328 *op1 = GEN_INT (i - 1);
3329 return code == GE ? GT : LE;
3335 if (i != ~((unsigned HOST_WIDE_INT) 0)
3336 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3338 *op1 = GEN_INT (i + 1);
3339 return code == GTU ? GEU : LTU;
3346 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3348 *op1 = GEN_INT (i - 1);
3349 return code == GEU ? GTU : LEU;
3361 /* Define how to find the value returned by a function. */
3364 arm_function_value (const_tree type, const_tree func,
3365 bool outgoing ATTRIBUTE_UNUSED)
3367 enum machine_mode mode;
3368 int unsignedp ATTRIBUTE_UNUSED;
3369 rtx r ATTRIBUTE_UNUSED;
3371 mode = TYPE_MODE (type);
3373 if (TARGET_AAPCS_BASED)
3374 return aapcs_allocate_return_reg (mode, type, func);
3376 /* Promote integer types. */
3377 if (INTEGRAL_TYPE_P (type))
3378 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3380 /* Promotes small structs returned in a register to full-word size
3381 for big-endian AAPCS. */
3382 if (arm_return_in_msb (type))
3384 HOST_WIDE_INT size = int_size_in_bytes (type);
3385 if (size % UNITS_PER_WORD != 0)
3387 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3388 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3392 return LIBCALL_VALUE (mode);
3396 libcall_eq (const void *p1, const void *p2)
3398 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3402 libcall_hash (const void *p1)
3404 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3408 add_libcall (htab_t htab, rtx libcall)
3410 *htab_find_slot (htab, libcall, INSERT) = libcall;
3414 arm_libcall_uses_aapcs_base (const_rtx libcall)
3416 static bool init_done = false;
3417 static htab_t libcall_htab;
3423 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3425 add_libcall (libcall_htab,
3426 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3427 add_libcall (libcall_htab,
3428 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3429 add_libcall (libcall_htab,
3430 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3431 add_libcall (libcall_htab,
3432 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3434 add_libcall (libcall_htab,
3435 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3436 add_libcall (libcall_htab,
3437 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3438 add_libcall (libcall_htab,
3439 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3440 add_libcall (libcall_htab,
3441 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3443 add_libcall (libcall_htab,
3444 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3445 add_libcall (libcall_htab,
3446 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3447 add_libcall (libcall_htab,
3448 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3449 add_libcall (libcall_htab,
3450 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3451 add_libcall (libcall_htab,
3452 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3453 add_libcall (libcall_htab,
3454 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3457 return libcall && htab_find (libcall_htab, libcall) != NULL;
3461 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3463 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3464 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3466 /* The following libcalls return their result in integer registers,
3467 even though they return a floating point value. */
3468 if (arm_libcall_uses_aapcs_base (libcall))
3469 return gen_rtx_REG (mode, ARG_REGISTER(1));
3473 return LIBCALL_VALUE (mode);
3476 /* Determine the amount of memory needed to store the possible return
3477 registers of an untyped call. */
3479 arm_apply_result_size (void)
3485 if (TARGET_HARD_FLOAT_ABI)
3491 if (TARGET_MAVERICK)
3494 if (TARGET_IWMMXT_ABI)
3501 /* Decide whether TYPE should be returned in memory (true)
3502 or in a register (false). FNTYPE is the type of the function making
3505 arm_return_in_memory (const_tree type, const_tree fntype)
3509 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3511 if (TARGET_AAPCS_BASED)
3513 /* Simple, non-aggregate types (i.e. not including vectors and
3514 complex) are always returned in a register (or registers).
3515 We don't care about which register here, so we can short-cut
3516 some of the detail. */
3517 if (!AGGREGATE_TYPE_P (type)
3518 && TREE_CODE (type) != VECTOR_TYPE
3519 && TREE_CODE (type) != COMPLEX_TYPE)
3522 /* Any return value that is no larger than one word can be
3524 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3527 /* Check any available co-processors to see if they accept the
3528 type as a register candidate (VFP, for example, can return
3529 some aggregates in consecutive registers). These aren't
3530 available if the call is variadic. */
3531 if (aapcs_select_return_coproc (type, fntype) >= 0)
3534 /* Vector values should be returned using ARM registers, not
3535 memory (unless they're over 16 bytes, which will break since
3536 we only have four call-clobbered registers to play with). */
3537 if (TREE_CODE (type) == VECTOR_TYPE)
3538 return (size < 0 || size > (4 * UNITS_PER_WORD));
3540 /* The rest go in memory. */
3544 if (TREE_CODE (type) == VECTOR_TYPE)
3545 return (size < 0 || size > (4 * UNITS_PER_WORD));
3547 if (!AGGREGATE_TYPE_P (type) &&
3548 (TREE_CODE (type) != VECTOR_TYPE))
3549 /* All simple types are returned in registers. */
3552 if (arm_abi != ARM_ABI_APCS)
3554 /* ATPCS and later return aggregate types in memory only if they are
3555 larger than a word (or are variable size). */
3556 return (size < 0 || size > UNITS_PER_WORD);
3559 /* For the arm-wince targets we choose to be compatible with Microsoft's
3560 ARM and Thumb compilers, which always return aggregates in memory. */
3562 /* All structures/unions bigger than one word are returned in memory.
3563 Also catch the case where int_size_in_bytes returns -1. In this case
3564 the aggregate is either huge or of variable size, and in either case
3565 we will want to return it via memory and not in a register. */
3566 if (size < 0 || size > UNITS_PER_WORD)
3569 if (TREE_CODE (type) == RECORD_TYPE)
3573 /* For a struct the APCS says that we only return in a register
3574 if the type is 'integer like' and every addressable element
3575 has an offset of zero. For practical purposes this means
3576 that the structure can have at most one non bit-field element
3577 and that this element must be the first one in the structure. */
3579 /* Find the first field, ignoring non-FIELD_DECL things which will
3580 have been created by C++. */
3581 for (field = TYPE_FIELDS (type);
3582 field && TREE_CODE (field) != FIELD_DECL;
3583 field = DECL_CHAIN (field))
3587 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3589 /* Check that the first field is valid for returning in a register. */
3591 /* ... Floats are not allowed */
3592 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3595 /* ... Aggregates that are not themselves valid for returning in
3596 a register are not allowed. */
3597 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3600 /* Now check the remaining fields, if any. Only bitfields are allowed,
3601 since they are not addressable. */
3602 for (field = DECL_CHAIN (field);
3604 field = DECL_CHAIN (field))
3606 if (TREE_CODE (field) != FIELD_DECL)
3609 if (!DECL_BIT_FIELD_TYPE (field))
3616 if (TREE_CODE (type) == UNION_TYPE)
3620 /* Unions can be returned in registers if every element is
3621 integral, or can be returned in an integer register. */
3622 for (field = TYPE_FIELDS (type);
3624 field = DECL_CHAIN (field))
3626 if (TREE_CODE (field) != FIELD_DECL)
3629 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3632 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3638 #endif /* not ARM_WINCE */
3640 /* Return all other types in memory. */
3644 /* Indicate whether or not words of a double are in big-endian order. */
3647 arm_float_words_big_endian (void)
3649 if (TARGET_MAVERICK)
3652 /* For FPA, float words are always big-endian. For VFP, float words
3653 follow the memory system mode. */
3661 return (TARGET_BIG_END ? 1 : 0);
3666 const struct pcs_attribute_arg
3670 } pcs_attribute_args[] =
3672 {"aapcs", ARM_PCS_AAPCS},
3673 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3675 /* We could recognize these, but changes would be needed elsewhere
3676 * to implement them. */
3677 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3678 {"atpcs", ARM_PCS_ATPCS},
3679 {"apcs", ARM_PCS_APCS},
3681 {NULL, ARM_PCS_UNKNOWN}
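/* Example usage (illustrative): a declaration can select a variant with

     double dot (const double *a, const double *b, int n)
       __attribute__ ((pcs ("aapcs-vfp")));

   which arm_pcs_from_attribute() below maps to ARM_PCS_AAPCS_VFP.  */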
3685 arm_pcs_from_attribute (tree attr)
3687 const struct pcs_attribute_arg *ptr;
3690 /* Get the value of the argument. */
3691 if (TREE_VALUE (attr) == NULL_TREE
3692 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3693 return ARM_PCS_UNKNOWN;
3695 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3697 /* Check it against the list of known arguments. */
3698 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3699 if (streq (arg, ptr->arg))
3702 /* An unrecognized PCS name. */
3703 return ARM_PCS_UNKNOWN;
3706 /* Get the PCS variant to use for this call. TYPE is the function's type
3707 specification, DECL is the specific declaration. DECL may be null if
3708 the call could be indirect or if this is a library call. */
3710 arm_get_pcs_model (const_tree type, const_tree decl)
3712 bool user_convention = false;
3713 enum arm_pcs user_pcs = arm_pcs_default;
3718 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3721 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3722 user_convention = true;
3725 if (TARGET_AAPCS_BASED)
3727 /* Detect varargs functions. These always use the base rules
3728 (no argument is ever a candidate for a co-processor
3730 bool base_rules = stdarg_p (type);
3732 if (user_convention)
3734 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3735 sorry ("Non-AAPCS derived PCS variant");
3736 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3737 error ("Variadic functions must use the base AAPCS variant");
3741 return ARM_PCS_AAPCS;
3742 else if (user_convention)
3744 else if (decl && flag_unit_at_a_time)
3746 /* Local functions never leak outside this compilation unit,
3747 so we are free to use whatever conventions are
3749 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3750 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3752 return ARM_PCS_AAPCS_LOCAL;
3755 else if (user_convention && user_pcs != arm_pcs_default)
3756 sorry ("PCS variant");
3758 /* For everything else we use the target's default. */
3759 return arm_pcs_default;
3764 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3765 const_tree fntype ATTRIBUTE_UNUSED,
3766 rtx libcall ATTRIBUTE_UNUSED,
3767 const_tree fndecl ATTRIBUTE_UNUSED)
3769 /* Record the unallocated VFP registers. */
3770 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3771 pcum->aapcs_vfp_reg_alloc = 0;
3774 /* Walk down the type tree of TYPE counting consecutive base elements.
3775 If *MODEP is VOIDmode, then set it to the first valid floating point
3776 type. If a non-floating point type is found, or if a floating point
3777 type that doesn't match a non-VOIDmode *MODEP is found, then return -1;
3778 otherwise return the count in the sub-tree. */
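/* For example (illustrative): walking

     struct quad { float x, y, z, w; };

   yields count == 4 with *modep == SFmode, so under the VFP PCS it is a
   homogeneous aggregate eligible for s0-s3; adding an int member would
   make the walk return -1 and force the base (core-register) rules.  */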
3780 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3782 enum machine_mode mode;
3785 switch (TREE_CODE (type))
3788 mode = TYPE_MODE (type);
3789 if (mode != DFmode && mode != SFmode)
3792 if (*modep == VOIDmode)
3801 mode = TYPE_MODE (TREE_TYPE (type));
3802 if (mode != DFmode && mode != SFmode)
3805 if (*modep == VOIDmode)
3814 /* Use V2SImode and V4SImode as representatives of all 64-bit
3815 and 128-bit vector types, whether or not those modes are
3816 supported with the present options. */
3817 size = int_size_in_bytes (type);
3830 if (*modep == VOIDmode)
3833 /* Vector modes are considered to be opaque: two vectors are
3834 equivalent for the purposes of being homogeneous aggregates
3835 if they are the same size. */
3844 tree index = TYPE_DOMAIN (type);
3846 /* Can't handle incomplete types. */
3847 if (!COMPLETE_TYPE_P(type))
3850 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3853 || !TYPE_MAX_VALUE (index)
3854 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3855 || !TYPE_MIN_VALUE (index)
3856 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3860 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3861 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3863 /* There must be no padding. */
3864 if (!host_integerp (TYPE_SIZE (type), 1)
3865 || (tree_low_cst (TYPE_SIZE (type), 1)
3866 != count * GET_MODE_BITSIZE (*modep)))
3878 /* Can't handle incomplete types. */
3879 if (!COMPLETE_TYPE_P(type))
3882 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3884 if (TREE_CODE (field) != FIELD_DECL)
3887 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3893 /* There must be no padding. */
3894 if (!host_integerp (TYPE_SIZE (type), 1)
3895 || (tree_low_cst (TYPE_SIZE (type), 1)
3896 != count * GET_MODE_BITSIZE (*modep)))
3903 case QUAL_UNION_TYPE:
3905 /* These aren't very interesting except in a degenerate case. */
3910 /* Can't handle incomplete types. */
3911 if (!COMPLETE_TYPE_P(type))
3914 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3916 if (TREE_CODE (field) != FIELD_DECL)
3919 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3922 count = count > sub_count ? count : sub_count;
3925 /* There must be no padding. */
3926 if (!host_integerp (TYPE_SIZE (type), 1)
3927 || (tree_low_cst (TYPE_SIZE (type), 1)
3928 != count * GET_MODE_BITSIZE (*modep)))
3941 /* Return true if PCS_VARIANT should use VFP registers. */
3943 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3945 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3947 static bool seen_thumb1_vfp = false;
3949 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3951 sorry ("Thumb-1 hard-float VFP ABI");
3952 /* sorry() is not immediately fatal, so only display this once. */
3953 seen_thumb1_vfp = true;
3959 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3962 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3963 (TARGET_VFP_DOUBLE || !is_double));
3967 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3968 enum machine_mode mode, const_tree type,
3969 enum machine_mode *base_mode, int *count)
3971 enum machine_mode new_mode = VOIDmode;
3973 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3974 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3975 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3980 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3983 new_mode = (mode == DCmode ? DFmode : SFmode);
3985 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3987 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3989 if (ag_count > 0 && ag_count <= 4)
3998 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4001 *base_mode = new_mode;
4006 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4007 enum machine_mode mode, const_tree type)
4009 int count ATTRIBUTE_UNUSED;
4010 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4012 if (!use_vfp_abi (pcs_variant, false))
4014 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4019 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4022 if (!use_vfp_abi (pcum->pcs_variant, false))
4025 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4026 &pcum->aapcs_vfp_rmode,
4027 &pcum->aapcs_vfp_rcount);
4031 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4032 const_tree type ATTRIBUTE_UNUSED)
4034 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4035 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4038 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4039 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4041 pcum->aapcs_vfp_reg_alloc = mask << regno;
4042 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4045 int rcount = pcum->aapcs_vfp_rcount;
4047 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4051 /* Avoid using unsupported vector modes. */
4052 if (rmode == V2SImode)
4054 else if (rmode == V4SImode)
4061 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4062 for (i = 0; i < rcount; i++)
4064 rtx tmp = gen_rtx_REG (rmode,
4065 FIRST_VFP_REGNUM + regno + i * rshift);
4066 tmp = gen_rtx_EXPR_LIST
4068 GEN_INT (i * GET_MODE_SIZE (rmode)));
4069 XVECEXP (par, 0, i) = tmp;
4072 pcum->aapcs_reg = par;
4075 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4082 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4083 enum machine_mode mode,
4084 const_tree type ATTRIBUTE_UNUSED)
4086 if (!use_vfp_abi (pcs_variant, false))
4089 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4092 enum machine_mode ag_mode;
4097 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4102 if (ag_mode == V2SImode)
4104 else if (ag_mode == V4SImode)
4110 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4111 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4112 for (i = 0; i < count; i++)
4114 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4115 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4116 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4117 XVECEXP (par, 0, i) = tmp;
4123 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4127 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4128 enum machine_mode mode ATTRIBUTE_UNUSED,
4129 const_tree type ATTRIBUTE_UNUSED)
4131 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4132 pcum->aapcs_vfp_reg_alloc = 0;
4136 #define AAPCS_CP(X) \
4138 aapcs_ ## X ## _cum_init, \
4139 aapcs_ ## X ## _is_call_candidate, \
4140 aapcs_ ## X ## _allocate, \
4141 aapcs_ ## X ## _is_return_candidate, \
4142 aapcs_ ## X ## _allocate_return_reg, \
4143 aapcs_ ## X ## _advance \
4146 /* Table of co-processors that can be used to pass arguments in
4147 registers. Ideally no argument should be a candidate for more than
4148 one co-processor table entry, but the table is processed in order
4149 and stops after the first match. If that entry then fails to put
4150 the argument into a co-processor register, the argument will go on
4154 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4155 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4157 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4158 BLKmode) is a candidate for this co-processor's registers; this
4159 function should ignore any position-dependent state in
4160 CUMULATIVE_ARGS and only use call-type dependent information. */
4161 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4163 /* Return true if the argument does get a co-processor register; it
4164 should set aapcs_reg to an RTX of the register allocated as is
4165 required for a return from FUNCTION_ARG. */
4166 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4168 /* Return true if a result of mode MODE (or type TYPE if MODE is
4169 BLKmode) can be returned in this co-processor's registers. */
4170 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4172 /* Allocate and return an RTX element to hold the return type of a
4173 call; this routine must not fail and will only be called if
4174 is_return_candidate returned true with the same parameters. */
4175 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4177 /* Finish processing this argument and prepare to start processing
4179 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4180 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4188 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4193 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4194 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4201 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4203 /* We aren't passed a decl, so we can't check that a call is local.
4204 However, it isn't clear that that would be a win anyway, since it
4205 might limit some tail-calling opportunities. */
4206 enum arm_pcs pcs_variant;
4210 const_tree fndecl = NULL_TREE;
4212 if (TREE_CODE (fntype) == FUNCTION_DECL)
4215 fntype = TREE_TYPE (fntype);
4218 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4221 pcs_variant = arm_pcs_default;
4223 if (pcs_variant != ARM_PCS_AAPCS)
4227 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4228 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4237 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4240 /* We aren't passed a decl, so we can't check that a call is local.
4241 However, it isn't clear that that would be a win anyway, since it
4242 might limit some tail-calling opportunities. */
4243 enum arm_pcs pcs_variant;
4244 int unsignedp ATTRIBUTE_UNUSED;
4248 const_tree fndecl = NULL_TREE;
4250 if (TREE_CODE (fntype) == FUNCTION_DECL)
4253 fntype = TREE_TYPE (fntype);
4256 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4259 pcs_variant = arm_pcs_default;
4261 /* Promote integer types. */
4262 if (type && INTEGRAL_TYPE_P (type))
4263 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4265 if (pcs_variant != ARM_PCS_AAPCS)
4269 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4270 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4272 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4276 /* Promotes small structs returned in a register to full-word size
4277 for big-endian AAPCS. */
4278 if (type && arm_return_in_msb (type))
4280 HOST_WIDE_INT size = int_size_in_bytes (type);
4281 if (size % UNITS_PER_WORD != 0)
4283 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4284 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4288 return gen_rtx_REG (mode, R0_REGNUM);
4292 aapcs_libcall_value (enum machine_mode mode)
4294 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4297 /* Lay out a function argument using the AAPCS rules. The rule
4298 numbers referred to here are those in the AAPCS. */
4300 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4301 const_tree type, bool named)
4306 /* We only need to do this once per argument. */
4307 if (pcum->aapcs_arg_processed)
4310 pcum->aapcs_arg_processed = true;
4312 /* Special case: if named is false then we are handling an incoming
4313 anonymous argument which is on the stack. */
4317 /* Is this a potential co-processor register candidate? */
4318 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4320 int slot = aapcs_select_call_coproc (pcum, mode, type);
4321 pcum->aapcs_cprc_slot = slot;
4323 /* We don't have to apply any of the rules from part B of the
4324 preparation phase; these are handled elsewhere in the
4325 compiler. */
4329 /* A Co-processor register candidate goes either in its own
4330 class of registers or on the stack. */
4331 if (!pcum->aapcs_cprc_failed[slot])
4333 /* C1.cp - Try to allocate the argument to co-processor
4334 registers. */
4335 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4338 /* C2.cp - Put the argument on the stack and note that we
4339 can't assign any more candidates in this slot. We also
4340 need to note that we have allocated stack space, so that
4341 we won't later try to split a non-cprc candidate between
4342 core registers and the stack. */
4343 pcum->aapcs_cprc_failed[slot] = true;
4344 pcum->can_split = false;
4347 /* We didn't get a register, so this argument goes on the
4348 stack. */
4349 gcc_assert (pcum->can_split == false);
4354 /* C3 - For double-word aligned arguments, round the NCRN up to the
4355 next even number. */
4356 ncrn = pcum->aapcs_ncrn;
4357 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4360 nregs = ARM_NUM_REGS2(mode, type);
4362 /* Sigh, this test should really assert that nregs > 0, but a GCC
4363 extension allows empty structs and then gives them empty size; it
4364 then allows such a structure to be passed by value. For some of
4365 the code below we have to pretend that such an argument has
4366 non-zero size so that we 'locate' it correctly either in
4367 registers or on the stack. */
4368 gcc_assert (nregs >= 0);
4370 nregs2 = nregs ? nregs : 1;
4372 /* C4 - Argument fits entirely in core registers. */
4373 if (ncrn + nregs2 <= NUM_ARG_REGS)
4375 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4376 pcum->aapcs_next_ncrn = ncrn + nregs;
4380 /* C5 - Some core registers left and there are no arguments already
4381 on the stack: split this argument between the remaining core
4382 registers and the stack. */
4383 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4385 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4386 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4387 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4391 /* C6 - NCRN is set to 4. */
4392 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4394 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
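/* As a worked example of the rules above (a hypothetical signature,
   not taken from the sources): for

     void f (int a, long long b, int c);

   A is allocated to r0 by C4; C3 then rounds the NCRN up from 1 to 2
   so that the doubleword-aligned B takes the even pair r2-r3; no core
   registers remain for C, so C6-C8 send it to the stack. r1 is left
   unused by the alignment rule. */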
4398 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4399 for a call to a function whose data type is FNTYPE.
4400 For a library call, FNTYPE is NULL. */
4402 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4404 tree fndecl ATTRIBUTE_UNUSED)
4406 /* Long call handling. */
4408 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4410 pcum->pcs_variant = arm_pcs_default;
4412 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4414 if (arm_libcall_uses_aapcs_base (libname))
4415 pcum->pcs_variant = ARM_PCS_AAPCS;
4417 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4418 pcum->aapcs_reg = NULL_RTX;
4419 pcum->aapcs_partial = 0;
4420 pcum->aapcs_arg_processed = false;
4421 pcum->aapcs_cprc_slot = -1;
4422 pcum->can_split = true;
4424 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4428 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4430 pcum->aapcs_cprc_failed[i] = false;
4431 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4439 /* On the ARM, the offset starts at 0. */
4441 pcum->iwmmxt_nregs = 0;
4442 pcum->can_split = true;
4444 /* Varargs vectors are treated the same as long long.
4445 named_count avoids having to change the way arm handles 'named'. */
4446 pcum->named_count = 0;
4449 if (TARGET_REALLY_IWMMXT && fntype)
4453 for (fn_arg = TYPE_ARG_TYPES (fntype);
4455 fn_arg = TREE_CHAIN (fn_arg))
4456 pcum->named_count += 1;
4458 if (! pcum->named_count)
4459 pcum->named_count = INT_MAX;
4464 /* Return true if mode/type need doubleword alignment. */
4466 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4468 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4469 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
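/* For example, on AAPCS targets DImode and DFmode values have
   GET_MODE_ALIGNMENT of 64 while PARM_BOUNDARY is 32, so they answer
   true here, as does any aggregate whose type alignment exceeds
   PARM_BOUNDARY; rule C3 in aapcs_layout_arg then starts them in an
   even-numbered core register. */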
4473 /* Determine where to put an argument to a function.
4474 Value is zero to push the argument on the stack,
4475 or a hard register in which to store the argument.
4477 MODE is the argument's machine mode.
4478 TYPE is the data type of the argument (as a tree).
4479 This is null for libcalls where that information may
4481 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4482 the preceding args and about the function being called.
4483 NAMED is nonzero if this argument is a named parameter
4484 (otherwise it is an extra parameter matching an ellipsis).
4486 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4487 other arguments are passed on the stack. If (NAMED == 0) (which happens
4488 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4489 defined), say it is passed on the stack (function_prologue will
4490 indeed arrange for it to be passed on the stack if necessary). */
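/* To illustrate with a hypothetical declaration:

     void g (int a, int b, int c, int d, int e);

   A through D fill the 16 bytes of r0-r3 and E goes on the stack; for
   the AAPCS variants the same layout is produced by aapcs_layout_arg
   above. */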
4493 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4494 const_tree type, bool named)
4498 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4499 a call insn (op3 of a call_value insn). */
4500 if (mode == VOIDmode)
4503 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4505 aapcs_layout_arg (pcum, mode, type, named);
4506 return pcum->aapcs_reg;
4509 /* Varargs vectors are treated the same as long long.
4510 named_count avoids having to change the way arm handles 'named'. */
4511 if (TARGET_IWMMXT_ABI
4512 && arm_vector_mode_supported_p (mode)
4513 && pcum->named_count > pcum->nargs + 1)
4515 if (pcum->iwmmxt_nregs <= 9)
4516 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4519 pcum->can_split = false;
4524 /* Put doubleword aligned quantities in even register pairs. */
4526 && ARM_DOUBLEWORD_ALIGN
4527 && arm_needs_doubleword_align (mode, type))
4530 /* Only allow splitting an arg between regs and memory if all preceding
4531 args were allocated to regs. For args passed by reference we only count
4532 the reference pointer. */
4533 if (pcum->can_split)
4536 nregs = ARM_NUM_REGS2 (mode, type);
4538 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4541 return gen_rtx_REG (mode, pcum->nregs);
4545 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4546 tree type, bool named)
4548 int nregs = pcum->nregs;
4550 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4552 aapcs_layout_arg (pcum, mode, type, named);
4553 return pcum->aapcs_partial;
4556 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4559 if (NUM_ARG_REGS > nregs
4560 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4562 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4567 /* Update the data in PCUM to advance over an argument
4568 of mode MODE and data type TYPE.
4569 (TYPE is null for libcalls where that information may not be available.) */
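/* A rough sketch of how the middle end drives these hooks (pseudocode
   only; the actual caller lives in the expand machinery, not here):

     CUMULATIVE_ARGS cum;
     arm_init_cumulative_args (&cum, fntype, libname, fndecl);
     for each argument (mode, type, named):
       reg = arm_function_arg (&cum, mode, type, named);
       ... use REG, or push the value if REG is NULL_RTX ...
       arm_function_arg_advance (&cum, mode, type, named);  */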
4572 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4573 const_tree type, bool named)
4575 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4577 aapcs_layout_arg (pcum, mode, type, named);
4579 if (pcum->aapcs_cprc_slot >= 0)
4581 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4583 pcum->aapcs_cprc_slot = -1;
4586 /* Generic stuff. */
4587 pcum->aapcs_arg_processed = false;
4588 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4589 pcum->aapcs_reg = NULL_RTX;
4590 pcum->aapcs_partial = 0;
4595 if (arm_vector_mode_supported_p (mode)
4596 && pcum->named_count > pcum->nargs
4597 && TARGET_IWMMXT_ABI)
4598 pcum->iwmmxt_nregs += 1;
4600 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4604 /* Variable sized types are passed by reference. This is a GCC
4605 extension to the ARM ABI. */
4608 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4609 enum machine_mode mode ATTRIBUTE_UNUSED,
4610 const_tree type, bool named ATTRIBUTE_UNUSED)
4612 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
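/* For example, an argument whose type has a variable size, such as a
   C99 variable-length array type, has a TYPE_SIZE that is not an
   INTEGER_CST and is therefore passed by reference under this
   extension. */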
4615 /* Encode the current state of the #pragma [no_]long_calls. */
4618 OFF, /* No #pragma [no_]long_calls is in effect. */
4619 LONG, /* #pragma long_calls is in effect. */
4620 SHORT /* #pragma no_long_calls is in effect. */
4623 static arm_pragma_enum arm_pragma_long_calls = OFF;
4626 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4628 arm_pragma_long_calls = LONG;
4632 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4634 arm_pragma_long_calls = SHORT;
4638 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4640 arm_pragma_long_calls = OFF;
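/* Typical use of these pragmas in a header (illustrative only;
   far_func is a hypothetical name):

     #pragma long_calls
     void far_func (void);   -- receives the long_call type attribute
     #pragma long_calls_off

   arm_set_default_type_attributes below is what actually attaches the
   attribute to function types declared while a pragma is active. */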
4643 /* Handle an attribute requiring a FUNCTION_DECL;
4644 arguments as in struct attribute_spec.handler. */
4646 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4647 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4649 if (TREE_CODE (*node) != FUNCTION_DECL)
4651 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4653 *no_add_attrs = true;
4659 /* Handle an "interrupt" or "isr" attribute;
4660 arguments as in struct attribute_spec.handler. */
4662 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4667 if (TREE_CODE (*node) != FUNCTION_DECL)
4669 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4671 *no_add_attrs = true;
4673 /* FIXME: the argument if any is checked for type attributes;
4674 should it be checked for decl ones? */
4678 if (TREE_CODE (*node) == FUNCTION_TYPE
4679 || TREE_CODE (*node) == METHOD_TYPE)
4681 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4683 warning (OPT_Wattributes, "%qE attribute ignored",
4685 *no_add_attrs = true;
4688 else if (TREE_CODE (*node) == POINTER_TYPE
4689 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4690 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4691 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4693 *node = build_variant_type_copy (*node);
4694 TREE_TYPE (*node) = build_type_attribute_variant
4696 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4697 *no_add_attrs = true;
4701 /* Possibly pass this attribute on from the type to a decl. */
4702 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4703 | (int) ATTR_FLAG_FUNCTION_NEXT
4704 | (int) ATTR_FLAG_ARRAY_NEXT))
4706 *no_add_attrs = true;
4707 return tree_cons (name, args, NULL_TREE);
4711 warning (OPT_Wattributes, "%qE attribute ignored",
4720 /* Handle a "pcs" attribute; arguments as in struct
4721 attribute_spec.handler. */
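/* For example, a hypothetical declaration such as

     double f (double) __attribute__ ((pcs ("aapcs")));

   selects the base (core-register) variant of the AAPCS for calls to
   F even when the default is "aapcs-vfp"; any string that
   arm_pcs_from_attribute does not recognize is diagnosed below. */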
4723 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4724 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4726 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4728 warning (OPT_Wattributes, "%qE attribute ignored", name);
4729 *no_add_attrs = true;
4734 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4735 /* Handle the "notshared" attribute. This attribute is another way of
4736 requesting hidden visibility. ARM's compiler supports
4737 "__declspec(notshared)"; we support the same thing via an
4741 arm_handle_notshared_attribute (tree *node,
4742 tree name ATTRIBUTE_UNUSED,
4743 tree args ATTRIBUTE_UNUSED,
4744 int flags ATTRIBUTE_UNUSED,
4747 tree decl = TYPE_NAME (*node);
4751 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4752 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4753 *no_add_attrs = false;
4759 /* Return 0 if the attributes for two types are incompatible, 1 if they
4760 are compatible, and 2 if they are nearly compatible (which causes a
4761 warning to be generated). */
4763 arm_comp_type_attributes (const_tree type1, const_tree type2)
4767 /* Check for mismatch of non-default calling convention. */
4768 if (TREE_CODE (type1) != FUNCTION_TYPE)
4771 /* Check for mismatched call attributes. */
4772 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4773 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4774 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4775 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4777 /* Only bother to check if an attribute is defined. */
4778 if (l1 | l2 | s1 | s2)
4780 /* If one type has an attribute, the other must have the same attribute. */
4781 if ((l1 != l2) || (s1 != s2))
4784 /* Disallow mixed attributes. */
4785 if ((l1 & s2) || (l2 & s1))
4789 /* Check for mismatched ISR attribute. */
4790 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4792 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4793 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4795 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4802 /* Assigns default attributes to newly defined type. This is used to
4803 set short_call/long_call attributes for function types of
4804 functions defined inside corresponding #pragma scopes. */
4806 arm_set_default_type_attributes (tree type)
4808 /* Add __attribute__ ((long_call)) to all functions when inside
4809 #pragma long_calls, or __attribute__ ((short_call)) when inside
4810 #pragma no_long_calls. */
4811 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4813 tree type_attr_list, attr_name;
4814 type_attr_list = TYPE_ATTRIBUTES (type);
4816 if (arm_pragma_long_calls == LONG)
4817 attr_name = get_identifier ("long_call");
4818 else if (arm_pragma_long_calls == SHORT)
4819 attr_name = get_identifier ("short_call");
4823 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4824 TYPE_ATTRIBUTES (type) = type_attr_list;
4828 /* Return true if DECL is known to be linked into section SECTION. */
4831 arm_function_in_section_p (tree decl, section *section)
4833 /* We can only be certain about functions defined in the same
4834 compilation unit. */
4835 if (!TREE_STATIC (decl))
4838 /* Make sure that SYMBOL always binds to the definition in this
4839 compilation unit. */
4840 if (!targetm.binds_local_p (decl))
4843 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4844 if (!DECL_SECTION_NAME (decl))
4846 /* Make sure that we will not create a unique section for DECL. */
4847 if (flag_function_sections || DECL_ONE_ONLY (decl))
4851 return function_section (decl) == section;
4854 /* Return nonzero if a 32-bit "long_call" should be generated for
4855 a call from the current function to DECL. We generate a long_call
4858 a. has an __attribute__ ((long_call))
4859 or b. is within the scope of a #pragma long_calls
4860 or c. the -mlong-calls command line switch has been specified
4862 However we do not generate a long call if the function:
4864 d. has an __attribute__ ((short_call))
4865 or e. is inside the scope of a #pragma no_long_calls
4866 or f. is defined in the same section as the current function. */
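/* For example (hypothetical declarations):

     void dsp_entry (void) __attribute__ ((long_call));
     void helper (void) __attribute__ ((short_call));

   calls to dsp_entry load a full 32-bit address into a register even
   without -mlong-calls, while helper is always reached with a plain
   BL regardless of the pragmas or switches above. */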
4869 arm_is_long_call_p (tree decl)
4874 return TARGET_LONG_CALLS;
4876 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4877 if (lookup_attribute ("short_call", attrs))
4880 /* For "f", be conservative, and only cater for cases in which the
4881 whole of the current function is placed in the same section. */
4882 if (!flag_reorder_blocks_and_partition
4883 && TREE_CODE (decl) == FUNCTION_DECL
4884 && arm_function_in_section_p (decl, current_function_section ()))
4887 if (lookup_attribute ("long_call", attrs))
4890 return TARGET_LONG_CALLS;
4893 /* Return nonzero if it is ok to make a tail-call to DECL. */
4895 arm_function_ok_for_sibcall (tree decl, tree exp)
4897 unsigned long func_type;
4899 if (cfun->machine->sibcall_blocked)
4902 /* Never tailcall something for which we have no decl, or if we
4903 are generating code for Thumb-1. */
4904 if (decl == NULL || TARGET_THUMB1)
4907 /* The PIC register is live on entry to VxWorks PLT entries, so we
4908 must make the call before restoring the PIC register. */
4909 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4912 /* Cannot tail-call to long calls, since these are out of range of
4913 a branch instruction. */
4914 if (arm_is_long_call_p (decl))
4917 /* If we are interworking and the function is not declared static
4918 then we can't tail-call it unless we know that it exists in this
4919 compilation unit (since it might be a Thumb routine). */
4920 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4923 func_type = arm_current_func_type ();
4924 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4925 if (IS_INTERRUPT (func_type))
4928 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4930 /* Check that the return value locations are the same. For
4931 example that we aren't returning a value from the sibling in
4932 a VFP register but then need to transfer it to a core
4933 register. */
4936 a = arm_function_value (TREE_TYPE (exp), decl, false);
4937 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4939 if (!rtx_equal_p (a, b))
4943 /* Never tailcall if function may be called with a misaligned SP. */
4944 if (IS_STACKALIGN (func_type))
4947 /* Everything else is ok. */
4952 /* Addressing mode support functions. */
4954 /* Return nonzero if X is a legitimate immediate operand when compiling
4955 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4957 legitimate_pic_operand_p (rtx x)
4959 if (GET_CODE (x) == SYMBOL_REF
4960 || (GET_CODE (x) == CONST
4961 && GET_CODE (XEXP (x, 0)) == PLUS
4962 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4968 /* Record that the current function needs a PIC register. Initialize
4969 cfun->machine->pic_reg if we have not already done so. */
4972 require_pic_register (void)
4974 /* A lot of the logic here is made obscure by the fact that this
4975 routine gets called as part of the rtx cost estimation process.
4976 We don't want those calls to affect any assumptions about the real
4977 function; and further, we can't call entry_of_function() until we
4978 start the real expansion process. */
4979 if (!crtl->uses_pic_offset_table)
4981 gcc_assert (can_create_pseudo_p ());
4982 if (arm_pic_register != INVALID_REGNUM)
4984 if (!cfun->machine->pic_reg)
4985 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4987 /* Play games to avoid marking the function as needing pic
4988 if we are being called as part of the cost-estimation
4989 process. */
4990 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4991 crtl->uses_pic_offset_table = 1;
4997 if (!cfun->machine->pic_reg)
4998 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5000 /* Play games to avoid marking the function as needing pic
5001 if we are being called as part of the cost-estimation
5002 process. */
5003 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5005 crtl->uses_pic_offset_table = 1;
5008 arm_load_pic_register (0UL);
5012 /* We can be called during expansion of PHI nodes, where
5013 we can't yet emit instructions directly in the final
5014 insn stream. Queue the insns on the entry edge, they will
5015 be committed after everything else is expanded. */
5016 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5023 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5025 if (GET_CODE (orig) == SYMBOL_REF
5026 || GET_CODE (orig) == LABEL_REF)
5032 gcc_assert (can_create_pseudo_p ());
5033 reg = gen_reg_rtx (Pmode);
5036 /* VxWorks does not impose a fixed gap between segments; the run-time
5037 gap can be different from the object-file gap. We therefore can't
5038 use GOTOFF unless we are absolutely sure that the symbol is in the
5039 same segment as the GOT. Unfortunately, the flexibility of linker
5040 scripts means that we can't be sure of that in general, so assume
5041 that GOTOFF is never valid on VxWorks. */
5042 if ((GET_CODE (orig) == LABEL_REF
5043 || (GET_CODE (orig) == SYMBOL_REF &&
5044 SYMBOL_REF_LOCAL_P (orig)))
5046 && !TARGET_VXWORKS_RTP)
5047 insn = arm_pic_static_addr (orig, reg);
5053 /* If this function doesn't have a pic register, create one now. */
5054 require_pic_register ();
5056 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5058 /* Make the MEM as close to a constant as possible. */
5059 mem = SET_SRC (pat);
5060 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5061 MEM_READONLY_P (mem) = 1;
5062 MEM_NOTRAP_P (mem) = 1;
5064 insn = emit_insn (pat);
5067 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5068 by loop. */
5069 set_unique_reg_note (insn, REG_EQUAL, orig);
5073 else if (GET_CODE (orig) == CONST)
5077 if (GET_CODE (XEXP (orig, 0)) == PLUS
5078 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5081 /* Handle the case where we have: const (UNSPEC_TLS). */
5082 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5083 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5086 /* Handle the case where we have:
5087 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5088 CONST_INT. */
5089 if (GET_CODE (XEXP (orig, 0)) == PLUS
5090 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5091 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5093 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5099 gcc_assert (can_create_pseudo_p ());
5100 reg = gen_reg_rtx (Pmode);
5103 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5105 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5106 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5107 base == reg ? 0 : reg);
5109 if (GET_CODE (offset) == CONST_INT)
5111 /* The base register doesn't really matter, we only want to
5112 test the index for the appropriate mode. */
5113 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5115 gcc_assert (can_create_pseudo_p ());
5116 offset = force_reg (Pmode, offset);
5119 if (GET_CODE (offset) == CONST_INT)
5120 return plus_constant (base, INTVAL (offset));
5123 if (GET_MODE_SIZE (mode) > 4
5124 && (GET_MODE_CLASS (mode) == MODE_INT
5125 || TARGET_SOFT_FLOAT))
5127 emit_insn (gen_addsi3 (reg, base, offset));
5131 return gen_rtx_PLUS (Pmode, base, offset);
5138 /* Find a spare register to use during the prolog of a function. */
5141 thumb_find_work_register (unsigned long pushed_regs_mask)
5145 /* Check the argument registers first as these are call-used. The
5146 register allocation order means that sometimes r3 might be used
5147 but earlier argument registers might not, so check them all. */
5148 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5149 if (!df_regs_ever_live_p (reg))
5152 /* Before going on to check the call-saved registers we can try a couple
5153 more ways of deducing that r3 is available. The first is when we are
5154 pushing anonymous arguments onto the stack and we have fewer than 4
5155 registers' worth of fixed arguments(*). In this case r3 will be part of
5156 the variable argument list and so we can be sure that it will be
5157 pushed right at the start of the function. Hence it will be available
5158 for the rest of the prologue.
5159 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5160 if (cfun->machine->uses_anonymous_args
5161 && crtl->args.pretend_args_size > 0)
5162 return LAST_ARG_REGNUM;
5164 /* The other case is when we have fixed arguments but less than 4 registers
5165 worth. In this case r3 might be used in the body of the function, but
5166 it is not being used to convey an argument into the function. In theory
5167 we could just check crtl->args.size to see how many bytes are
5168 being passed in argument registers, but it seems that it is unreliable.
5169 Sometimes it will have the value 0 when in fact arguments are being
5170 passed. (See testcase execute/20021111-1.c for an example). So we also
5171 check the args_info.nregs field as well. The problem with this field is
5172 that it makes no allowances for arguments that are passed to the
5173 function but which are not used. Hence we could miss an opportunity
5174 when a function has an unused argument in r3. But it is better to be
5175 safe than to be sorry. */
5176 if (! cfun->machine->uses_anonymous_args
5177 && crtl->args.size >= 0
5178 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5179 && crtl->args.info.nregs < 4)
5180 return LAST_ARG_REGNUM;
5182 /* Otherwise look for a call-saved register that is going to be pushed. */
5183 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5184 if (pushed_regs_mask & (1 << reg))
5189 /* Thumb-2 can use high regs. */
5190 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5191 if (pushed_regs_mask & (1 << reg))
5194 /* Something went wrong - thumb_compute_save_reg_mask()
5195 should have arranged for a suitable register to be pushed. */
5199 static GTY(()) int pic_labelno;
5201 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5202 scratch register. */
5205 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5207 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5209 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5212 gcc_assert (flag_pic);
5214 pic_reg = cfun->machine->pic_reg;
5215 if (TARGET_VXWORKS_RTP)
5217 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5218 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5219 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5221 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5223 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5224 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5228 /* We use an UNSPEC rather than a LABEL_REF because this label
5229 never appears in the code stream. */
5231 labelno = GEN_INT (pic_labelno++);
5232 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5233 l1 = gen_rtx_CONST (VOIDmode, l1);
5235 /* On the ARM the PC register contains 'dot + 8' at the time of the
5236 addition; on the Thumb it is 'dot + 4'. */
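/* So, for ARM state, the emitted sequence looks roughly like this
   (illustrative assembly; label names are arbitrary):

       ldr   rPIC, .Lgot      @ word = _GLOBAL_OFFSET_TABLE_ - (.Lpic + 8)
     .Lpic:
       add   rPIC, pc, rPIC   @ pc reads as .Lpic + 8 here

   and the pipeline offset applied below cancels out at run time. */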
5237 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5238 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5240 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5244 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5246 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5248 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5250 else /* TARGET_THUMB1 */
5252 if (arm_pic_register != INVALID_REGNUM
5253 && REGNO (pic_reg) > LAST_LO_REGNUM)
5255 /* We will have pushed the pic register, so we should always be
5256 able to find a work register. */
5257 pic_tmp = gen_rtx_REG (SImode,
5258 thumb_find_work_register (saved_regs));
5259 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5260 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5263 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5264 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5268 /* Need to emit this whether or not we obey regdecls,
5269 since setjmp/longjmp can cause life info to screw up. */
5273 /* Generate code to load the address of a static var when flag_pic is set. */
5275 arm_pic_static_addr (rtx orig, rtx reg)
5277 rtx l1, labelno, offset_rtx, insn;
5279 gcc_assert (flag_pic);
5281 /* We use an UNSPEC rather than a LABEL_REF because this label
5282 never appears in the code stream. */
5283 labelno = GEN_INT (pic_labelno++);
5284 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5285 l1 = gen_rtx_CONST (VOIDmode, l1);
5287 /* On the ARM the PC register contains 'dot + 8' at the time of the
5288 addition; on the Thumb it is 'dot + 4'. */
5289 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5290 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5291 UNSPEC_SYMBOL_OFFSET);
5292 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5296 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5298 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5300 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5302 else /* TARGET_THUMB1 */
5304 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5305 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5311 /* Return nonzero if X is valid as an ARM state addressing register. */
5313 arm_address_register_rtx_p (rtx x, int strict_p)
5317 if (GET_CODE (x) != REG)
5323 return ARM_REGNO_OK_FOR_BASE_P (regno);
5325 return (regno <= LAST_ARM_REGNUM
5326 || regno >= FIRST_PSEUDO_REGISTER
5327 || regno == FRAME_POINTER_REGNUM
5328 || regno == ARG_POINTER_REGNUM);
5331 /* Return TRUE if this rtx is the difference of a symbol and a label,
5332 and will reduce to a PC-relative relocation in the object file.
5333 Expressions like this can be left alone when generating PIC, rather
5334 than forced through the GOT. */
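/* For example, an RTX of the form (minus (symbol_ref "x")
   (label_ref L)) satisfies this test: the assembler can resolve
   "x - L" to a link-time constant offset from the label, so no GOT
   entry is needed for it. */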
5336 pcrel_constant_p (rtx x)
5338 if (GET_CODE (x) == MINUS)
5339 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5344 /* Return true if X will surely end up in an index register after next
5345 splitting pass. */
5347 will_be_in_index_register (const_rtx x)
5349 /* arm.md: calculate_pic_address will split this into a register. */
5350 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5353 /* Return nonzero if X is a valid ARM state address operand. */
5355 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5359 enum rtx_code code = GET_CODE (x);
5361 if (arm_address_register_rtx_p (x, strict_p))
5364 use_ldrd = (TARGET_LDRD
5366 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5368 if (code == POST_INC || code == PRE_DEC
5369 || ((code == PRE_INC || code == POST_DEC)
5370 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5371 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5373 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5374 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5375 && GET_CODE (XEXP (x, 1)) == PLUS
5376 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5378 rtx addend = XEXP (XEXP (x, 1), 1);
5380 /* Don't allow ldrd post increment by register because it's hard
5381 to fix up invalid register choices. */
5383 && GET_CODE (x) == POST_MODIFY
5384 && GET_CODE (addend) == REG)
5387 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5388 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5391 /* After reload constants split into minipools will have addresses
5392 from a LABEL_REF. */
5393 else if (reload_completed
5394 && (code == LABEL_REF
5396 && GET_CODE (XEXP (x, 0)) == PLUS
5397 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5398 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5401 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5404 else if (code == PLUS)
5406 rtx xop0 = XEXP (x, 0);
5407 rtx xop1 = XEXP (x, 1);
5409 return ((arm_address_register_rtx_p (xop0, strict_p)
5410 && ((GET_CODE(xop1) == CONST_INT
5411 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5412 || (!strict_p && will_be_in_index_register (xop1))))
5413 || (arm_address_register_rtx_p (xop1, strict_p)
5414 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5418 /* Reload currently can't handle MINUS, so disable this for now */
5419 else if (GET_CODE (x) == MINUS)
5421 rtx xop0 = XEXP (x, 0);
5422 rtx xop1 = XEXP (x, 1);
5424 return (arm_address_register_rtx_p (xop0, strict_p)
5425 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5429 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5430 && code == SYMBOL_REF
5431 && CONSTANT_POOL_ADDRESS_P (x)
5433 && symbol_mentioned_p (get_pool_constant (x))
5434 && ! pcrel_constant_p (get_pool_constant (x))))
5440 /* Return nonzero if X is a valid Thumb-2 address operand. */
5442 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5445 enum rtx_code code = GET_CODE (x);
5447 if (arm_address_register_rtx_p (x, strict_p))
5450 use_ldrd = (TARGET_LDRD
5452 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5454 if (code == POST_INC || code == PRE_DEC
5455 || ((code == PRE_INC || code == POST_DEC)
5456 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5457 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5459 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5460 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5461 && GET_CODE (XEXP (x, 1)) == PLUS
5462 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5464 /* Thumb-2 only has autoincrement by constant. */
5465 rtx addend = XEXP (XEXP (x, 1), 1);
5466 HOST_WIDE_INT offset;
5468 if (GET_CODE (addend) != CONST_INT)
5471 offset = INTVAL(addend);
5472 if (GET_MODE_SIZE (mode) <= 4)
5473 return (offset > -256 && offset < 256);
5475 return (use_ldrd && offset > -1024 && offset < 1024
5476 && (offset & 3) == 0);
5479 /* After reload constants split into minipools will have addresses
5480 from a LABEL_REF. */
5481 else if (reload_completed
5482 && (code == LABEL_REF
5484 && GET_CODE (XEXP (x, 0)) == PLUS
5485 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5486 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5489 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5492 else if (code == PLUS)
5494 rtx xop0 = XEXP (x, 0);
5495 rtx xop1 = XEXP (x, 1);
5497 return ((arm_address_register_rtx_p (xop0, strict_p)
5498 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5499 || (!strict_p && will_be_in_index_register (xop1))))
5500 || (arm_address_register_rtx_p (xop1, strict_p)
5501 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5504 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5505 && code == SYMBOL_REF
5506 && CONSTANT_POOL_ADDRESS_P (x)
5508 && symbol_mentioned_p (get_pool_constant (x))
5509 && ! pcrel_constant_p (get_pool_constant (x))))
5515 /* Return nonzero if INDEX is valid for an address index operand in
5516 ARM state. */
5518 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5521 HOST_WIDE_INT range;
5522 enum rtx_code code = GET_CODE (index);
5524 /* Standard coprocessor addressing modes. */
5525 if (TARGET_HARD_FLOAT
5526 && (TARGET_FPA || TARGET_MAVERICK)
5527 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5528 || (TARGET_MAVERICK && mode == DImode)))
5529 return (code == CONST_INT && INTVAL (index) < 1024
5530 && INTVAL (index) > -1024
5531 && (INTVAL (index) & 3) == 0);
5534 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5535 return (code == CONST_INT
5536 && INTVAL (index) < 1016
5537 && INTVAL (index) > -1024
5538 && (INTVAL (index) & 3) == 0);
5540 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5541 return (code == CONST_INT
5542 && INTVAL (index) < 1024
5543 && INTVAL (index) > -1024
5544 && (INTVAL (index) & 3) == 0);
5546 if (arm_address_register_rtx_p (index, strict_p)
5547 && (GET_MODE_SIZE (mode) <= 4))
5550 if (mode == DImode || mode == DFmode)
5552 if (code == CONST_INT)
5554 HOST_WIDE_INT val = INTVAL (index);
5557 return val > -256 && val < 256;
5559 return val > -4096 && val < 4092;
5562 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5565 if (GET_MODE_SIZE (mode) <= 4
5569 || (mode == QImode && outer == SIGN_EXTEND))))
5573 rtx xiop0 = XEXP (index, 0);
5574 rtx xiop1 = XEXP (index, 1);
5576 return ((arm_address_register_rtx_p (xiop0, strict_p)
5577 && power_of_two_operand (xiop1, SImode))
5578 || (arm_address_register_rtx_p (xiop1, strict_p)
5579 && power_of_two_operand (xiop0, SImode)));
5581 else if (code == LSHIFTRT || code == ASHIFTRT
5582 || code == ASHIFT || code == ROTATERT)
5584 rtx op = XEXP (index, 1);
5586 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5587 && GET_CODE (op) == CONST_INT
5589 && INTVAL (op) <= 31);
5593 /* For ARM v4 we may be doing a sign-extend operation during the
5594 load. */
5599 || (outer == SIGN_EXTEND && mode == QImode))
5605 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5607 return (code == CONST_INT
5608 && INTVAL (index) < range
5609 && INTVAL (index) > -range);
5612 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5613 index operand, i.e. 1, 2, 4 or 8. */
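/* These factors correspond to the shift amounts accepted by the
   Thumb-2 register-offset addressing forms, e.g. (illustrative
   assembly):

       ldr   r0, [r1, r2, lsl #2]   @ scale factor 4

   hence only the powers of two from 1 to 8 are valid. */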
5615 thumb2_index_mul_operand (rtx op)
5619 if (GET_CODE(op) != CONST_INT)
5623 return (val == 1 || val == 2 || val == 4 || val == 8);
5626 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5628 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5630 enum rtx_code code = GET_CODE (index);
5632 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5633 /* Standard coprocessor addressing modes. */
5634 if (TARGET_HARD_FLOAT
5635 && (TARGET_FPA || TARGET_MAVERICK)
5636 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5637 || (TARGET_MAVERICK && mode == DImode)))
5638 return (code == CONST_INT && INTVAL (index) < 1024
5639 && INTVAL (index) > -1024
5640 && (INTVAL (index) & 3) == 0);
5642 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5644 /* For DImode assume values will usually live in core regs
5645 and only allow LDRD addressing modes. */
5646 if (!TARGET_LDRD || mode != DImode)
5647 return (code == CONST_INT
5648 && INTVAL (index) < 1024
5649 && INTVAL (index) > -1024
5650 && (INTVAL (index) & 3) == 0);
5654 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5655 return (code == CONST_INT
5656 && INTVAL (index) < 1016
5657 && INTVAL (index) > -1024
5658 && (INTVAL (index) & 3) == 0);
5660 if (arm_address_register_rtx_p (index, strict_p)
5661 && (GET_MODE_SIZE (mode) <= 4))
5664 if (mode == DImode || mode == DFmode)
5666 if (code == CONST_INT)
5668 HOST_WIDE_INT val = INTVAL (index);
5669 /* ??? Can we assume ldrd for thumb2? */
5670 /* Thumb-2 ldrd only has reg+const addressing modes. */
5671 /* ldrd supports offsets of +-1020.
5672 However the ldr fallback does not. */
5673 return val > -256 && val < 256 && (val & 3) == 0;
5681 rtx xiop0 = XEXP (index, 0);
5682 rtx xiop1 = XEXP (index, 1);
5684 return ((arm_address_register_rtx_p (xiop0, strict_p)
5685 && thumb2_index_mul_operand (xiop1))
5686 || (arm_address_register_rtx_p (xiop1, strict_p)
5687 && thumb2_index_mul_operand (xiop0)));
5689 else if (code == ASHIFT)
5691 rtx op = XEXP (index, 1);
5693 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5694 && GET_CODE (op) == CONST_INT
5696 && INTVAL (op) <= 3);
5699 return (code == CONST_INT
5700 && INTVAL (index) < 4096
5701 && INTVAL (index) > -256);
5704 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5706 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5710 if (GET_CODE (x) != REG)
5716 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5718 return (regno <= LAST_LO_REGNUM
5719 || regno > LAST_VIRTUAL_REGISTER
5720 || regno == FRAME_POINTER_REGNUM
5721 || (GET_MODE_SIZE (mode) >= 4
5722 && (regno == STACK_POINTER_REGNUM
5723 || regno >= FIRST_PSEUDO_REGISTER
5724 || x == hard_frame_pointer_rtx
5725 || x == arg_pointer_rtx)));
5728 /* Return nonzero if x is a legitimate index register. This is the case
5729 for any base register that can access a QImode object. */
5731 thumb1_index_register_rtx_p (rtx x, int strict_p)
5733 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5736 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5738 The AP may be eliminated to either the SP or the FP, so we use the
5739 least common denominator, e.g. SImode, and offsets from 0 to 64.
5741 ??? Verify whether the above is the right approach.
5743 ??? Also, the FP may be eliminated to the SP, so perhaps that
5744 needs special handling also.
5746 ??? Look at how the mips16 port solves this problem. It probably uses
5747 better ways to solve some of these problems.
5749 Although it is not incorrect, we don't accept QImode and HImode
5750 addresses based on the frame pointer or arg pointer until the
5751 reload pass starts. This is so that eliminating such addresses
5752 into stack based ones won't produce impossible code. */
5754 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5756 /* ??? Not clear if this is right. Experiment. */
5757 if (GET_MODE_SIZE (mode) < 4
5758 && !(reload_in_progress || reload_completed)
5759 && (reg_mentioned_p (frame_pointer_rtx, x)
5760 || reg_mentioned_p (arg_pointer_rtx, x)
5761 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5762 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5763 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5764 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5767 /* Accept any base register. SP only in SImode or larger. */
5768 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5771 /* This is PC relative data before arm_reorg runs. */
5772 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5773 && GET_CODE (x) == SYMBOL_REF
5774 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5777 /* This is PC relative data after arm_reorg runs. */
5778 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5780 && (GET_CODE (x) == LABEL_REF
5781 || (GET_CODE (x) == CONST
5782 && GET_CODE (XEXP (x, 0)) == PLUS
5783 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5784 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5787 /* Post-inc indexing only supported for SImode and larger. */
5788 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5789 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5792 else if (GET_CODE (x) == PLUS)
5794 /* REG+REG address can be any two index registers. */
5795 /* We disallow FRAME+REG addressing since we know that FRAME
5796 will be replaced with STACK, and SP relative addressing only
5797 permits SP+OFFSET. */
5798 if (GET_MODE_SIZE (mode) <= 4
5799 && XEXP (x, 0) != frame_pointer_rtx
5800 && XEXP (x, 1) != frame_pointer_rtx
5801 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5802 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5803 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5806 /* REG+const has 5-7 bit offset for non-SP registers. */
5807 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5808 || XEXP (x, 0) == arg_pointer_rtx)
5809 && GET_CODE (XEXP (x, 1)) == CONST_INT
5810 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5813 /* REG+const has 10-bit offset for SP, but only SImode and
5814 larger is supported. */
5815 /* ??? Should probably check for DI/DFmode overflow here
5816 just like GO_IF_LEGITIMATE_OFFSET does. */
5817 else if (GET_CODE (XEXP (x, 0)) == REG
5818 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5819 && GET_MODE_SIZE (mode) >= 4
5820 && GET_CODE (XEXP (x, 1)) == CONST_INT
5821 && INTVAL (XEXP (x, 1)) >= 0
5822 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5823 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5826 else if (GET_CODE (XEXP (x, 0)) == REG
5827 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5828 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5829 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5830 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5831 && GET_MODE_SIZE (mode) >= 4
5832 && GET_CODE (XEXP (x, 1)) == CONST_INT
5833 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5837 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5838 && GET_MODE_SIZE (mode) == 4
5839 && GET_CODE (x) == SYMBOL_REF
5840 && CONSTANT_POOL_ADDRESS_P (x)
5842 && symbol_mentioned_p (get_pool_constant (x))
5843 && ! pcrel_constant_p (get_pool_constant (x))))
5849 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5850 instruction of mode MODE. */
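/* Concretely, per the switch below: byte accesses accept offsets
   0-31, halfword accesses 0-62 (even offsets only), and word or
   larger accesses must keep VAL + GET_MODE_SIZE (mode) within 128 at
   a 4-byte aligned offset, matching the scaled 5-bit immediates of
   the 16-bit load/store encodings. */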
5852 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5854 switch (GET_MODE_SIZE (mode))
5857 return val >= 0 && val < 32;
5860 return val >= 0 && val < 64 && (val & 1) == 0;
5864 && (val + GET_MODE_SIZE (mode)) <= 128
5870 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5873 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5874 else if (TARGET_THUMB2)
5875 return thumb2_legitimate_address_p (mode, x, strict_p);
5876 else /* if (TARGET_THUMB1) */
5877 return thumb1_legitimate_address_p (mode, x, strict_p);
5880 /* Build the SYMBOL_REF for __tls_get_addr. */
5882 static GTY(()) rtx tls_get_addr_libfunc;
5885 get_tls_get_addr (void)
5887 if (!tls_get_addr_libfunc)
5888 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5889 return tls_get_addr_libfunc;
5893 arm_load_tp (rtx target)
5896 target = gen_reg_rtx (SImode);
5900 /* Can return in any reg. */
5901 emit_insn (gen_load_tp_hard (target));
5905 /* Always returned in r0. Immediately copy the result into a pseudo,
5906 otherwise other uses of r0 (e.g. setting up function arguments) may
5907 clobber the value. */
5911 emit_insn (gen_load_tp_soft ());
5913 tmp = gen_rtx_REG (SImode, 0);
5914 emit_move_insn (target, tmp);
5920 load_tls_operand (rtx x, rtx reg)
5924 if (reg == NULL_RTX)
5925 reg = gen_reg_rtx (SImode);
5927 tmp = gen_rtx_CONST (SImode, x);
5929 emit_move_insn (reg, tmp);
5935 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5937 rtx insns, label, labelno, sum;
5941 labelno = GEN_INT (pic_labelno++);
5942 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5943 label = gen_rtx_CONST (VOIDmode, label);
5945 sum = gen_rtx_UNSPEC (Pmode,
5946 gen_rtvec (4, x, GEN_INT (reloc), label,
5947 GEN_INT (TARGET_ARM ? 8 : 4)),
5949 reg = load_tls_operand (sum, reg);
5952 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5953 else if (TARGET_THUMB2)
5954 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5955 else /* TARGET_THUMB1 */
5956 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5958 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5959 Pmode, 1, reg, Pmode);
5961 insns = get_insns ();
5968 legitimize_tls_address (rtx x, rtx reg)
5970 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5971 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5975 case TLS_MODEL_GLOBAL_DYNAMIC:
5976 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5977 dest = gen_reg_rtx (Pmode);
5978 emit_libcall_block (insns, dest, ret, x);
5981 case TLS_MODEL_LOCAL_DYNAMIC:
5982 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5984 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5985 share the LDM result with other LD model accesses. */
5986 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5988 dest = gen_reg_rtx (Pmode);
5989 emit_libcall_block (insns, dest, ret, eqv);
5991 /* Load the addend. */
5992 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5994 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5995 return gen_rtx_PLUS (Pmode, dest, addend);
5997 case TLS_MODEL_INITIAL_EXEC:
5998 labelno = GEN_INT (pic_labelno++);
5999 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6000 label = gen_rtx_CONST (VOIDmode, label);
6001 sum = gen_rtx_UNSPEC (Pmode,
6002 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6003 GEN_INT (TARGET_ARM ? 8 : 4)),
6005 reg = load_tls_operand (sum, reg);
6008 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6009 else if (TARGET_THUMB2)
6010 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6013 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6014 emit_move_insn (reg, gen_const_mem (SImode, reg));
6017 tp = arm_load_tp (NULL_RTX);
6019 return gen_rtx_PLUS (Pmode, tp, reg);
6021 case TLS_MODEL_LOCAL_EXEC:
6022 tp = arm_load_tp (NULL_RTX);
6024 reg = gen_rtx_UNSPEC (Pmode,
6025 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6027 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6029 return gen_rtx_PLUS (Pmode, tp, reg);
6036 /* Try machine-dependent ways of modifying an illegitimate address
6037 to be legitimate. If we find one, return the new, valid address. */
6039 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6043 /* TODO: legitimize_address for Thumb2. */
6046 return thumb_legitimize_address (x, orig_x, mode);
6049 if (arm_tls_symbol_p (x))
6050 return legitimize_tls_address (x, NULL_RTX);
6052 if (GET_CODE (x) == PLUS)
6054 rtx xop0 = XEXP (x, 0);
6055 rtx xop1 = XEXP (x, 1);
6057 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6058 xop0 = force_reg (SImode, xop0);
6060 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6061 xop1 = force_reg (SImode, xop1);
6063 if (ARM_BASE_REGISTER_RTX_P (xop0)
6064 && GET_CODE (xop1) == CONST_INT)
6066 HOST_WIDE_INT n, low_n;
6070 /* VFP addressing modes actually allow greater offsets, but for
6071 now we just stick with the lowest common denominator. */
6073 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6085 low_n = ((mode) == TImode ? 0
6086 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6090 base_reg = gen_reg_rtx (SImode);
6091 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6092 emit_move_insn (base_reg, val);
6093 x = plus_constant (base_reg, low_n);
6095 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6096 x = gen_rtx_PLUS (SImode, xop0, xop1);
6099 /* XXX We don't allow MINUS any more -- see comment in
6100 arm_legitimate_address_outer_p (). */
6101 else if (GET_CODE (x) == MINUS)
6103 rtx xop0 = XEXP (x, 0);
6104 rtx xop1 = XEXP (x, 1);
6106 if (CONSTANT_P (xop0))
6107 xop0 = force_reg (SImode, xop0);
6109 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6110 xop1 = force_reg (SImode, xop1);
6112 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6113 x = gen_rtx_MINUS (SImode, xop0, xop1);
6116 /* Make sure to take full advantage of the pre-indexed addressing mode
6117 with absolute addresses which often allows for the base register to
6118 be factorized for multiple adjacent memory references, and it might
6119 even allow for the minipool to be avoided entirely. */
6120 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6123 HOST_WIDE_INT mask, base, index;
6126 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6127 use an 8-bit index. So let's use a 12-bit index for SImode only and
6128 hope that arm_gen_constant will enable ldrb to use more bits. */
6129 bits = (mode == SImode) ? 12 : 8;
6130 mask = (1 << bits) - 1;
6131 base = INTVAL (x) & ~mask;
6132 index = INTVAL (x) & mask;
6133 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6135 /* It'll most probably be more efficient to generate the base
6136 with more bits set and use a negative index instead. */
6140 base_reg = force_reg (SImode, GEN_INT (base));
6141 x = plus_constant (base_reg, index);
6146 /* We need to find and carefully transform any SYMBOL and LABEL
6147 references; so go back to the original address expression. */
6148 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6150 if (new_x != orig_x)
6158 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6159 to be legitimate. If we find one, return the new, valid address. */
6161 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6163 if (arm_tls_symbol_p (x))
6164 return legitimize_tls_address (x, NULL_RTX);
6166 if (GET_CODE (x) == PLUS
6167 && GET_CODE (XEXP (x, 1)) == CONST_INT
6168 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6169 || INTVAL (XEXP (x, 1)) < 0))
6171 rtx xop0 = XEXP (x, 0);
6172 rtx xop1 = XEXP (x, 1);
6173 HOST_WIDE_INT offset = INTVAL (xop1);
6175 /* Try to fold the offset into a biasing of the base register and
6176 then offsetting that. Don't do this when optimizing for space
6177 since it can cause too many CSEs. */
6178 if (optimize_size && offset >= 0
6179 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6181 HOST_WIDE_INT delta;
6184 delta = offset - (256 - GET_MODE_SIZE (mode));
6185 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6186 delta = 31 * GET_MODE_SIZE (mode);
6188 delta = offset & (~31 * GET_MODE_SIZE (mode));
6190 xop0 = force_operand (plus_constant (xop0, offset - delta),
6192 x = plus_constant (xop0, delta);
6194 else if (offset < 0 && offset > -256)
6195 /* Small negative offsets are best done with a subtract before the
6196 dereference; forcing these into a register normally takes two
6197 insns. */
6198 x = force_operand (x, NULL_RTX);
6201 /* For the remaining cases, force the constant into a register. */
6202 xop1 = force_reg (SImode, xop1);
6203 x = gen_rtx_PLUS (SImode, xop0, xop1);
6206 else if (GET_CODE (x) == PLUS
6207 && s_register_operand (XEXP (x, 1), SImode)
6208 && !s_register_operand (XEXP (x, 0), SImode))
6210 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6212 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6217 /* We need to find and carefully transform any SYMBOL and LABEL
6218 references; so go back to the original address expression. */
6219 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6221 if (new_x != orig_x)
6229 thumb_legitimize_reload_address (rtx *x_p,
6230 enum machine_mode mode,
6231 int opnum, int type,
6232 int ind_levels ATTRIBUTE_UNUSED)
6236 if (GET_CODE (x) == PLUS
6237 && GET_MODE_SIZE (mode) < 4
6238 && REG_P (XEXP (x, 0))
6239 && XEXP (x, 0) == stack_pointer_rtx
6240 && GET_CODE (XEXP (x, 1)) == CONST_INT
6241 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6246 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6247 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6251 /* If both registers are hi-regs, then it's better to reload the
6252 entire expression rather than each register individually. That
6253 only requires one reload register rather than two. */
6254 if (GET_CODE (x) == PLUS
6255 && REG_P (XEXP (x, 0))
6256 && REG_P (XEXP (x, 1))
6257 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6258 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6263 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6264 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6271 /* Test for various thread-local symbols. */
6273 /* Return TRUE if X is a thread-local symbol. */
6276 arm_tls_symbol_p (rtx x)
6278 if (! TARGET_HAVE_TLS)
6281 if (GET_CODE (x) != SYMBOL_REF)
6284 return SYMBOL_REF_TLS_MODEL (x) != 0;
6287 /* Helper for arm_tls_referenced_p. */
6290 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6292 if (GET_CODE (*x) == SYMBOL_REF)
6293 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6295 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6296 TLS offsets, not real symbol references. */
6297 if (GET_CODE (*x) == UNSPEC
6298 && XINT (*x, 1) == UNSPEC_TLS)
6304 /* Return TRUE if X contains any TLS symbol references. */
6307 arm_tls_referenced_p (rtx x)
6309 if (! TARGET_HAVE_TLS)
6312 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6315 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6318 arm_cannot_force_const_mem (rtx x)
6322 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6324 split_const (x, &base, &offset);
6325 if (GET_CODE (base) == SYMBOL_REF
6326 && !offset_within_block_p (base, INTVAL (offset)))
6329 return arm_tls_referenced_p (x);
6332 #define REG_OR_SUBREG_REG(X) \
6333 (GET_CODE (X) == REG \
6334 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6336 #define REG_OR_SUBREG_RTX(X) \
6337 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6340 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6342 enum machine_mode mode = GET_MODE (x);
6356 return COSTS_N_INSNS (1);
6359 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6362 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6369 return COSTS_N_INSNS (2) + cycles;
6371 return COSTS_N_INSNS (1) + 16;
6374 return (COSTS_N_INSNS (1)
6375 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6376 + GET_CODE (SET_DEST (x)) == MEM));
6381 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6383 if (thumb_shiftable_const (INTVAL (x)))
6384 return COSTS_N_INSNS (2);
6385 return COSTS_N_INSNS (3);
6387 else if ((outer == PLUS || outer == COMPARE)
6388 && INTVAL (x) < 256 && INTVAL (x) > -256)
6390 else if ((outer == IOR || outer == XOR || outer == AND)
6391 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6392 return COSTS_N_INSNS (1);
6393 else if (outer == AND)
6396 /* This duplicates the tests in the andsi3 expander. */
6397 for (i = 9; i <= 31; i++)
6398 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6399 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6400 return COSTS_N_INSNS (2);
6402 else if (outer == ASHIFT || outer == ASHIFTRT
6403 || outer == LSHIFTRT)
6405 return COSTS_N_INSNS (2);
6411 return COSTS_N_INSNS (3);
6429 /* XXX another guess. */
6430 /* Memory costs quite a lot for the first word, but subsequent words
6431 load at the equivalent of a single insn each. */
6432 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6433 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6438 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6444 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6445 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6451 return total + COSTS_N_INSNS (1);
6453 /* Assume a two-shift sequence. Increase the cost slightly so
6454 we prefer actual shifts over an extend operation. */
6455 return total + 1 + COSTS_N_INSNS (2);
6463 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6465 enum machine_mode mode = GET_MODE (x);
6466 enum rtx_code subcode;
6468 enum rtx_code code = GET_CODE (x);
6474 /* Memory costs quite a lot for the first word, but subsequent words
6475 load at the equivalent of a single insn each. */
6476 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6483 if (TARGET_HARD_FLOAT && mode == SFmode)
6484 *total = COSTS_N_INSNS (2);
6485 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6486 *total = COSTS_N_INSNS (4);
6488 *total = COSTS_N_INSNS (20);
6492 if (GET_CODE (XEXP (x, 1)) == REG)
6493 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
6494 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6495 *total = rtx_cost (XEXP (x, 1), code, speed);
6501 *total += COSTS_N_INSNS (4);
6506 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6507 *total += rtx_cost (XEXP (x, 0), code, speed);
6510 *total += COSTS_N_INSNS (3);
6514 *total += COSTS_N_INSNS (1);
6515 /* Increase the cost of complex shifts: they aren't any faster, and
6516 they reduce dual-issue opportunities. */
6517 if (arm_tune_cortex_a9
6518 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6526 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6527 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6528 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6530 *total += rtx_cost (XEXP (x, 1), code, speed);
6534 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6535 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6537 *total += rtx_cost (XEXP (x, 0), code, speed);
6544 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6546 if (TARGET_HARD_FLOAT
6548 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6550 *total = COSTS_N_INSNS (1);
6551 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6552 && arm_const_double_rtx (XEXP (x, 0)))
6554 *total += rtx_cost (XEXP (x, 1), code, speed);
6558 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6559 && arm_const_double_rtx (XEXP (x, 1)))
6561 *total += rtx_cost (XEXP (x, 0), code, speed);
6567 *total = COSTS_N_INSNS (20);
6571 *total = COSTS_N_INSNS (1);
6572 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6573 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6575 *total += rtx_cost (XEXP (x, 1), code, speed);
6579 subcode = GET_CODE (XEXP (x, 1));
6580 if (subcode == ASHIFT || subcode == ASHIFTRT
6581 || subcode == LSHIFTRT
6582 || subcode == ROTATE || subcode == ROTATERT)
6584 *total += rtx_cost (XEXP (x, 0), code, speed);
6585 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6589 /* A shift as a part of RSB costs no more than RSB itself. */
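/* E.g. (illustrative): "rsb r0, r2, r1, lsl #3" computes (r1 << 3) - r2 in a
   single instruction, so only the operands' own costs are added. */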
6590 if (GET_CODE (XEXP (x, 0)) == MULT
6591 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6593 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6594 *total += rtx_cost (XEXP (x, 1), code, speed);
6599 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6601 *total += rtx_cost (XEXP (x, 0), code, speed);
6602 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6606 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6607 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6609 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6610 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6611 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6612 *total += COSTS_N_INSNS (1);
6620 if (code == PLUS && arm_arch6 && mode == SImode
6621 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6622 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6624 *total = COSTS_N_INSNS (1);
6625 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6627 *total += rtx_cost (XEXP (x, 1), code, speed);
6631 /* MLA: All arguments must be registers. We filter out
6632 multiplication by a power of two, so that we fall down into
6633 the code below. */
6634 if (GET_CODE (XEXP (x, 0)) == MULT
6635 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6637 /* The cost comes from the cost of the multiply. */
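/* E.g. (illustrative): "mla r0, r1, r2, r3" computes r1 * r2 + r3 in one
   instruction, so only the MULT's cost is charged here. */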
6641 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6643 if (TARGET_HARD_FLOAT
6645 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6647 *total = COSTS_N_INSNS (1);
6648 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6649 && arm_const_double_rtx (XEXP (x, 1)))
6651 *total += rtx_cost (XEXP (x, 0), code, speed);
6658 *total = COSTS_N_INSNS (20);
6662 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6663 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6665 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6666 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6667 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6668 *total += COSTS_N_INSNS (1);
6674 case AND: case XOR: case IOR:
6676 /* Normally the frame registers will be split into reg+const during
6677 reload, so it is a bad idea to combine them with other instructions,
6678 since then they might not be moved outside of loops. As a compromise
6679 we allow integration with ops that have a constant as their second
6680 operand. */
6681 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6682 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6683 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6684 *total = COSTS_N_INSNS (1);
6688 *total += COSTS_N_INSNS (2);
6689 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6690 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6692 *total += rtx_cost (XEXP (x, 0), code, speed);
6699 *total += COSTS_N_INSNS (1);
6700 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6701 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6703 *total += rtx_cost (XEXP (x, 0), code, speed);
6706 subcode = GET_CODE (XEXP (x, 0));
6707 if (subcode == ASHIFT || subcode == ASHIFTRT
6708 || subcode == LSHIFTRT
6709 || subcode == ROTATE || subcode == ROTATERT)
6711 *total += rtx_cost (XEXP (x, 1), code, speed);
6712 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6717 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6719 *total += rtx_cost (XEXP (x, 1), code, speed);
6720 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6724 if (subcode == UMIN || subcode == UMAX
6725 || subcode == SMIN || subcode == SMAX)
6727 *total = COSTS_N_INSNS (3);
6734 /* This should have been handled by the CPU-specific routines. */
6738 if (arm_arch3m && mode == SImode
6739 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6740 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6741 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6742 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6743 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6744 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6746 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6749 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT. */
6753 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6755 if (TARGET_HARD_FLOAT
6757 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6759 *total = COSTS_N_INSNS (1);
6762 *total = COSTS_N_INSNS (2);
6768 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6769 if (mode == SImode && code == NOT)
6771 subcode = GET_CODE (XEXP (x, 0));
6772 if (subcode == ASHIFT || subcode == ASHIFTRT
6773 || subcode == LSHIFTRT
6774 || subcode == ROTATE || subcode == ROTATERT
6776 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6778 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6779 /* Register shifts cost an extra cycle. */
6780 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6781 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6790 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6792 *total = COSTS_N_INSNS (4);
6796 operand = XEXP (x, 0);
6798 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6799 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6800 && GET_CODE (XEXP (operand, 0)) == REG
6801 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6802 *total += COSTS_N_INSNS (1);
6803 *total += (rtx_cost (XEXP (x, 1), code, speed)
6804 + rtx_cost (XEXP (x, 2), code, speed));
6808 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6810 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6816 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6817 && mode == SImode && XEXP (x, 1) == const0_rtx)
6819 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6825 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6826 && mode == SImode && XEXP (x, 1) == const0_rtx)
6828 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6848 /* SCC insns. If the comparison has already been performed, they
6849 cost 2 instructions. Otherwise they need an additional comparison
6850 before them. */
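/* E.g. (illustrative): with the flags already set, "x = (a == b)" can be
   "mov r0, #0" followed by "moveq r0, #1" -- two insns. */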
6851 *total = COSTS_N_INSNS (2);
6852 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6859 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6865 *total += COSTS_N_INSNS (1);
6866 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6867 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6869 *total += rtx_cost (XEXP (x, 0), code, speed);
6873 subcode = GET_CODE (XEXP (x, 0));
6874 if (subcode == ASHIFT || subcode == ASHIFTRT
6875 || subcode == LSHIFTRT
6876 || subcode == ROTATE || subcode == ROTATERT)
6878 *total += rtx_cost (XEXP (x, 1), code, speed);
6879 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6884 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6886 *total += rtx_cost (XEXP (x, 1), code, speed);
6887 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6897 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6898 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6899 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6900 *total += rtx_cost (XEXP (x, 1), code, speed);
6904 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6906 if (TARGET_HARD_FLOAT
6908 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6910 *total = COSTS_N_INSNS (1);
6913 *total = COSTS_N_INSNS (20);
6916 *total = COSTS_N_INSNS (1);
6918 *total += COSTS_N_INSNS (3);
6924 if (GET_MODE_CLASS (mode) == MODE_INT)
6926 rtx op = XEXP (x, 0);
6927 enum machine_mode opmode = GET_MODE (op);
6930 *total += COSTS_N_INSNS (1);
6932 if (opmode != SImode)
6936 /* If !arm_arch4, we use one of the extendhisi2_mem
6937 or movhi_bytes patterns for HImode. For a QImode
6938 sign extension, we first zero-extend from memory
6939 and then perform a shift sequence. */
6940 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6941 *total += COSTS_N_INSNS (2);
6944 *total += COSTS_N_INSNS (1);
6946 /* We don't have the necessary insn, so we need to perform some other operation. */
6948 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6949 /* An and with constant 255. */
6950 *total += COSTS_N_INSNS (1);
6952 /* A shift sequence. Increase costs slightly to avoid
6953 combining two shifts into an extend operation. */
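/* E.g. (illustrative): a QImode sign extension without the extend insn is
   "mov r0, r0, lsl #24" then "mov r0, r0, asr #24". */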
6954 *total += COSTS_N_INSNS (2) + 1;
6960 switch (GET_MODE (XEXP (x, 0)))
6967 *total = COSTS_N_INSNS (1);
6977 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6981 if (const_ok_for_arm (INTVAL (x))
6982 || const_ok_for_arm (~INTVAL (x)))
6983 *total = COSTS_N_INSNS (1);
6985 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6986 INTVAL (x), NULL_RTX,
6993 *total = COSTS_N_INSNS (3);
6997 *total = COSTS_N_INSNS (1);
7001 *total = COSTS_N_INSNS (1);
7002 *total += rtx_cost (XEXP (x, 0), code, speed);
7006 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7007 && (mode == SFmode || !TARGET_VFP_SINGLE))
7008 *total = COSTS_N_INSNS (1);
7010 *total = COSTS_N_INSNS (4);
7014 *total = COSTS_N_INSNS (4);
7019 /* Estimate the size cost of Thumb-1 instructions.
7020 For now most of the code is copied from thumb1_rtx_costs. We need
7021 finer-grained tuning when we have more related test cases. */
7023 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7025 enum machine_mode mode = GET_MODE (x);
7038 return COSTS_N_INSNS (1);
7041 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7043 /* The Thumb-1 mul instruction can't operate on a constant; we must
7044 load it into a register first. */
7045 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7046 return COSTS_N_INSNS (1) + const_size;
7048 return COSTS_N_INSNS (1);
7051 return (COSTS_N_INSNS (1)
7052 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7053 + (GET_CODE (SET_DEST (x)) == MEM)));
7058 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7059 return COSTS_N_INSNS (1);
7060 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7061 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7062 return COSTS_N_INSNS (2);
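/* E.g. (illustrative): -5 is loaded as "movs rN, #5" then "negs rN, rN",
   hence two instructions. */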
7063 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7064 if (thumb_shiftable_const (INTVAL (x)))
7065 return COSTS_N_INSNS (2);
7066 return COSTS_N_INSNS (3);
7068 else if ((outer == PLUS || outer == COMPARE)
7069 && INTVAL (x) < 256 && INTVAL (x) > -256)
7071 else if ((outer == IOR || outer == XOR || outer == AND)
7072 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7073 return COSTS_N_INSNS (1);
7074 else if (outer == AND)
7077 /* This duplicates the tests in the andsi3 expander. */
7078 for (i = 9; i <= 31; i++)
7079 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7080 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7081 return COSTS_N_INSNS (2);
7083 else if (outer == ASHIFT || outer == ASHIFTRT
7084 || outer == LSHIFTRT)
7086 return COSTS_N_INSNS (2);
7092 return COSTS_N_INSNS (3);
7110 /* XXX another guess. */
7111 /* Memory costs quite a lot for the first word, but subsequent words
7112 load at the equivalent of a single insn each. */
7113 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7114 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7119 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7124 /* XXX still guessing. */
7125 switch (GET_MODE (XEXP (x, 0)))
7128 return (1 + (mode == DImode ? 4 : 0)
7129 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7132 return (4 + (mode == DImode ? 4 : 0)
7133 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7136 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7147 /* RTX costs when optimizing for size. */
7149 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7152 enum machine_mode mode = GET_MODE (x);
7155 *total = thumb1_size_rtx_costs (x, code, outer_code);
7159 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7163 /* A memory access costs 1 insn if the mode is small or the address is
7164 a single register; otherwise it costs one insn per word. */
7165 if (REG_P (XEXP (x, 0)))
7166 *total = COSTS_N_INSNS (1);
7168 && GET_CODE (XEXP (x, 0)) == PLUS
7169 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7170 /* This will be split into two instructions.
7171 See arm.md:calculate_pic_address. */
7172 *total = COSTS_N_INSNS (2);
7174 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7181 /* Needs a libcall, so it costs about this. */
7182 *total = COSTS_N_INSNS (2);
7186 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7188 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7196 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7198 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7201 else if (mode == SImode)
7203 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7204 /* Slightly disparage register shifts, but not by much. */
7205 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7206 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7210 /* Needs a libcall. */
7211 *total = COSTS_N_INSNS (2);
7215 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7216 && (mode == SFmode || !TARGET_VFP_SINGLE))
7218 *total = COSTS_N_INSNS (1);
7224 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7225 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7227 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7228 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7229 || subcode1 == ROTATE || subcode1 == ROTATERT
7230 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7231 || subcode1 == ASHIFTRT)
7233 /* It's just the cost of the two operands. */
7238 *total = COSTS_N_INSNS (1);
7242 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7246 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7247 && (mode == SFmode || !TARGET_VFP_SINGLE))
7249 *total = COSTS_N_INSNS (1);
7253 /* A shift as a part of ADD costs nothing. */
7254 if (GET_CODE (XEXP (x, 0)) == MULT
7255 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7257 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7258 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7259 *total += rtx_cost (XEXP (x, 1), code, false);
7264 case AND: case XOR: case IOR:
7267 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7269 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7270 || subcode == LSHIFTRT || subcode == ASHIFTRT
7271 || (code == AND && subcode == NOT))
7273 /* It's just the cost of the two operands. */
7279 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7283 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7287 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7288 && (mode == SFmode || !TARGET_VFP_SINGLE))
7290 *total = COSTS_N_INSNS (1);
7296 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7305 if (cc_register (XEXP (x, 0), VOIDmode))
7308 *total = COSTS_N_INSNS (1);
7312 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7313 && (mode == SFmode || !TARGET_VFP_SINGLE))
7314 *total = COSTS_N_INSNS (1);
7316 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7321 return arm_rtx_costs_1 (x, outer_code, total, 0);
7324 if (const_ok_for_arm (INTVAL (x)))
7325 /* A multiplication by a constant requires another instruction
7326 to load the constant into a register. */
7327 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7329 else if (const_ok_for_arm (~INTVAL (x)))
7330 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7331 else if (const_ok_for_arm (-INTVAL (x)))
7333 if (outer_code == COMPARE || outer_code == PLUS
7334 || outer_code == MINUS)
7337 *total = COSTS_N_INSNS (1);
7340 *total = COSTS_N_INSNS (2);
7346 *total = COSTS_N_INSNS (2);
7350 *total = COSTS_N_INSNS (4);
7355 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7356 cost of these slightly. */
7357 *total = COSTS_N_INSNS (1) + 1;
7361 if (mode != VOIDmode)
7362 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7364 *total = COSTS_N_INSNS (4); /* Who knows? */
7369 /* RTX costs. Dispatch to the size costs when optimizing for size, and to the per-core tuned speed costs otherwise. */
7371 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7375 return arm_size_rtx_costs (x, (enum rtx_code) code,
7376 (enum rtx_code) outer_code, total);
7378 return current_tune->rtx_costs (x, (enum rtx_code) code,
7379 (enum rtx_code) outer_code,
7383 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7384 supported on any "slowmul" cores, so it can be ignored. */
7387 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7388 int *total, bool speed)
7390 enum machine_mode mode = GET_MODE (x);
7394 *total = thumb1_rtx_costs (x, code, outer_code);
7401 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7404 *total = COSTS_N_INSNS (20);
7408 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7410 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7411 & (unsigned HOST_WIDE_INT) 0xffffffff);
7412 int cost, const_ok = const_ok_for_arm (i);
7413 int j, booth_unit_size;
7415 /* Tune as appropriate. */
7416 cost = const_ok ? 4 : 8;
7417 booth_unit_size = 2;
7418 for (j = 0; i && j < 32; j += booth_unit_size)
7420 i >>= booth_unit_size;
7424 *total = COSTS_N_INSNS (cost);
7425 *total += rtx_cost (XEXP (x, 0), code, speed);
7429 *total = COSTS_N_INSNS (20);
7433 return arm_rtx_costs_1 (x, outer_code, total, speed);
7438 /* RTX cost for cores with a fast multiply unit (M variants). */
7441 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7442 int *total, bool speed)
7444 enum machine_mode mode = GET_MODE (x);
7448 *total = thumb1_rtx_costs (x, code, outer_code);
7452 /* ??? Should Thumb-2 use different costs? */
7456 /* There is no point basing this on the tuning, since it is always the
7457 fast variant if it exists at all. */
7459 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7460 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7461 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7463 *total = COSTS_N_INSNS (2);
7470 *total = COSTS_N_INSNS (5);
7474 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7476 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7477 & (unsigned HOST_WIDE_INT) 0xffffffff);
7478 int cost, const_ok = const_ok_for_arm (i);
7479 int j, booth_unit_size;
7481 /* Tune as appropriate. */
7482 cost = const_ok ? 4 : 8;
7483 booth_unit_size = 8;
7484 for (j = 0; i && j < 32; j += booth_unit_size)
7486 i >>= booth_unit_size;
7490 *total = COSTS_N_INSNS (cost);
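/* Worked example (illustrative): 0x1234 is not a valid ARM immediate, so
   cost starts at 8; its two non-zero 8-bit chunks add one each, giving 10. */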
7496 *total = COSTS_N_INSNS (4);
7500 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7502 if (TARGET_HARD_FLOAT
7504 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7506 *total = COSTS_N_INSNS (1);
7511 /* Requires a libcall. */
7512 *total = COSTS_N_INSNS (20);
7516 return arm_rtx_costs_1 (x, outer_code, total, speed);
7521 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any XScale cores,
7522 so it can be ignored. */
7525 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7526 int *total, bool speed)
7528 enum machine_mode mode = GET_MODE (x);
7532 *total = thumb1_rtx_costs (x, code, outer_code);
7539 if (GET_CODE (XEXP (x, 0)) != MULT)
7540 return arm_rtx_costs_1 (x, outer_code, total, speed);
7542 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7543 will stall until the multiplication is complete. */
7544 *total = COSTS_N_INSNS (3);
7548 /* There is no point basing this on the tuning, since it is always the
7549 fast variant if it exists at all. */
7551 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7552 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7553 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7555 *total = COSTS_N_INSNS (2);
7562 *total = COSTS_N_INSNS (5);
7566 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7568 /* If operand 1 is a constant we can more accurately
7569 calculate the cost of the multiply. The multiplier can
7570 retire 15 bits on the first cycle and a further 12 on the
7571 second. We do, of course, have to load the constant into
7572 a register first. */
7573 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7574 /* There's a general overhead of one cycle. */
7576 unsigned HOST_WIDE_INT masked_const;
7581 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7583 masked_const = i & 0xffff8000;
7584 if (masked_const != 0)
7587 masked_const = i & 0xf8000000;
7588 if (masked_const != 0)
7591 *total = COSTS_N_INSNS (cost);
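/* Worked example (illustrative): 0x12345678 has bits set under both masks
   above, so it pays two extra cycles; 0x4321 fits in the low 15 bits and
   pays neither. */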
7597 *total = COSTS_N_INSNS (3);
7601 /* Requires a libcall. */
7602 *total = COSTS_N_INSNS (20);
7606 return arm_rtx_costs_1 (x, outer_code, total, speed);
7611 /* RTX costs for 9e (and later) cores. */
7614 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7615 int *total, bool speed)
7617 enum machine_mode mode = GET_MODE (x);
7624 *total = COSTS_N_INSNS (3);
7628 *total = thumb1_rtx_costs (x, code, outer_code);
7636 /* There is no point basing this on the tuning, since it is always the
7637 fast variant if it exists at all. */
7639 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7640 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7641 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7643 *total = COSTS_N_INSNS (2);
7650 *total = COSTS_N_INSNS (5);
7656 *total = COSTS_N_INSNS (2);
7660 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7662 if (TARGET_HARD_FLOAT
7664 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7666 *total = COSTS_N_INSNS (1);
7671 *total = COSTS_N_INSNS (20);
7675 return arm_rtx_costs_1 (x, outer_code, total, speed);
7678 /* All address computations that can be done are free, but rtx cost returns
7679 the same for practically all of them. So we weight the different types
7680 of address here in the order (most preferred first):
7681 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7683 arm_arm_address_cost (rtx x)
7685 enum rtx_code c = GET_CODE (x);
7687 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7689 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7694 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7697 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7707 arm_thumb_address_cost (rtx x)
7709 enum rtx_code c = GET_CODE (x);
7714 && GET_CODE (XEXP (x, 0)) == REG
7715 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7722 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7724 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7727 /* Adjust cost hook for XScale. */
7729 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7731 /* Some true dependencies can have a higher cost depending
7732 on precisely how certain input operands are used. */
7733 if (REG_NOTE_KIND (link) == 0
7734 && recog_memoized (insn) >= 0
7735 && recog_memoized (dep) >= 0)
7737 int shift_opnum = get_attr_shift (insn);
7738 enum attr_type attr_type = get_attr_type (dep);
7740 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7741 operand for INSN. If we have a shifted input operand and the
7742 instruction we depend on is another ALU instruction, then we may
7743 have to account for an additional stall. */
7744 if (shift_opnum != 0
7745 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7747 rtx shifted_operand;
7750 /* Get the shifted operand. */
7751 extract_insn (insn);
7752 shifted_operand = recog_data.operand[shift_opnum];
7754 /* Iterate over all the operands in DEP. If we write an operand
7755 that overlaps with SHIFTED_OPERAND, then we have to increase the
7756 cost of this dependency. */
7758 preprocess_constraints ();
7759 for (opno = 0; opno < recog_data.n_operands; opno++)
7761 /* We can ignore strict inputs. */
7762 if (recog_data.operand_type[opno] == OP_IN)
7765 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7777 /* Adjust cost hook for Cortex A9. */
7779 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7781 switch (REG_NOTE_KIND (link))
7788 case REG_DEP_OUTPUT:
7789 if (recog_memoized (insn) >= 0
7790 && recog_memoized (dep) >= 0)
7792 if (GET_CODE (PATTERN (insn)) == SET)
7795 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7797 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7799 enum attr_type attr_type_insn = get_attr_type (insn);
7800 enum attr_type attr_type_dep = get_attr_type (dep);
7802 /* By default all dependencies of the form
7803 s0 = s0 <op> s1
7804 s0 = s0 <op> s2
7805 have an extra latency of 1 cycle because
7806 of the input and output dependency in this
7807 case. However, this gets modeled as a true
7808 dependency and hence all these checks. */
7809 if (REG_P (SET_DEST (PATTERN (insn)))
7810 && REG_P (SET_DEST (PATTERN (dep)))
7811 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7812 SET_DEST (PATTERN (dep))))
7814 /* FMACS is a special case where the dependent
7815 instruction can be issued 3 cycles before
7816 the normal latency in case of an output dependency. */
7818 if ((attr_type_insn == TYPE_FMACS
7819 || attr_type_insn == TYPE_FMACD)
7820 && (attr_type_dep == TYPE_FMACS
7821 || attr_type_dep == TYPE_FMACD))
7823 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7824 *cost = insn_default_latency (dep) - 3;
7826 *cost = insn_default_latency (dep);
7831 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7832 *cost = insn_default_latency (dep) + 1;
7834 *cost = insn_default_latency (dep);
7850 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7851 It corrects the value of COST based on the relationship between
7852 INSN and DEP through the dependence LINK. It returns the new
7853 value. There is a per-core adjust_cost hook to adjust scheduler costs
7854 and the per-core hook can choose to completely override the generic
7855 adjust_cost function. Only put bits of code into arm_adjust_cost that
7856 are common across all cores. */
7858 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7862 /* When generating Thumb-1 code, we want to place flag-setting operations
7863 close to a conditional branch which depends on them, so that we can
7864 omit the comparison. */
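/* E.g. (illustrative): keeping "subs r3, r3, #1" adjacent to "bne .L4" lets
   the branch use the flags set by the subtract, with no separate cmp. */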
7866 && REG_NOTE_KIND (link) == 0
7867 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
7868 && recog_memoized (dep) >= 0
7869 && get_attr_conds (dep) == CONDS_SET)
7872 if (current_tune->sched_adjust_cost != NULL)
7874 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
7878 /* XXX This is not strictly true for the FPA. */
7879 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7880 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7883 /* Call insns don't incur a stall, even if they follow a load. */
7884 if (REG_NOTE_KIND (link) == 0
7885 && GET_CODE (insn) == CALL_INSN)
7888 if ((i_pat = single_set (insn)) != NULL
7889 && GET_CODE (SET_SRC (i_pat)) == MEM
7890 && (d_pat = single_set (dep)) != NULL
7891 && GET_CODE (SET_DEST (d_pat)) == MEM)
7893 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7894 /* This is a load after a store; there is no conflict if the load reads
7895 from a cached area. Assume that loads from the stack and from the
7896 constant pool are cached, and that others will miss. This is a
7897 hack. */
7899 if ((GET_CODE (src_mem) == SYMBOL_REF
7900 && CONSTANT_POOL_ADDRESS_P (src_mem))
7901 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7902 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7903 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7910 static int fp_consts_inited = 0;
7912 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7913 static const char * const strings_fp[8] =
7916 "4", "5", "0.5", "10"
7919 static REAL_VALUE_TYPE values_fp[8];
7922 init_fp_table (void)
7928 fp_consts_inited = 1;
7930 fp_consts_inited = 8;
7932 for (i = 0; i < fp_consts_inited; i++)
7934 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7939 /* Return TRUE if rtx X is a valid immediate FP constant. */
7941 arm_const_double_rtx (rtx x)
7946 if (!fp_consts_inited)
7949 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7950 if (REAL_VALUE_MINUS_ZERO (r))
7953 for (i = 0; i < fp_consts_inited; i++)
7954 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7960 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7962 neg_const_double_rtx_ok_for_fpa (rtx x)
7967 if (!fp_consts_inited)
7970 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7971 r = real_value_negate (&r);
7972 if (REAL_VALUE_MINUS_ZERO (r))
7975 for (i = 0; i < 8; i++)
7976 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7983 /* VFPv3 has a fairly wide range of representable immediates, formed from
7984 "quarter-precision" floating-point values. These can be evaluated using this
7985 formula (with ^ for exponentiation):
7987 valid = (-1)^s * n * 2^-r
7989 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7990 16 <= n <= 31 and 0 <= r <= 7.
7992 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7994 - A (most-significant) is the sign bit.
7995 - BCD are the exponent (encoded as r XOR 3).
7996 - EFGH are the mantissa (encoded as n - 16).
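   For example (illustrative): 1.0 = 16 * 2^-4, i.e. s = 0, n = 16, r = 4,
   and so encodes as (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70; similarly
   0.5 (n = 16, r = 5) encodes as 0x60. */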
7999 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8000 fconst[sd] instruction, or -1 if X isn't suitable. */
8002 vfp3_const_double_index (rtx x)
8004 REAL_VALUE_TYPE r, m;
8006 unsigned HOST_WIDE_INT mantissa, mant_hi;
8007 unsigned HOST_WIDE_INT mask;
8008 HOST_WIDE_INT m1, m2;
8009 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8011 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8014 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8016 /* We can't represent these things, so detect them first. */
8017 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8020 /* Extract sign, exponent and mantissa. */
8021 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8022 r = real_value_abs (&r);
8023 exponent = REAL_EXP (&r);
8024 /* For the mantissa, we expand into two HOST_WIDE_INTs, apart from the
8025 highest (sign) bit, with a fixed binary point at bit point_pos.
8026 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8027 bits for the mantissa, this may fail (low bits would be lost). */
8028 real_ldexp (&m, &r, point_pos - exponent);
8029 REAL_VALUE_TO_INT (&m1, &m2, m);
8033 /* If there are bits set in the low part of the mantissa, we can't
8034 represent this value. */
8038 /* Now make it so that mantissa contains the most-significant bits, and move
8039 the point_pos to indicate that the least-significant bits have been
8041 point_pos -= HOST_BITS_PER_WIDE_INT;
8044 /* We can permit four significant bits of mantissa only, plus a high bit
8045 which is always 1. */
8046 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8047 if ((mantissa & mask) != 0)
8050 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8051 mantissa >>= point_pos - 5;
8053 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8054 floating-point immediate zero with Neon using an integer-zero load, but
8055 that case is handled elsewhere.) */
8059 gcc_assert (mantissa >= 16 && mantissa <= 31);
8061 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8062 normalized significands are in the range [1, 2)). (Our mantissa is shifted
8063 left 4 places at this point relative to normalized IEEE754 values.) GCC
8064 internally uses [0.5, 1) (see real.c), so the exponent returned from
8065 REAL_EXP must be altered. */
8066 exponent = 5 - exponent;
8068 if (exponent < 0 || exponent > 7)
8071 /* Sign, mantissa and exponent are now in the correct form to plug into the
8072 formula described in the comment above. */
8073 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8076 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8078 vfp3_const_double_rtx (rtx x)
8083 return vfp3_const_double_index (x) != -1;
8086 /* Recognize immediates which can be used in various Neon instructions. Legal
8087 immediates are described by the following table (for VMVN variants, the
8088 bitwise inverse of the constant shown is recognized. In either case, VMOV
8089 is output and the correct instruction to use for a given constant is chosen
8090 by the assembler). The constant shown is replicated across all elements of
8091 the destination vector.
8093 insn elems variant constant (binary)
8094 ---- ----- ------- -----------------
8095 vmov i32 0 00000000 00000000 00000000 abcdefgh
8096 vmov i32 1 00000000 00000000 abcdefgh 00000000
8097 vmov i32 2 00000000 abcdefgh 00000000 00000000
8098 vmov i32 3 abcdefgh 00000000 00000000 00000000
8099 vmov i16 4 00000000 abcdefgh
8100 vmov i16 5 abcdefgh 00000000
8101 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8102 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8103 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8104 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8105 vmvn i16 10 00000000 abcdefgh
8106 vmvn i16 11 abcdefgh 00000000
8107 vmov i32 12 00000000 00000000 abcdefgh 11111111
8108 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8109 vmov i32 14 00000000 abcdefgh 11111111 11111111
8110 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8112 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8113 eeeeeeee ffffffff gggggggg hhhhhhhh
8114 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8116 For case 18, B = !b. Representable values are exactly those accepted by
8117 vfp3_const_double_index, but are output as floating-point numbers rather
8120 Variants 0-5 (inclusive) may also be used as immediates for the second
8121 operand of VORR/VBIC instructions.
8123 The INVERSE argument causes the bitwise inverse of the given operand to be
8124 recognized instead (used for recognizing legal immediates for the VAND/VORN
8125 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8126 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8127 output, rather than the real insns vbic/vorr).
8129 INVERSE makes no difference to the recognition of float vectors.
8131 The return value is the variant of immediate as shown in the above table, or
8132 -1 if the given value doesn't match any of the listed patterns.
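   For example (illustrative): a V4SImode vector with every element equal to
   0x00004500 matches variant 1 (i32, "abcdefgh 00000000") with element
   width 32. */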
8135 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8136 rtx *modconst, int *elementwidth)
8138 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8140 for (i = 0; i < idx; i += (STRIDE)) \
8145 immtype = (CLASS); \
8146 elsize = (ELSIZE); \
8150 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8151 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8152 unsigned char bytes[16];
8153 int immtype = -1, matches;
8154 unsigned int invmask = inverse ? 0xff : 0;
8156 /* Vectors of float constants. */
8157 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8159 rtx el0 = CONST_VECTOR_ELT (op, 0);
8162 if (!vfp3_const_double_rtx (el0))
8165 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8167 for (i = 1; i < n_elts; i++)
8169 rtx elt = CONST_VECTOR_ELT (op, i);
8172 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8174 if (!REAL_VALUES_EQUAL (r0, re))
8179 *modconst = CONST_VECTOR_ELT (op, 0);
8187 /* Splat vector constant out into a byte vector. */
8188 for (i = 0; i < n_elts; i++)
8190 rtx el = CONST_VECTOR_ELT (op, i);
8191 unsigned HOST_WIDE_INT elpart;
8192 unsigned int part, parts;
8194 if (GET_CODE (el) == CONST_INT)
8196 elpart = INTVAL (el);
8199 else if (GET_CODE (el) == CONST_DOUBLE)
8201 elpart = CONST_DOUBLE_LOW (el);
8207 for (part = 0; part < parts; part++)
8210 for (byte = 0; byte < innersize; byte++)
8212 bytes[idx++] = (elpart & 0xff) ^ invmask;
8213 elpart >>= BITS_PER_UNIT;
8215 if (GET_CODE (el) == CONST_DOUBLE)
8216 elpart = CONST_DOUBLE_HIGH (el);
8221 gcc_assert (idx == GET_MODE_SIZE (mode));
8225 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8226 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8228 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8229 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8231 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8232 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8234 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8235 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8237 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8239 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8241 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8242 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8244 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8245 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8247 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8248 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8250 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8251 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8253 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8255 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8257 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8258 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8260 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8261 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8263 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8264 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8266 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8267 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8269 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8271 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8272 && bytes[i] == bytes[(i + 8) % idx]);
8280 *elementwidth = elsize;
8284 unsigned HOST_WIDE_INT imm = 0;
8286 /* Un-invert bytes of recognized vector, if necessary. */
8288 for (i = 0; i < idx; i++)
8289 bytes[i] ^= invmask;
8293 /* FIXME: Broken on 32-bit H_W_I hosts. */
8294 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8296 for (i = 0; i < 8; i++)
8297 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8298 << (i * BITS_PER_UNIT);
8300 *modconst = GEN_INT (imm);
8304 unsigned HOST_WIDE_INT imm = 0;
8306 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8307 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8309 *modconst = GEN_INT (imm);
8317 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8318 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8319 float elements), and a modified constant (whatever should be output for a
8320 VMOV) in *MODCONST. */
8323 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8324 rtx *modconst, int *elementwidth)
8328 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8334 *modconst = tmpconst;
8337 *elementwidth = tmpwidth;
8342 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8343 the immediate is valid, write a constant suitable for using as an operand
8344 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8345 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8348 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8349 rtx *modconst, int *elementwidth)
8353 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8355 if (retval < 0 || retval > 5)
8359 *modconst = tmpconst;
8362 *elementwidth = tmpwidth;
8367 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
8371 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8372 int inverse, int quad)
8374 int width, is_valid;
8375 static char templ[40];
8377 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8379 gcc_assert (is_valid != 0);
8382 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8384 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8389 /* Output a sequence of pairwise operations to implement a reduction.
8390 NOTE: We do "too much work" here, because pairwise operations work on two
8391 registers' worth of operands in one go. Unfortunately we don't think we
8392 can exploit those extra calculations to do the full operation in fewer steps.
8393 Although all vector elements of the result but the first are ignored, we
8394 actually calculate the same result in each of the elements. An alternative
8395 such as initially loading a vector with zero to use as each of the second
8396 operands would use up an additional register and take an extra instruction,
8397 for no particular gain. */
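/* For example (illustrative): an add reduction of {a, b, c, d} goes
   {a, b, c, d} -> {a+b, c+d, a+b, c+d} -> {a+b+c+d, ...}, leaving the total
   replicated in every element even though only element 0 is used. */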
8400 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8401 rtx (*reduc) (rtx, rtx, rtx))
8403 enum machine_mode inner = GET_MODE_INNER (mode);
8404 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8407 for (i = parts / 2; i >= 1; i /= 2)
8409 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8410 emit_insn (reduc (dest, tmpsum, tmpsum));
8415 /* If VALS is a vector constant that can be loaded into a register
8416 using VDUP, generate instructions to do so and return an RTX to
8417 assign to the register. Otherwise return NULL_RTX. */
8420 neon_vdup_constant (rtx vals)
8422 enum machine_mode mode = GET_MODE (vals);
8423 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8424 int n_elts = GET_MODE_NUNITS (mode);
8425 bool all_same = true;
8429 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8432 for (i = 0; i < n_elts; ++i)
8434 x = XVECEXP (vals, 0, i);
8435 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8440 /* The elements are not all the same. We could handle repeating
8441 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8442 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
8446 /* We can load this constant by using VDUP and a constant in a
8447 single ARM register. This will be cheaper than a vector load. */
8450 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8451 return gen_rtx_VEC_DUPLICATE (mode, x);
8454 /* Generate code to load VALS, which is a PARALLEL containing only
8455 constants (for vec_init) or CONST_VECTOR, efficiently into a
8456 register. Returns an RTX to copy into the register, or NULL_RTX
8457 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8460 neon_make_constant (rtx vals)
8462 enum machine_mode mode = GET_MODE (vals);
8464 rtx const_vec = NULL_RTX;
8465 int n_elts = GET_MODE_NUNITS (mode);
8469 if (GET_CODE (vals) == CONST_VECTOR)
8471 else if (GET_CODE (vals) == PARALLEL)
8473 /* A CONST_VECTOR must contain only CONST_INTs and
8474 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8475 Only store valid constants in a CONST_VECTOR. */
8476 for (i = 0; i < n_elts; ++i)
8478 rtx x = XVECEXP (vals, 0, i);
8479 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8482 if (n_const == n_elts)
8483 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8488 if (const_vec != NULL
8489 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8490 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8492 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8493 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8494 pipeline cycle; creating the constant takes one or two ARM pipeline cycles. */
8497 else if (const_vec != NULL_RTX)
8498 /* Load from constant pool. On Cortex-A8 this takes two cycles
8499 (for either double or quad vectors). We cannot take advantage
8500 of single-cycle VLD1 because we need a PC-relative addressing mode. */
8504 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8505 We cannot construct an initializer. */
8509 /* Initialize vector TARGET to VALS. */
8512 neon_expand_vector_init (rtx target, rtx vals)
8514 enum machine_mode mode = GET_MODE (target);
8515 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8516 int n_elts = GET_MODE_NUNITS (mode);
8517 int n_var = 0, one_var = -1;
8518 bool all_same = true;
8522 for (i = 0; i < n_elts; ++i)
8524 x = XVECEXP (vals, 0, i);
8525 if (!CONSTANT_P (x))
8526 ++n_var, one_var = i;
8528 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8534 rtx constant = neon_make_constant (vals);
8535 if (constant != NULL_RTX)
8537 emit_move_insn (target, constant);
8542 /* Splat a single non-constant element if we can. */
8543 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8545 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8546 emit_insn (gen_rtx_SET (VOIDmode, target,
8547 gen_rtx_VEC_DUPLICATE (mode, x)));
8551 /* One field is non-constant. Load constant then overwrite varying
8552 field. This is more efficient than using the stack. */
8555 rtx copy = copy_rtx (vals);
8556 rtx index = GEN_INT (one_var);
8558 /* Load constant part of vector, substitute neighboring value for the varying element. */
8560 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8561 neon_expand_vector_init (target, copy);
8563 /* Insert variable. */
8564 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8568 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8571 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8574 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8577 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8580 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8583 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8586 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8589 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8592 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8600 /* Construct the vector in memory one field at a time
8601 and load the whole vector. */
8602 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8603 for (i = 0; i < n_elts; i++)
8604 emit_move_insn (adjust_address_nv (mem, inner_mode,
8605 i * GET_MODE_SIZE (inner_mode)),
8606 XVECEXP (vals, 0, i));
8607 emit_move_insn (target, mem);
8610 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8611 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8612 reported source locations are bogus. */
8615 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8620 gcc_assert (GET_CODE (operand) == CONST_INT);
8622 lane = INTVAL (operand);
8624 if (lane < low || lane >= high)
8628 /* Bounds-check lanes. */
8631 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8633 bounds_check (operand, low, high, "lane out of range");
8636 /* Bounds-check constants. */
8639 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8641 bounds_check (operand, low, high, "constant out of range");
8645 neon_element_bits (enum machine_mode mode)
8648 return GET_MODE_BITSIZE (mode);
8650 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8654 /* Predicates for `match_operand' and `match_operator'. */
8656 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8658 cirrus_memory_offset (rtx op)
8660 /* Reject eliminable registers. */
8661 if (! (reload_in_progress || reload_completed)
8662 && ( reg_mentioned_p (frame_pointer_rtx, op)
8663 || reg_mentioned_p (arg_pointer_rtx, op)
8664 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8665 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8666 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8667 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8670 if (GET_CODE (op) == MEM)
8676 /* Match: (mem (reg)). */
8677 if (GET_CODE (ind) == REG)
8683 if (GET_CODE (ind) == PLUS
8684 && GET_CODE (XEXP (ind, 0)) == REG
8685 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8686 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8693 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8694 WB is true if full writeback address modes are allowed and is false
8695 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
8699 arm_coproc_mem_operand (rtx op, bool wb)
8703 /* Reject eliminable registers. */
8704 if (! (reload_in_progress || reload_completed)
8705 && ( reg_mentioned_p (frame_pointer_rtx, op)
8706 || reg_mentioned_p (arg_pointer_rtx, op)
8707 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8708 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8709 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8710 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8713 /* Constants are converted into offsets from labels. */
8714 if (GET_CODE (op) != MEM)
8719 if (reload_completed
8720 && (GET_CODE (ind) == LABEL_REF
8721 || (GET_CODE (ind) == CONST
8722 && GET_CODE (XEXP (ind, 0)) == PLUS
8723 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8724 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8727 /* Match: (mem (reg)). */
8728 if (GET_CODE (ind) == REG)
8729 return arm_address_register_rtx_p (ind, 0);
8731 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8732 acceptable in any case (subject to verification by
8733 arm_address_register_rtx_p). We need WB to be true to accept
8734 PRE_INC and POST_DEC. */
8735 if (GET_CODE (ind) == POST_INC
8736 || GET_CODE (ind) == PRE_DEC
8738 && (GET_CODE (ind) == PRE_INC
8739 || GET_CODE (ind) == POST_DEC)))
8740 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8743 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8744 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8745 && GET_CODE (XEXP (ind, 1)) == PLUS
8746 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8747 ind = XEXP (ind, 1);
8752 if (GET_CODE (ind) == PLUS
8753 && GET_CODE (XEXP (ind, 0)) == REG
8754 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8755 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8756 && INTVAL (XEXP (ind, 1)) > -1024
8757 && INTVAL (XEXP (ind, 1)) < 1024
8758 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8764 /* Return TRUE if OP is a memory operand which we can load or store a vector
8765 to/from. TYPE is one of the following values:
8766 0 - Vector load/store (vldr)
8767 1 - Core registers (ldm)
8768 2 - Element/structure loads (vld1)
8771 neon_vector_mem_operand (rtx op, int type)
8775 /* Reject eliminable registers. */
8776 if (! (reload_in_progress || reload_completed)
8777 && ( reg_mentioned_p (frame_pointer_rtx, op)
8778 || reg_mentioned_p (arg_pointer_rtx, op)
8779 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8780 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8781 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8782 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8785 /* Constants are converted into offsets from labels. */
8786 if (GET_CODE (op) != MEM)
8791 if (reload_completed
8792 && (GET_CODE (ind) == LABEL_REF
8793 || (GET_CODE (ind) == CONST
8794 && GET_CODE (XEXP (ind, 0)) == PLUS
8795 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8796 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8799 /* Match: (mem (reg)). */
8800 if (GET_CODE (ind) == REG)
8801 return arm_address_register_rtx_p (ind, 0);
8803 /* Allow post-increment with Neon registers. */
8804 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8805 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8807 /* FIXME: vld1 allows register post-modify. */
8813 && GET_CODE (ind) == PLUS
8814 && GET_CODE (XEXP (ind, 0)) == REG
8815 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8816 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8817 && INTVAL (XEXP (ind, 1)) > -1024
8818 && INTVAL (XEXP (ind, 1)) < 1016
8819 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8825 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
8828 neon_struct_mem_operand (rtx op)
8832 /* Reject eliminable registers. */
8833 if (! (reload_in_progress || reload_completed)
8834 && ( reg_mentioned_p (frame_pointer_rtx, op)
8835 || reg_mentioned_p (arg_pointer_rtx, op)
8836 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8837 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8838 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8839 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8842 /* Constants are converted into offsets from labels. */
8843 if (GET_CODE (op) != MEM)
8848 if (reload_completed
8849 && (GET_CODE (ind) == LABEL_REF
8850 || (GET_CODE (ind) == CONST
8851 && GET_CODE (XEXP (ind, 0)) == PLUS
8852 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8853 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8856 /* Match: (mem (reg)). */
8857 if (GET_CODE (ind) == REG)
8858 return arm_address_register_rtx_p (ind, 0);
8863 /* Return true if X is a register that will be eliminated later on. */
8865 arm_eliminable_register (rtx x)
8867 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8868 || REGNO (x) == ARG_POINTER_REGNUM
8869 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8870 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8873 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8874 coprocessor registers. Otherwise return NO_REGS. */
8877 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8881 if (!TARGET_NEON_FP16)
8882 return GENERAL_REGS;
8883 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8885 return GENERAL_REGS;
8889 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8890 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8891 && neon_vector_mem_operand (x, 0))
8894 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8897 return GENERAL_REGS;
8900 /* Values which must be returned in the most-significant end of the return register. */
8904 arm_return_in_msb (const_tree valtype)
8906 return (TARGET_AAPCS_BASED
8908 && (AGGREGATE_TYPE_P (valtype)
8909 || TREE_CODE (valtype) == COMPLEX_TYPE));
8912 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8913 Used by the Cirrus Maverick code which has to work around
8914 a hardware bug triggered by such instructions. */
8916 arm_memory_load_p (rtx insn)
8918 rtx body, lhs, rhs;
8920 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8923 body = PATTERN (insn);
8925 if (GET_CODE (body) != SET)
8928 lhs = XEXP (body, 0);
8929 rhs = XEXP (body, 1);
8931 lhs = REG_OR_SUBREG_RTX (lhs);
8933 /* If the destination is not a general-purpose
8934 register, we do not have to worry. */
8935 if (GET_CODE (lhs) != REG
8936 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8939 /* As well as loads from memory, we also have to react
8940 to loads of invalid constants which will be turned
8941 into loads from the minipool. */
8942 return (GET_CODE (rhs) == MEM
8943 || GET_CODE (rhs) == SYMBOL_REF
8944 || note_invalid_constants (insn, -1, false));
8947 /* Return TRUE if INSN is a Cirrus instruction. */
8949 arm_cirrus_insn_p (rtx insn)
8951 enum attr_cirrus attr;
8953 /* get_attr cannot accept USE or CLOBBER. */
8955 || GET_CODE (insn) != INSN
8956 || GET_CODE (PATTERN (insn)) == USE
8957 || GET_CODE (PATTERN (insn)) == CLOBBER)
8960 attr = get_attr_cirrus (insn);
8962 return attr != CIRRUS_NOT;
8965 /* Cirrus reorg for invalid instruction combinations. */
8967 cirrus_reorg (rtx first)
8969 enum attr_cirrus attr;
8970 rtx body = PATTERN (first);
8974 /* Any branch must be followed by 2 non-Cirrus instructions.  */
8975 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8978 t = next_nonnote_insn (first);
8980 if (arm_cirrus_insn_p (t))
8983 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8987 emit_insn_after (gen_nop (), first);
8992 /* (float (blah)) is in parallel with a clobber. */
8993 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8994 body = XVECEXP (body, 0, 0);
8996 if (GET_CODE (body) == SET)
8998 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9000 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9001 be followed by a non-Cirrus insn.  */
9002 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9004 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9005 emit_insn_after (gen_nop (), first);
9009 else if (arm_memory_load_p (first))
9011 unsigned int arm_regno;
9013 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9014 ldr/cfmv64hr combination where the Rd field is the same
9015 in both instructions must be split with a non-Cirrus insn.  */
9022 /* Get Arm register number for ldr insn. */
9023 if (GET_CODE (lhs) == REG)
9024 arm_regno = REGNO (lhs);
9027 gcc_assert (GET_CODE (rhs) == REG);
9028 arm_regno = REGNO (rhs);
9032 first = next_nonnote_insn (first);
9034 if (! arm_cirrus_insn_p (first))
9037 body = PATTERN (first);
9039 /* (float (blah)) is in parallel with a clobber. */
9040 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9041 body = XVECEXP (body, 0, 0);
9043 if (GET_CODE (body) == FLOAT)
9044 body = XEXP (body, 0);
9046 if (get_attr_cirrus (first) == CIRRUS_MOVE
9047 && GET_CODE (XEXP (body, 1)) == REG
9048 && arm_regno == REGNO (XEXP (body, 1)))
9049 emit_insn_after (gen_nop (), first);
9055 /* get_attr cannot accept USE or CLOBBER. */
9057 || GET_CODE (first) != INSN
9058 || GET_CODE (PATTERN (first)) == USE
9059 || GET_CODE (PATTERN (first)) == CLOBBER)
9062 attr = get_attr_cirrus (first);
9064 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9065 must be followed by a non-coprocessor instruction. */
9066 if (attr == CIRRUS_COMPARE)
9070 t = next_nonnote_insn (first);
9072 if (arm_cirrus_insn_p (t))
9075 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9079 emit_insn_after (gen_nop (), first);
9085 /* Return TRUE if X references a SYMBOL_REF. */
9087 symbol_mentioned_p (rtx x)
9092 if (GET_CODE (x) == SYMBOL_REF)
9095 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9096 are constant offsets, not symbols. */
9097 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9100 fmt = GET_RTX_FORMAT (GET_CODE (x));
9102 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9108 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9109 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9112 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9119 /* Return TRUE if X references a LABEL_REF. */
9121 label_mentioned_p (rtx x)
9126 if (GET_CODE (x) == LABEL_REF)
9129 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9130 instruction, but they are constant offsets, not symbols. */
9131 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9134 fmt = GET_RTX_FORMAT (GET_CODE (x));
9135 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9141 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9142 if (label_mentioned_p (XVECEXP (x, i, j)))
9145 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9153 tls_mentioned_p (rtx x)
9155 switch (GET_CODE (x))
9158 return tls_mentioned_p (XEXP (x, 0));
9161 if (XINT (x, 1) == UNSPEC_TLS)
9169 /* Must not copy any rtx that uses a pc-relative address. */
9172 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9174 if (GET_CODE (*x) == UNSPEC
9175 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9181 arm_cannot_copy_insn_p (rtx insn)
9183 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9189 enum rtx_code code = GET_CODE (x);
9206 /* Return 1 if memory locations are adjacent. */
9208 adjacent_mem_locations (rtx a, rtx b)
9210 /* We don't guarantee to preserve the order of these memory refs. */
9211 if (volatile_refs_p (a) || volatile_refs_p (b))
9214 if ((GET_CODE (XEXP (a, 0)) == REG
9215 || (GET_CODE (XEXP (a, 0)) == PLUS
9216 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9217 && (GET_CODE (XEXP (b, 0)) == REG
9218 || (GET_CODE (XEXP (b, 0)) == PLUS
9219 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9221 HOST_WIDE_INT val0 = 0, val1 = 0;
9225 if (GET_CODE (XEXP (a, 0)) == PLUS)
9227 reg0 = XEXP (XEXP (a, 0), 0);
9228 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9233 if (GET_CODE (XEXP (b, 0)) == PLUS)
9235 reg1 = XEXP (XEXP (b, 0), 0);
9236 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9241 /* Don't accept any offset that will require multiple
9242 instructions to handle, since this would cause the
9243 arith_adjacentmem pattern to output an overlong sequence. */
9244 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9247 /* Don't allow an eliminable register: register elimination can make
9248 the offset too large. */
9249 if (arm_eliminable_register (reg0))
9252 val_diff = val1 - val0;
9256 /* If the target has load delay slots, then there's no benefit
9257 to using an ldm instruction unless the offset is zero and
9258 we are optimizing for size. */
9259 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9260 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9261 && (val_diff == 4 || val_diff == -4));
9264 return ((REGNO (reg0) == REGNO (reg1))
9265 && (val_diff == 4 || val_diff == -4));
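/* An illustrative example (not from the original source; registers and
   offsets are arbitrary): on a core where arm_ld_sched is false, the pair

	(mem (plus (reg r4) (const_int 8)))
	(mem (plus (reg r4) (const_int 12)))

   is adjacent (same base register, val_diff == 4), whereas offsets of 8
   and 20, or two different base registers, are rejected.  */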
9271 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9272 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9273 instruction. ADD_OFFSET is nonzero if the base address register needs
9274 to be modified with an add instruction before we can use it. */
9277 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9278 int nops, HOST_WIDE_INT add_offset)
9280 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9281 if the offset isn't small enough. The reason 2 ldrs are faster
9282 is because these ARMs are able to do more than one cache access
9283 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9284 whilst the ARM8 has a double bandwidth cache. This means that
9285 these cores can do both an instruction fetch and a data fetch in
9286 a single cycle, so the trick of calculating the address into a
9287 scratch register (one of the result regs) and then doing a load
9288 multiple actually becomes slower (and no smaller in code size).
9289 That is the transformation
9291 ldr rd1, [rbase + offset]
9292 ldr rd2, [rbase + offset + 4]
9296 add rd1, rbase, offset
9297 ldmia rd1, {rd1, rd2}
9299 produces worse code -- '3 cycles + any stalls on rd2' instead of
9300 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9301 access per cycle, the first sequence could never complete in less
9302 than 6 cycles, whereas the ldm sequence would only take 5 and
9303 would make better use of sequential accesses if not hitting the cache.
9306 We cheat here and test 'arm_ld_sched' which we currently know to
9307 only be true for the ARM8, ARM9 and StrongARM. If this ever
9308 changes, then the test below needs to be reworked. */
9309 if (nops == 2 && arm_ld_sched && add_offset != 0)
9312 /* XScale has load-store double instructions, but they have stricter
9313 alignment requirements than load-store multiple, so we cannot use them.
9316 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9317 the pipeline until completion.
9325 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
9334 Best case ldr will always win. However, the more ldr instructions
9335 we issue, the less likely we are to be able to schedule them well.
9336 Using ldr instructions also increases code size.
9338 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9339 for counts of 3 or 4 regs. */
9340 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9345 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9346 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9347 an array ORDER which describes the sequence to use when accessing the
9348 offsets that produces an ascending order. In this sequence, each
9349 offset must be larger by exactly 4 than the previous one. ORDER[0]
9350 must have been filled in with the lowest offset by the caller.
9351 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9352 we use to verify that ORDER produces an ascending order of registers.
9353 Return true if it was possible to construct such an order, false if not.  */
9357 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9361 for (i = 1; i < nops; i++)
9365 order[i] = order[i - 1];
9366 for (j = 0; j < nops; j++)
9367 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9369 /* We must find exactly one offset that is higher than the
9370 previous one by 4. */
9371 if (order[i] != order[i - 1])
9375 if (order[i] == order[i - 1])
9377 /* The register numbers must be ascending. */
9378 if (unsorted_regs != NULL
9379 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
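/* A worked example (illustrative only): with NOPS == 4 and
   UNSORTED_OFFSETS == {8, 0, 12, 4}, the caller seeds ORDER[0] = 1 (the
   lowest offset, 0); the loop above then selects indices 3, 0 and 2 in
   turn, yielding the ascending offsets 0, 4, 8, 12.  A duplicated
   offset, or a gap other than 4, makes the scan fail.  */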
9385 /* Used to determine in a peephole whether a sequence of load
9386 instructions can be changed into a load-multiple instruction.
9387 NOPS is the number of separate load instructions we are examining. The
9388 first NOPS entries in OPERANDS are the destination registers, the
9389 next NOPS entries are memory operands. If this function is
9390 successful, *BASE is set to the common base register of the memory
9391 accesses; *LOAD_OFFSET is set to the first memory location's offset
9392 from that base register.
9393 REGS is an array filled in with the destination register numbers.
9394 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9395 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
9396 the sequence of registers in REGS matches the loads from ascending memory
9397 locations, and the function verifies that the register numbers are
9398 themselves ascending. If CHECK_REGS is false, the register numbers
9399 are stored in the order they are found in the operands. */
9401 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9402 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9404 int unsorted_regs[MAX_LDM_STM_OPS];
9405 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9406 int order[MAX_LDM_STM_OPS];
9407 rtx base_reg_rtx = NULL;
9411 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9412 easily extended if required. */
9413 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9415 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9417 /* Loop over the operands and check that the memory references are
9418 suitable (i.e. immediate offsets from the same base register). At
9419 the same time, extract the target register, and the memory offsets.  */
9421 for (i = 0; i < nops; i++)
9426 /* Convert a subreg of a mem into the mem itself. */
9427 if (GET_CODE (operands[nops + i]) == SUBREG)
9428 operands[nops + i] = alter_subreg (operands + (nops + i));
9430 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9432 /* Don't reorder volatile memory references; it doesn't seem worth
9433 looking for the case where the order is ok anyway. */
9434 if (MEM_VOLATILE_P (operands[nops + i]))
9437 offset = const0_rtx;
9439 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9440 || (GET_CODE (reg) == SUBREG
9441 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9442 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9443 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9445 || (GET_CODE (reg) == SUBREG
9446 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9447 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9452 base_reg = REGNO (reg);
9454 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9457 else if (base_reg != (int) REGNO (reg))
9458 /* Not addressed from the same base register. */
9461 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9462 ? REGNO (operands[i])
9463 : REGNO (SUBREG_REG (operands[i])));
9465 /* If it isn't an integer register, or if it overwrites the
9466 base register but isn't the last insn in the list, then
9467 we can't do this. */
9468 if (unsorted_regs[i] < 0
9469 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9470 || unsorted_regs[i] > 14
9471 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9474 unsorted_offsets[i] = INTVAL (offset);
9475 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9479 /* Not a suitable memory address. */
9483 /* All the useful information has now been extracted from the
9484 operands into unsorted_regs and unsorted_offsets; additionally,
9485 order[0] has been set to the lowest offset in the list. Sort
9486 the offsets into order, verifying that they are adjacent, and
9487 check that the register numbers are ascending. */
9488 if (!compute_offset_order (nops, unsorted_offsets, order,
9489 check_regs ? unsorted_regs : NULL))
9493 memcpy (saved_order, order, sizeof order);
9499 for (i = 0; i < nops; i++)
9500 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9502 *load_offset = unsorted_offsets[order[0]];
9506 && !peep2_reg_dead_p (nops, base_reg_rtx))
9509 if (unsorted_offsets[order[0]] == 0)
9510 ldm_case = 1; /* ldmia */
9511 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9512 ldm_case = 2; /* ldmib */
9513 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9514 ldm_case = 3; /* ldmda */
9515 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9516 ldm_case = 4; /* ldmdb */
9517 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9518 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9523 if (!multiple_operation_profitable_p (false, nops,
9525 ? unsorted_offsets[order[0]] : 0))
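/* For instance (a sketch; register numbers are arbitrary): four loads
   from [r5], [r5, #4], [r5, #8] and [r5, #12] into ascending registers
   give ldm_case 1 (ldmia r5, {...}); a first offset of 4 on ARM gives
   case 2 (ldmib), a last offset of 0 gives case 3 (ldmda), a last
   offset of -4 gives case 4 (ldmdb), and any other offset that is a
   valid add immediate falls through to case 5.  */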
9531 /* Used to determine in a peephole whether a sequence of store instructions can
9532 be changed into a store-multiple instruction.
9533 NOPS is the number of separate store instructions we are examining.
9534 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
9536 The first NOPS entries in OPERANDS are the source registers, the next
9537 NOPS entries are memory operands. If this function is successful, *BASE is
9538 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9539 to the first memory location's offset from that base register. REGS is an
9540 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9541 likewise filled with the corresponding rtx's.
9542 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9543 numbers to an ascending order of stores.
9544 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9545 from ascending memory locations, and the function verifies that the register
9546 numbers are themselves ascending. If CHECK_REGS is false, the register
9547 numbers are stored in the order they are found in the operands. */
9549 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9550 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9551 HOST_WIDE_INT *load_offset, bool check_regs)
9553 int unsorted_regs[MAX_LDM_STM_OPS];
9554 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9555 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9556 int order[MAX_LDM_STM_OPS];
9558 rtx base_reg_rtx = NULL;
9561 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9562 easily extended if required. */
9563 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9565 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9567 /* Loop over the operands and check that the memory references are
9568 suitable (i.e. immediate offsets from the same base register). At
9569 the same time, extract the target register, and the memory offsets.  */
9571 for (i = 0; i < nops; i++)
9576 /* Convert a subreg of a mem into the mem itself. */
9577 if (GET_CODE (operands[nops + i]) == SUBREG)
9578 operands[nops + i] = alter_subreg (operands + (nops + i));
9580 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9582 /* Don't reorder volatile memory references; it doesn't seem worth
9583 looking for the case where the order is ok anyway. */
9584 if (MEM_VOLATILE_P (operands[nops + i]))
9587 offset = const0_rtx;
9589 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9590 || (GET_CODE (reg) == SUBREG
9591 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9592 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9593 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9595 || (GET_CODE (reg) == SUBREG
9596 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9597 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9600 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9601 ? operands[i] : SUBREG_REG (operands[i]));
9602 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9606 base_reg = REGNO (reg);
9608 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9611 else if (base_reg != (int) REGNO (reg))
9612 /* Not addressed from the same base register. */
9615 /* If it isn't an integer register, then we can't do this. */
9616 if (unsorted_regs[i] < 0
9617 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9618 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9619 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9620 || unsorted_regs[i] > 14)
9623 unsorted_offsets[i] = INTVAL (offset);
9624 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9628 /* Not a suitable memory address. */
9632 /* All the useful information has now been extracted from the
9633 operands into unsorted_regs and unsorted_offsets; additionally,
9634 order[0] has been set to the lowest offset in the list. Sort
9635 the offsets into order, verifying that they are adjacent, and
9636 check that the register numbers are ascending. */
9637 if (!compute_offset_order (nops, unsorted_offsets, order,
9638 check_regs ? unsorted_regs : NULL))
9642 memcpy (saved_order, order, sizeof order);
9648 for (i = 0; i < nops; i++)
9650 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9652 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9655 *load_offset = unsorted_offsets[order[0]];
9659 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9662 if (unsorted_offsets[order[0]] == 0)
9663 stm_case = 1; /* stmia */
9664 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9665 stm_case = 2; /* stmib */
9666 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9667 stm_case = 3; /* stmda */
9668 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9669 stm_case = 4; /* stmdb */
9673 if (!multiple_operation_profitable_p (false, nops, 0))
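/* The stm_case numbering mirrors the ldm_case numbering above (stmia,
   stmib, stmda, stmdb); note that, unlike the load case, there is no
   case 5 here -- a store sequence whose base offset would need a
   separate add is simply rejected.  */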
9679 /* Routines for use in generating RTL. */
9681 /* Generate a load-multiple instruction. COUNT is the number of loads in
9682 the instruction; REGS and MEMS are arrays containing the operands.
9683 BASEREG is the base register to be used in addressing the memory operands.
9684 WBACK_OFFSET is nonzero if the instruction should update the base register.  */
9688 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9689 HOST_WIDE_INT wback_offset)
9694 if (!multiple_operation_profitable_p (false, count, 0))
9700 for (i = 0; i < count; i++)
9701 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9703 if (wback_offset != 0)
9704 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9712 result = gen_rtx_PARALLEL (VOIDmode,
9713 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9714 if (wback_offset != 0)
9716 XVECEXP (result, 0, 0)
9717 = gen_rtx_SET (VOIDmode, basereg,
9718 plus_constant (basereg, wback_offset));
9723 for (j = 0; i < count; i++, j++)
9724 XVECEXP (result, 0, i)
9725 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
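/* For example (an illustrative sketch; rb, r4 and r5 are arbitrary),
   with COUNT == 2 and WBACK_OFFSET == 8 the PARALLEL built above has
   the shape

	(parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
		   (set (reg r4) (mem (reg rb)))
		   (set (reg r5) (mem (plus (reg rb) (const_int 4))))])

   i.e. the optional base-register update is element 0 and the loads
   follow in ascending memory order.  */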
9730 /* Generate a store-multiple instruction. COUNT is the number of stores in
9731 the instruction; REGS and MEMS are arrays containing the operands.
9732 BASEREG is the base register to be used in addressing the memory operands.
9733 WBACK_OFFSET is nonzero if the instruction should update the base register.  */
9737 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9738 HOST_WIDE_INT wback_offset)
9743 if (GET_CODE (basereg) == PLUS)
9744 basereg = XEXP (basereg, 0);
9746 if (!multiple_operation_profitable_p (false, count, 0))
9752 for (i = 0; i < count; i++)
9753 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9755 if (wback_offset != 0)
9756 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9764 result = gen_rtx_PARALLEL (VOIDmode,
9765 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9766 if (wback_offset != 0)
9768 XVECEXP (result, 0, 0)
9769 = gen_rtx_SET (VOIDmode, basereg,
9770 plus_constant (basereg, wback_offset));
9775 for (j = 0; i < count; i++, j++)
9776 XVECEXP (result, 0, i)
9777 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9782 /* Generate either a load-multiple or a store-multiple instruction. This
9783 function can be used in situations where we can start with a single MEM
9784 rtx and adjust its address upwards.
9785 COUNT is the number of operations in the instruction, not counting a
9786 possible update of the base register.  REGS is an array containing the register numbers to be used.
9788 BASEREG is the base register to be used in addressing the memory operands,
9789 which are constructed from BASEMEM.
9790 WRITE_BACK specifies whether the generated instruction should include an
9791 update of the base register.
9792 OFFSETP is used to pass an offset to and from this function; this offset
9793 is not used when constructing the address (instead BASEMEM should have an
9794 appropriate offset in its address), it is used only for setting
9795 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
9798 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9799 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9801 rtx mems[MAX_LDM_STM_OPS];
9802 HOST_WIDE_INT offset = *offsetp;
9805 gcc_assert (count <= MAX_LDM_STM_OPS);
9807 if (GET_CODE (basereg) == PLUS)
9808 basereg = XEXP (basereg, 0);
9810 for (i = 0; i < count; i++)
9812 rtx addr = plus_constant (basereg, i * 4);
9813 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9821 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9822 write_back ? 4 * count : 0);
9824 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9825 write_back ? 4 * count : 0);
9829 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9830 rtx basemem, HOST_WIDE_INT *offsetp)
9832 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9837 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9838 rtx basemem, HOST_WIDE_INT *offsetp)
9840 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9844 /* Called from a peephole2 expander to turn a sequence of loads into an
9845 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9846 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9847 is true if we can reorder the registers because they are used commutatively subsequently.
9849 Returns true iff we could generate a new instruction. */
9852 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9854 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9855 rtx mems[MAX_LDM_STM_OPS];
9858 HOST_WIDE_INT offset;
9859 int write_back = FALSE;
9863 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
9864 &base_reg, &offset, !sort_regs);
9870 for (i = 0; i < nops - 1; i++)
9871 for (j = i + 1; j < nops; j++)
9872 if (regs[i] > regs[j])
9878 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9882 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
9883 gcc_assert (ldm_case == 1 || ldm_case == 5);
9889 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
9890 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
9895 base_reg_rtx = newbase;
9899 for (i = 0; i < nops; i++)
9901 addr = plus_constant (base_reg_rtx, offset + i * 4);
9902 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
9905 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
9906 write_back ? offset + i * 4 : 0));
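/* An illustrative before/after for this peephole (register numbers are
   arbitrary):

	ldr	r0, [r4, #16]		add	r0, r4, #16
	ldr	r1, [r4, #20]	==>	ldmia	r0, {r0, r1, r2}
	ldr	r2, [r4, #24]

   the add is only needed for ldm_case 5; with a base offset of zero the
   ldmia uses the original base register directly.  */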
9910 /* Called from a peephole2 expander to turn a sequence of stores into an
9911 STM instruction. OPERANDS are the operands found by the peephole matcher;
9912 NOPS indicates how many separate stores we are trying to combine.
9913 Returns true iff we could generate a new instruction. */
9916 gen_stm_seq (rtx *operands, int nops)
9919 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9920 rtx mems[MAX_LDM_STM_OPS];
9923 HOST_WIDE_INT offset;
9924 int write_back = FALSE;
9929 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
9930 mem_order, &base_reg, &offset, true);
9935 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9937 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
9940 gcc_assert (base_reg_dies);
9946 gcc_assert (base_reg_dies);
9947 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
9951 addr = plus_constant (base_reg_rtx, offset);
9953 for (i = 0; i < nops; i++)
9955 addr = plus_constant (base_reg_rtx, offset + i * 4);
9956 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
9959 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
9960 write_back ? offset + i * 4 : 0));
9964 /* Called from a peephole2 expander to turn a sequence of stores that are
9965 preceded by constant loads into an STM instruction. OPERANDS are the
9966 operands found by the peephole matcher; NOPS indicates how many
9967 separate stores we are trying to combine; there are 2 * NOPS
9968 instructions in the peephole.
9969 Returns true iff we could generate a new instruction. */
9972 gen_const_stm_seq (rtx *operands, int nops)
9974 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
9975 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9976 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
9977 rtx mems[MAX_LDM_STM_OPS];
9980 HOST_WIDE_INT offset;
9981 int write_back = FALSE;
9986 HARD_REG_SET allocated;
9988 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
9989 mem_order, &base_reg, &offset, false);
9994 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
9996 /* If the same register is used more than once, try to find a free register.  */
9998 CLEAR_HARD_REG_SET (allocated);
9999 for (i = 0; i < nops; i++)
10001 for (j = i + 1; j < nops; j++)
10002 if (regs[i] == regs[j])
10004 rtx t = peep2_find_free_register (0, nops * 2,
10005 TARGET_THUMB1 ? "l" : "r",
10006 SImode, &allocated);
10010 regs[i] = REGNO (t);
10014 /* Compute an ordering that maps the register numbers to an ascending sequence.  */
10017 for (i = 0; i < nops; i++)
10018 if (regs[i] < regs[reg_order[0]])
10021 for (i = 1; i < nops; i++)
10023 int this_order = reg_order[i - 1];
10024 for (j = 0; j < nops; j++)
10025 if (regs[j] > regs[reg_order[i - 1]]
10026 && (this_order == reg_order[i - 1]
10027 || regs[j] < regs[this_order]))
10029 reg_order[i] = this_order;
10032 /* Ensure that registers that must be live after the instruction end
10033 up with the correct value. */
10034 for (i = 0; i < nops; i++)
10036 int this_order = reg_order[i];
10037 if ((this_order != mem_order[i]
10038 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10039 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10043 /* Load the constants. */
10044 for (i = 0; i < nops; i++)
10046 rtx op = operands[2 * nops + mem_order[i]];
10047 sorted_regs[i] = regs[reg_order[i]];
10048 emit_move_insn (reg_rtxs[reg_order[i]], op);
10051 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10053 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10056 gcc_assert (base_reg_dies);
10062 gcc_assert (base_reg_dies);
10063 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10067 addr = plus_constant (base_reg_rtx, offset);
10069 for (i = 0; i < nops; i++)
10071 addr = plus_constant (base_reg_rtx, offset + i * 4);
10072 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10075 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10076 write_back ? offset + i * 4 : 0));
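/* A sketch of the transformation (constants and registers are
   arbitrary):

	mov	r0, #10			mov	r0, #10
	str	r0, [r4]		mov	r1, #20
	mov	r0, #20		==>	stmia	r4, {r0, r1}
	str	r0, [r4, #4]

   the reuse of r0 for both constants is resolved above by allocating a
   free register with peep2_find_free_register and reordering so that
   the stored registers ascend.  */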
10081 arm_gen_movmemqi (rtx *operands)
10083 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10084 HOST_WIDE_INT srcoffset, dstoffset;
10086 rtx src, dst, srcbase, dstbase;
10087 rtx part_bytes_reg = NULL;
10090 if (GET_CODE (operands[2]) != CONST_INT
10091 || GET_CODE (operands[3]) != CONST_INT
10092 || INTVAL (operands[2]) > 64
10093 || INTVAL (operands[3]) & 3)
10096 dstbase = operands[0];
10097 srcbase = operands[1];
10099 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10100 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10102 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10103 out_words_to_go = INTVAL (operands[2]) / 4;
10104 last_bytes = INTVAL (operands[2]) & 3;
10105 dstoffset = srcoffset = 0;
10107 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10108 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10110 for (i = 0; in_words_to_go >= 2; i += 4)
10112 if (in_words_to_go > 4)
10113 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10114 TRUE, srcbase, &srcoffset));
10116 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10117 src, FALSE, srcbase,
10120 if (out_words_to_go)
10122 if (out_words_to_go > 4)
10123 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10124 TRUE, dstbase, &dstoffset));
10125 else if (out_words_to_go != 1)
10126 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10127 out_words_to_go, dst,
10130 dstbase, &dstoffset));
10133 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10134 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10135 if (last_bytes != 0)
10137 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10143 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10144 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10147 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10148 if (out_words_to_go)
10152 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10153 sreg = copy_to_reg (mem);
10155 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10156 emit_move_insn (mem, sreg);
10159 gcc_assert (!in_words_to_go); /* Sanity check */
10162 if (in_words_to_go)
10164 gcc_assert (in_words_to_go > 0);
10166 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10167 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10170 gcc_assert (!last_bytes || part_bytes_reg);
10172 if (BYTES_BIG_ENDIAN && last_bytes)
10174 rtx tmp = gen_reg_rtx (SImode);
10176 /* The bytes we want are in the top end of the word. */
10177 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10178 GEN_INT (8 * (4 - last_bytes))));
10179 part_bytes_reg = tmp;
10183 mem = adjust_automodify_address (dstbase, QImode,
10184 plus_constant (dst, last_bytes - 1),
10185 dstoffset + last_bytes - 1);
10186 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10190 tmp = gen_reg_rtx (SImode);
10191 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10192 part_bytes_reg = tmp;
10199 if (last_bytes > 1)
10201 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10202 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10206 rtx tmp = gen_reg_rtx (SImode);
10207 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10208 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10209 part_bytes_reg = tmp;
10216 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10217 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
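/* A worked example (illustrative): a 14-byte copy gives in_words_to_go
   == 4 (ARM_NUM_INTS rounds up), out_words_to_go == 3 and last_bytes
   == 2, so we emit a four-word load multiple, a three-word store
   multiple, and finally store the trailing two bytes from
   part_bytes_reg as a halfword.  */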
10224 /* Select a dominance comparison mode if possible for a test of the general
10225 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10226 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10227 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10228 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10229 In all cases OP will be either EQ or NE, but we don't need to know which
10230 here. If we are unable to support a dominance comparison we return
10231 CC mode. This will then fail to match for the RTL expressions that
10232 generate this call. */
10234 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10236 enum rtx_code cond1, cond2;
10239 /* Currently we will probably get the wrong result if the individual
10240 comparisons are not simple. This also ensures that it is safe to
10241 reverse a comparison if necessary. */
10242 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10244 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10248 /* The if_then_else variant of this tests the second condition if the
10249 first passes, but is true if the first fails. Reverse the first
10250 condition to get a true "inclusive-or" expression. */
10251 if (cond_or == DOM_CC_NX_OR_Y)
10252 cond1 = reverse_condition (cond1);
10254 /* If the comparisons are not equal, and one doesn't dominate the other,
10255 then we can't do this. */
10257 && !comparison_dominates_p (cond1, cond2)
10258 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10263 enum rtx_code temp = cond1;
10271 if (cond_or == DOM_CC_X_AND_Y)
10276 case EQ: return CC_DEQmode;
10277 case LE: return CC_DLEmode;
10278 case LEU: return CC_DLEUmode;
10279 case GE: return CC_DGEmode;
10280 case GEU: return CC_DGEUmode;
10281 default: gcc_unreachable ();
10285 if (cond_or == DOM_CC_X_AND_Y)
10297 gcc_unreachable ();
10301 if (cond_or == DOM_CC_X_AND_Y)
10313 gcc_unreachable ();
10317 if (cond_or == DOM_CC_X_AND_Y)
10318 return CC_DLTUmode;
10323 return CC_DLTUmode;
10325 return CC_DLEUmode;
10329 gcc_unreachable ();
10333 if (cond_or == DOM_CC_X_AND_Y)
10334 return CC_DGTUmode;
10339 return CC_DGTUmode;
10341 return CC_DGEUmode;
10345 gcc_unreachable ();
10348 /* The remaining cases only occur when both comparisons are the same.  */
10351 gcc_assert (cond1 == cond2);
10355 gcc_assert (cond1 == cond2);
10359 gcc_assert (cond1 == cond2);
10363 gcc_assert (cond1 == cond2);
10364 return CC_DLEUmode;
10367 gcc_assert (cond1 == cond2);
10368 return CC_DGEUmode;
10371 gcc_unreachable ();
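/* Example (illustrative): for (and (eq r0 r1) (ge r2 r3)) the EQ/GE
   pair is accepted above, since EQ dominates GE, and CC_DGEmode is
   returned, collapsing both tests into one dominated compare.  For an
   EQ/LT pair neither condition dominates the other, so CCmode is
   returned and the caller's pattern fails to match.  */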
10376 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10378 /* All floating point compares return CCFP if it is an equality
10379 comparison, and CCFPE otherwise. */
10380 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10400 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10405 gcc_unreachable ();
10409 /* A compare with a shifted operand. Because of canonicalization, the
10410 comparison will have to be swapped when we emit the assembler. */
10411 if (GET_MODE (y) == SImode
10412 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10413 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10414 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10415 || GET_CODE (x) == ROTATERT))
10418 /* This operation is performed swapped, but since we only rely on the Z
10419 flag we don't need an additional mode. */
10420 if (GET_MODE (y) == SImode
10421 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10422 && GET_CODE (x) == NEG
10423 && (op == EQ || op == NE))
10426 /* This is a special case that is used by combine to allow a
10427 comparison of a shifted byte load to be split into a zero-extend
10428 followed by a comparison of the shifted integer (only valid for
10429 equalities and unsigned inequalities). */
10430 if (GET_MODE (x) == SImode
10431 && GET_CODE (x) == ASHIFT
10432 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10433 && GET_CODE (XEXP (x, 0)) == SUBREG
10434 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10435 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10436 && (op == EQ || op == NE
10437 || op == GEU || op == GTU || op == LTU || op == LEU)
10438 && GET_CODE (y) == CONST_INT)
10441 /* A construct for a conditional compare, if the false arm contains
10442 0, then both conditions must be true, otherwise either condition
10443 must be true. Not all conditions are possible, so CCmode is
10444 returned if it can't be done. */
10445 if (GET_CODE (x) == IF_THEN_ELSE
10446 && (XEXP (x, 2) == const0_rtx
10447 || XEXP (x, 2) == const1_rtx)
10448 && COMPARISON_P (XEXP (x, 0))
10449 && COMPARISON_P (XEXP (x, 1)))
10450 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10451 INTVAL (XEXP (x, 2)));
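/* For instance (illustrative), combine may hand us

	(if_then_else (lt (reg r0) (reg r1))
		      (eq (reg r2) (reg r3))
		      (const_int 0))

   compared against zero: the const0_rtx false arm means both conditions
   must hold (DOM_CC_X_AND_Y), while a const1_rtx false arm selects
   DOM_CC_NX_OR_Y.  */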
10453 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10454 if (GET_CODE (x) == AND
10455 && COMPARISON_P (XEXP (x, 0))
10456 && COMPARISON_P (XEXP (x, 1)))
10457 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10460 if (GET_CODE (x) == IOR
10461 && COMPARISON_P (XEXP (x, 0))
10462 && COMPARISON_P (XEXP (x, 1)))
10463 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10466 /* An operation (on Thumb) where we want to test for a single bit.
10467 This is done by shifting that bit up into the top bit of a
10468 scratch register; we can then branch on the sign bit. */
10470 && GET_MODE (x) == SImode
10471 && (op == EQ || op == NE)
10472 && GET_CODE (x) == ZERO_EXTRACT
10473 && XEXP (x, 1) == const1_rtx)
10476 /* For an operation that sets the condition codes as a side-effect, the
10477 V flag is not set correctly, so we can only use comparisons where
10478 this doesn't matter.  (For LT and GE we can use "mi" and "pl" instead.)  */
10480 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10481 if (GET_MODE (x) == SImode
10483 && (op == EQ || op == NE || op == LT || op == GE)
10484 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10485 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10486 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10487 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10488 || GET_CODE (x) == LSHIFTRT
10489 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10490 || GET_CODE (x) == ROTATERT
10491 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10492 return CC_NOOVmode;
10494 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10497 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10498 && GET_CODE (x) == PLUS
10499 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10502 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10504 /* To keep things simple, always use the Cirrus cfcmp64 if it is available.  */
10506 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10513 /* A DImode comparison against zero can be implemented by
10514 or'ing the two halves together. */
10515 if (y == const0_rtx)
10518 /* We can do an equality test in three Thumb instructions. */
10528 /* DImode unsigned comparisons can be implemented by cmp +
10529 cmpeq without a scratch register.  Not worth doing in Thumb-2.  */
10540 /* DImode signed and unsigned comparisons can be implemented
10541 by cmp + sbcs with a scratch register, but that does not
10542 set the Z flag -- we must reverse GT/LE/GTU/LEU.  */
10543 gcc_assert (op != EQ && op != NE);
10547 gcc_unreachable ();
10554 /* X and Y are two things to compare using CODE.  Emit the compare insn and
10555 return the rtx for register 0 in the proper mode.  */
10558 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10560 enum machine_mode mode;
10562 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10564 /* We might have X as a constant, Y as a register because of the predicates
10565 used for cmpdi. If so, force X to a register here. */
10566 if (dimode_comparison && !REG_P (x))
10567 x = force_reg (DImode, x);
10569 mode = SELECT_CC_MODE (code, x, y);
10570 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10572 if (dimode_comparison
10573 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10574 && mode != CC_CZmode)
10578 /* To compare two non-zero values for equality, XOR them and
10579 then compare against zero. Not used for ARM mode; there
10580 CC_CZmode is cheaper. */
10581 if (mode == CC_Zmode && y != const0_rtx)
10583 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10586 /* A scratch register is required. */
10587 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10588 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10589 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10592 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10597 /* Generate a sequence of insns that will generate the correct return
10598 address mask depending on the physical architecture that the program is running on.  */
10601 arm_gen_return_addr_mask (void)
10603 rtx reg = gen_reg_rtx (Pmode);
10605 emit_insn (gen_return_addr_mask (reg));
10610 arm_reload_in_hi (rtx *operands)
10612 rtx ref = operands[1];
10614 HOST_WIDE_INT offset = 0;
10616 if (GET_CODE (ref) == SUBREG)
10618 offset = SUBREG_BYTE (ref);
10619 ref = SUBREG_REG (ref);
10622 if (GET_CODE (ref) == REG)
10624 /* We have a pseudo which has been spilt onto the stack; there
10625 are two cases here: the first where there is a simple
10626 stack-slot replacement and a second where the stack-slot is
10627 out of range, or is used as a subreg. */
10628 if (reg_equiv_mem[REGNO (ref)])
10630 ref = reg_equiv_mem[REGNO (ref)];
10631 base = find_replacement (&XEXP (ref, 0));
10634 /* The slot is out of range, or was dressed up in a SUBREG. */
10635 base = reg_equiv_address[REGNO (ref)];
10638 base = find_replacement (&XEXP (ref, 0));
10640 /* Handle the case where the address is too complex to be offset by 1. */
10641 if (GET_CODE (base) == MINUS
10642 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10644 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10646 emit_set_insn (base_plus, base);
10649 else if (GET_CODE (base) == PLUS)
10651 /* The addend must be CONST_INT, or we would have dealt with it above. */
10652 HOST_WIDE_INT hi, lo;
10654 offset += INTVAL (XEXP (base, 1));
10655 base = XEXP (base, 0);
10657 /* Rework the address into a legal sequence of insns. */
10658 /* Valid range for lo is -4095 -> 4095 */
10661 : -((-offset) & 0xfff));
10663 /* Corner case: if lo is the max offset then we would be out of range
10664 once we have added the additional 1 below, so bump the msb into the
10665 pre-loading insn(s). */
10669 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10670 ^ (HOST_WIDE_INT) 0x80000000)
10671 - (HOST_WIDE_INT) 0x80000000);
10673 gcc_assert (hi + lo == offset);
10677 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10679 /* Get the base address; addsi3 knows how to handle constants
10680 that require more than one insn. */
10681 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10687 /* Operands[2] may overlap operands[0] (though it won't overlap
10688 operands[1]); that's why we asked for a DImode reg -- so we can
10689 use the half that does not overlap.  */
10690 if (REGNO (operands[2]) == REGNO (operands[0]))
10691 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10693 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10695 emit_insn (gen_zero_extendqisi2 (scratch,
10696 gen_rtx_MEM (QImode,
10697 plus_constant (base,
10699 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10700 gen_rtx_MEM (QImode,
10701 plus_constant (base,
10703 if (!BYTES_BIG_ENDIAN)
10704 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10705 gen_rtx_IOR (SImode,
10708 gen_rtx_SUBREG (SImode, operands[0], 0),
10712 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10713 gen_rtx_IOR (SImode,
10714 gen_rtx_ASHIFT (SImode, scratch,
10716 gen_rtx_SUBREG (SImode, operands[0], 0)));
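/* The net effect (a sketch, little-endian case; "out" is operands[0]):

	ldrb	scratch, [base]		@ byte at the lower address
	ldrb	out, [base, #1]		@ byte at the higher address
	orr	out, scratch, out, lsl #8

   the big-endian arm above shifts the scratch byte instead.  */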
10719 /* Handle storing a half-word to memory during reload by synthesizing it as
10720 two byte stores.  Take care not to clobber the input values until after we
10721 have moved them somewhere safe. This code assumes that if the DImode
10722 scratch in operands[2] overlaps either the input value or output address
10723 in some way, then that value must die in this insn (we absolutely need
10724 two scratch registers for some corner cases). */
10726 arm_reload_out_hi (rtx *operands)
10728 rtx ref = operands[0];
10729 rtx outval = operands[1];
10731 HOST_WIDE_INT offset = 0;
10733 if (GET_CODE (ref) == SUBREG)
10735 offset = SUBREG_BYTE (ref);
10736 ref = SUBREG_REG (ref);
10739 if (GET_CODE (ref) == REG)
10741 /* We have a pseudo which has been spilt onto the stack; there
10742 are two cases here: the first where there is a simple
10743 stack-slot replacement and a second where the stack-slot is
10744 out of range, or is used as a subreg. */
10745 if (reg_equiv_mem[REGNO (ref)])
10747 ref = reg_equiv_mem[REGNO (ref)];
10748 base = find_replacement (&XEXP (ref, 0));
10751 /* The slot is out of range, or was dressed up in a SUBREG. */
10752 base = reg_equiv_address[REGNO (ref)];
10755 base = find_replacement (&XEXP (ref, 0));
10757 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10759 /* Handle the case where the address is too complex to be offset by 1. */
10760 if (GET_CODE (base) == MINUS
10761 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10763 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10765 /* Be careful not to destroy OUTVAL. */
10766 if (reg_overlap_mentioned_p (base_plus, outval))
10768 /* Updating base_plus might destroy outval; see if we can
10769 swap the scratch and base_plus. */
10770 if (!reg_overlap_mentioned_p (scratch, outval))
10773 scratch = base_plus;
10778 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10780 /* Be conservative and copy OUTVAL into the scratch now;
10781 this should only be necessary if outval is a subreg
10782 of something larger than a word.  */
10783 /* XXX Might this clobber base? I can't see how it can,
10784 since scratch is known to overlap with OUTVAL, and
10785 must be wider than a word. */
10786 emit_insn (gen_movhi (scratch_hi, outval));
10787 outval = scratch_hi;
10791 emit_set_insn (base_plus, base);
10794 else if (GET_CODE (base) == PLUS)
10796 /* The addend must be CONST_INT, or we would have dealt with it above. */
10797 HOST_WIDE_INT hi, lo;
10799 offset += INTVAL (XEXP (base, 1));
10800 base = XEXP (base, 0);
10802 /* Rework the address into a legal sequence of insns. */
10803 /* Valid range for lo is -4095 -> 4095 */
10806 : -((-offset) & 0xfff));
10808 /* Corner case: if lo is the max offset then we would be out of range
10809 once we have added the additional 1 below, so bump the msb into the
10810 pre-loading insn(s). */
10814 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10815 ^ (HOST_WIDE_INT) 0x80000000)
10816 - (HOST_WIDE_INT) 0x80000000);
10818 gcc_assert (hi + lo == offset);
10822 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10824 /* Be careful not to destroy OUTVAL. */
10825 if (reg_overlap_mentioned_p (base_plus, outval))
10827 /* Updating base_plus might destroy outval; see if we
10828 can swap the scratch and base_plus. */
10829 if (!reg_overlap_mentioned_p (scratch, outval))
10832 scratch = base_plus;
10837 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10839 /* Be conservative and copy outval into scratch now;
10840 this should only be necessary if outval is a
10841 subreg of something larger than a word.  */
10842 /* XXX Might this clobber base? I can't see how it
10843 can, since scratch is known to overlap with
10845 emit_insn (gen_movhi (scratch_hi, outval));
10846 outval = scratch_hi;
10850 /* Get the base address; addsi3 knows how to handle constants
10851 that require more than one insn. */
10852 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10858 if (BYTES_BIG_ENDIAN)
10860 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10861 plus_constant (base, offset + 1)),
10862 gen_lowpart (QImode, outval)));
10863 emit_insn (gen_lshrsi3 (scratch,
10864 gen_rtx_SUBREG (SImode, outval, 0),
10866 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10867 gen_lowpart (QImode, scratch)));
10871 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10872 gen_lowpart (QImode, outval)));
10873 emit_insn (gen_lshrsi3 (scratch,
10874 gen_rtx_SUBREG (SImode, outval, 0),
10876 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10877 plus_constant (base, offset + 1)),
10878 gen_lowpart (QImode, scratch)));
10882 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10883 (padded to the size of a word) should be passed in a register. */
10886 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10888 if (TARGET_AAPCS_BASED)
10889 return must_pass_in_stack_var_size (mode, type);
10891 return must_pass_in_stack_var_size_or_pad (mode, type);
10895 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10896 Return true if an argument passed on the stack should be padded upwards,
10897 i.e. if the least-significant byte has useful data.
10898 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10899 aggregate types are placed in the lowest memory address. */
10902 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10904 if (!TARGET_AAPCS_BASED)
10905 return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
10907 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10914 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10915 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10916 byte of the register has useful data, and return the opposite if the
10917 most significant byte does.
10918 For AAPCS, small aggregates and small complex types are always padded upwards.  */
10922 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10923 tree type, int first ATTRIBUTE_UNUSED)
10925 if (TARGET_AAPCS_BASED
10926 && BYTES_BIG_ENDIAN
10927 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10928 && int_size_in_bytes (type) <= 4)
10931 /* Otherwise, use default padding. */
10932 return !BYTES_BIG_ENDIAN;
10936 /* Print a symbolic form of X to the debug file, F. */
10938 arm_print_value (FILE *f, rtx x)
10940 switch (GET_CODE (x))
10943 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10947 fprintf (f, "<0x%lx,0x%lx>", (long) XWINT (x, 2), (long) XWINT (x, 3));
10955 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10957 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10958 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10966 fprintf (f, "\"%s\"", XSTR (x, 0));
10970 fprintf (f, "`%s'", XSTR (x, 0));
10974 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10978 arm_print_value (f, XEXP (x, 0));
10982 arm_print_value (f, XEXP (x, 0));
10984 arm_print_value (f, XEXP (x, 1));
10992 fprintf (f, "????");
10997 /* Routines for manipulation of the constant pool. */
10999 /* Arm instructions cannot load a large constant directly into a
11000 register; they have to come from a pc relative load. The constant
11001 must therefore be placed in the addressable range of the pc
11002 relative load. Depending on the precise pc relative load
11003 instruction the range is somewhere between 256 bytes and 4k. This
11004 means that we often have to dump a constant inside a function, and
11005 generate code to branch around it.
11007 It is important to minimize this, since the branches will slow
11008 things down and make the code larger.
11010 Normally we can hide the table after an existing unconditional
11011 branch so that there is no interruption of the flow, but in the
11012 worst case the code looks like this:
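	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

   (an illustrative sketch -- rn and the L* labels are placeholders;
   each pool inserted this way costs a branch around it plus the
   alignment padding).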
11030 We fix this by performing a scan after scheduling, which notices
11031 which instructions need to have their operands fetched from the
11032 constant table and builds the table.
11034 The algorithm starts by building a table of all the constants that
11035 need fixing up and all the natural barriers in the function (places
11036 where a constant table can be dropped without breaking the flow).
11037 For each fixup we note how far the pc-relative replacement will be
11038 able to reach and the offset of the instruction into the function.
11040 Having built the table we then group the fixes together to form
11041 tables that are as large as possible (subject to addressing
11042 constraints) and emit each table of constants after the last
11043 barrier that is within range of all the instructions in the group.
11044 If a group does not contain a barrier, then we forcibly create one
11045 by inserting a jump instruction into the flow. Once the table has
11046 been inserted, the insns are then modified to reference the
11047 relevant entry in the pool.
11049 Possible enhancements to the algorithm (not implemented) are:
11051 1) For some processors and object formats, there may be benefit in
11052 aligning the pools to the start of cache lines; this alignment
11053 would need to be taken into account when calculating addressability of a pool.  */
11056 /* These typedefs are located at the start of this file, so that
11057 they can be used in the prototypes there. This comment is to
11058 remind readers of that fact so that the following structures
11059 can be understood more easily.
11061 typedef struct minipool_node Mnode;
11062 typedef struct minipool_fixup Mfix; */
11064 struct minipool_node
11066 /* Doubly linked chain of entries. */
11069 /* The maximum offset into the code at which this entry can be placed.  While
11070 pushing fixes for forward references, all entries are sorted in order
11071 of increasing max_address.  */
11072 HOST_WIDE_INT max_address;
11073 /* Similarly for an entry inserted for a backwards ref. */
11074 HOST_WIDE_INT min_address;
11075 /* The number of fixes referencing this entry. This can become zero
11076 if we "unpush" an entry. In this case we ignore the entry when we
11077 come to emit the code. */
11079 /* The offset from the start of the minipool. */
11080 HOST_WIDE_INT offset;
11081 /* The value in the table.  */
11083 /* The mode of the value.  */
11084 enum machine_mode mode;
11085 /* The size of the value.  With iWMMXt enabled
11086 sizes > 4 also imply an alignment of 8 bytes.  */
11090 struct minipool_fixup
11094 HOST_WIDE_INT address;
11096 enum machine_mode mode;
11100 HOST_WIDE_INT forwards;
11101 HOST_WIDE_INT backwards;
11104 /* Fixes less than a word need padding out to a word boundary. */
11105 #define MINIPOOL_FIX_SIZE(mode) \
11106 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
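/* For example, a QImode or HImode fix still occupies 4 bytes in the
   pool, while a DImode fix occupies 8.  */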
11108 static Mnode * minipool_vector_head;
11109 static Mnode * minipool_vector_tail;
11110 static rtx minipool_vector_label;
11111 static int minipool_pad;
11113 /* The linked list of all minipool fixes required for this function. */
11114 Mfix * minipool_fix_head;
11115 Mfix * minipool_fix_tail;
11116 /* The fix entry for the current minipool, once it has been placed. */
11117 Mfix * minipool_barrier;
11119 /* Determines if INSN is the start of a jump table. Returns the end
11120 of the TABLE or NULL_RTX. */
11122 is_jump_table (rtx insn)
11126 if (GET_CODE (insn) == JUMP_INSN
11127 && JUMP_LABEL (insn) != NULL
11128 && ((table = next_real_insn (JUMP_LABEL (insn)))
11129 == next_real_insn (insn))
11131 && GET_CODE (table) == JUMP_INSN
11132 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11133 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11139 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11140 #define JUMP_TABLES_IN_TEXT_SECTION 0
11143 static HOST_WIDE_INT
11144 get_jump_table_size (rtx insn)
11146 /* ADDR_VECs only take room if read-only data goes into the text section.  */
11148 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11150 rtx body = PATTERN (insn);
11151 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11152 HOST_WIDE_INT size;
11153 HOST_WIDE_INT modesize;
11155 modesize = GET_MODE_SIZE (GET_MODE (body));
11156 size = modesize * XVECLEN (body, elt);
11160 /* Round up size of TBB table to a halfword boundary. */
11161 size = (size + 1) & ~(HOST_WIDE_INT)1;
11164 /* No padding necessary for TBH. */
11167 /* Add two bytes for alignment on Thumb. */
11172 gcc_unreachable ();
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
                               HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
        mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
         mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp->prev;
      else
        minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
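/* Sketch of the invariant maintained above (illustration only): the pool
   stays sorted by increasing max_address, and the final scan enforces

     mp->prev->max_address <= mp->max_address - mp->prev->fix_size

   for every pair of neighbours, so an emission point that satisfies the
   head entry automatically satisfies every entry after it.  */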
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (GET_CODE (fix->value) != CODE_LABEL
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value))
        {
          /* More than one fix references this entry.  */
          mp->refcount++;
          return move_minipool_fix_forward_ref (mp, max_mp, max_address);
        }

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
          && mp->max_address > max_address)
        max_mp = mp;

      /* If we are inserting an 8-byte aligned quantity and
         we have not already found an insertion point, then
         make sure that all such 8-byte aligned quantities are
         placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
          && max_mp == NULL
          && fix->fix_size >= 8
          && mp->fix_size < 8)
        {
          max_mp = mp;
          max_address = mp->max_address;
        }
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
        {
          minipool_vector_head = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
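/* For orientation (approximate figures, not taken from this file): the
   FORWARDS limit used above comes from the insn's pool_range attribute;
   a pc-relative word load can typically reach about 4K bytes ahead in
   ARM state and much less in Thumb-1 state, which is why minipool_pad
   and fix_size are subtracted before an entry is accepted.  */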
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
                                HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
        mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
         mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
        mp->prev->next = mp->next;
      else
        minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
        offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
          >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (GET_CODE (fix->value) != CODE_LABEL
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value)
          /* Check that there is enough slack to move this entry to the
             end of the table (this is conservative).  */
          && (mp->max_address
              > (minipool_barrier->address
                 + minipool_vector_tail->offset
                 + minipool_vector_tail->fix_size)))
        {
          mp->refcount++;
          return move_minipool_fix_backward_ref (mp, min_mp, min_address);
        }

      if (min_mp != NULL)
        mp->min_address += fix->fix_size;
      else
        {
          /* Note the insertion point if necessary.  */
          if (mp->min_address < min_address)
            {
              /* For now, we do not allow the insertion of 8-byte alignment
                 requiring nodes anywhere but at the start of the pool.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                min_mp = mp;
            }
          else if (mp->max_address
                   < minipool_barrier->address + mp->offset + fix->fix_size)
            {
              /* Inserting before this entry would push the fix beyond
                 its maximum address (which can happen if we have
                 re-located a forwards fix); force the new fix to come
                 after it.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                {
                  min_mp = mp;
                  min_address = mp->min_address + fix->fix_size;
                }
            }
          /* Do not insert a non-8-byte aligned quantity before 8-byte
             aligned quantities.  */
          else if (ARM_DOUBLEWORD_ALIGN
                   && fix->fix_size < 8
                   && mp->fix_size >= 8)
            {
              min_mp = mp;
              min_address = mp->min_address + fix->fix_size;
            }
        }
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
        {
          minipool_vector_tail = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
        mp->next->offset = mp->offset + mp->fix_size;
      else
        mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
/* Assign minipool offsets.  */
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
        offset += mp->fix_size;
    }
}
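/* Worked example (illustration only): for a pool holding a live SImode
   entry, an unreferenced entry (refcount == 0) and a live DImode entry,
   the loop above assigns offsets 0, 4 and 4 -- the dead entry keeps an
   offset but contributes no size, so the DImode value starts right
   after the live word.  */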
/* Output the literal table */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
        {
          align64 = 1;
          break;
        }

  if (dump_file)
    fprintf (dump_file,
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
        {
          if (dump_file)
            {
              fprintf (dump_file,
                       ";; Offset %u, min %ld, max %ld ",
                       (unsigned) mp->offset, (unsigned long) mp->min_address,
                       (unsigned long) mp->max_address);
              arm_print_value (dump_file, mp->value);
              fputc ('\n', dump_file);
            }

          switch (mp->fix_size)
            {
#ifdef HAVE_consttable_1
            case 1:
              scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_2
            case 2:
              scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_4
            case 4:
              scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_8
            case 8:
              scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_16
            case 16:
              scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
              break;
#endif
            default:
              gcc_unreachable ();
            }
        }

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
         than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  */
      count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
      if (tmp != NULL)
        {
          count += get_jump_table_size (tmp);

          /* Jump tables aren't in a basic block, so base the cost on
             the dispatch insn.  If we select this location, we will
             still put the pool after the table.  */
          new_cost = arm_barrier_cost (from);

          if (count < max_count
              && (!selected || new_cost <= selected_cost))
            {
              selected = tmp;
              selected_cost = new_cost;
              selected_address = fix->address + count;
            }

          /* Continue after the dispatch table.  */
          from = NEXT_INSN (tmp);
          continue;
        }

      new_cost = arm_barrier_cost (from);

      if (count < max_count
          && (!selected || new_cost <= selected_cost))
        {
          selected = from;
          selected_cost = new_cost;
          selected_address = fix->address + count;
        }

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
                   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
               GET_MODE_NAME (mode),
               INSN_UID (insn), (unsigned long) address,
               -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
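/* Illustrative values (not from the original source) for a typical
   SImode fixup in ARM state, assuming the insn sits at offset 100:  */
#if 0
  fix->address   = 100;                             /* Offset of the insn.  */
  fix->fix_size  = MINIPOOL_FIX_SIZE (SImode);      /* == 4.  */
  fix->forwards  = get_attr_pool_range (insn);      /* e.g. 4096.  */
  fix->backwards = get_attr_neg_pool_range (insn);  /* e.g. 4084.  */
#endif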
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
                            NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
}
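/* Worked example (illustration only): for a DImode value whose halves
   are 0x000000ff and 0x00000001, both halves are valid ARM immediates,
   so each arm_gen_constant call reports one insn and the total inline
   cost is 2.  A half like 0x12345678 costs several insns, pushing the
   total towards loading from the pool instead.  */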
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  The function returns TRUE if any fixups were needed/pushed.
   This is used by arm_memory_load_p() which needs to know about loads
   of constants that will be converted into minipool loads.  */
static bool
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  bool result = false;
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return false;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
        continue;

      /* If this alternative is a memory reference, then any mention
         of constants in this alternative is really to fool reload
         into allowing us to accept one there.  We need to fix them up
         now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
        {
          rtx op = recog_data.operand[opno];

          if (CONSTANT_P (op))
            {
              if (do_pushes)
                push_minipool_fix (insn, address, recog_data.operand_loc[opno],
                                   recog_data.operand_mode[opno], op);
              result = true;
            }
          else if (GET_CODE (op) == MEM
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
            {
              if (do_pushes)
                {
                  rtx cop = avoid_constant_pool_reference (op);

                  /* Casting the address of something to a mode narrower
                     than a word can cause avoid_constant_pool_reference()
                     to return the pool reference itself.  That's no good to
                     us here.  Let's just hope that we can use the
                     constant pool value directly.  */
                  if (op == cop)
                    cop = get_pool_constant (XEXP (op, 0));

                  push_minipool_fix (insn, address,
                                     recog_data.operand_loc[opno],
                                     recog_data.operand_mode[opno], cop);
                }

              result = true;
            }
        }
    }

  return result;
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */
static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  FOR_EACH_BB (bb)
    {
      rtx insn;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
        {
          if (NONJUMP_INSN_P (insn)
              && !REGNO_REG_SET_P (&live, CC_REGNUM))
            {
              rtx pat = PATTERN (insn);
              if (GET_CODE (pat) == SET
                  && low_register_operand (XEXP (pat, 0), SImode)
                  && thumb_16bit_operator (XEXP (pat, 1), SImode)
                  && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
                  && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
                {
                  rtx dst = XEXP (pat, 0);
                  rtx src = XEXP (pat, 1);
                  rtx op0 = XEXP (src, 0);
                  if (rtx_equal_p (dst, op0)
                      || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
                    {
                      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
                      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
                      rtvec vec = gen_rtvec (2, pat, clobber);
                      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
                      INSN_CODE (insn) = -1;
                    }
                }
            }
          if (NONDEBUG_INSN_P (insn))
            df_simulate_one_insn_backwards (bb, insn, &live);
        }
    }

  CLEAR_REG_SET (&live);
}
/* GCC puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB2)
    thumb2_reorg ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (GET_CODE (insn) == NOTE);
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (TARGET_CIRRUS_FIX_INVALID_INSNS
          && (arm_cirrus_insn_p (insn)
              || GET_CODE (insn) == JUMP_INSN
              || arm_memory_load_p (insn)))
        cirrus_reorg (insn);

      if (GET_CODE (insn) == BARRIER)
        push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
        {
          rtx table;

          note_invalid_constants (insn, address, true);
          address += get_attr_length (insn);

          /* If the insn is a vector jump, add the size of the table
             and skip the table.  */
          if ((table = is_jump_table (insn)) != NULL)
            {
              address += get_jump_table_size (table);
              insn = table;
            }
        }
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix *  last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && GET_CODE (fix->insn) == BARRIER)
        fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
        break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
        {
          if (GET_CODE (ftmp->insn) == BARRIER)
            {
              if (ftmp->address >= minipool_vector_head->max_address)
                break;

              last_barrier = ftmp;
            }
          else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
            break;

          last_added_fix = ftmp;  /* Keep track of the last fix added.  */
        }

      /* If we found a barrier, drop back to that; any fixes that we
         could have reached but come after the barrier will now go in
         the next mini-pool.  */
      if (last_barrier != NULL)
        {
          /* Reduce the refcount for those fixes that won't go into this
             pool after all.  */
          for (fdel = last_barrier->next;
               fdel && fdel != ftmp;
               fdel = fdel->next)
            {
              fdel->minipool->refcount--;
              fdel->minipool = NULL;
            }

          ftmp = last_barrier;
        }
      else
        {
          /* ftmp is the first fix that we can't fit into this pool and
             there are no natural barriers that we could use.  Insert a
             new barrier in the code somewhere between the previous
             fix and this one, and arrange to jump around it.  */
          HOST_WIDE_INT max_address;

          /* The last item on the list of fixes must be a barrier, so
             we can never run off the end of the list of fixes without
             last_barrier being set.  */
          gcc_assert (ftmp);

          max_address = minipool_vector_head->max_address;
          /* Check that there isn't another fix that is in range that
             we couldn't fit into this pool because the pool was
             already too large: we need to put the pool before such an
             instruction.  The pool itself may come just after the
             fix because create_fix_barrier also allows space for a
             jump instruction.  */
          if (ftmp->address < max_address)
            max_address = ftmp->address + 1;

          last_barrier = create_fix_barrier (last_added_fix, max_address);
        }

      assign_minipool_offsets (last_barrier);

      while (ftmp)
        {
          if (GET_CODE (ftmp->insn) != BARRIER
              && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
                  == NULL))
            break;

          ftmp = ftmp->next;
        }

      /* Scan over the fixes we have identified for this pool, fixing them
         up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
           this_fix = this_fix->next)
        if (GET_CODE (this_fix->insn) != BARRIER)
          {
            rtx addr
              = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
                                                  minipool_vector_label),
                               this_fix->minipool->offset);
            *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
          }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
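/* End-to-end sketch of the pass (illustration only): for a function with
   a single out-of-range constant, the scan records one fix,
   add_minipool_forward_ref builds a one-entry pool, create_fix_barrier
   plants a jump around the pool site if no natural barrier exists, and
   dump_minipool emits the alignment directive, the pool label and the
   constant there; finally the fixed insn is rewritten as a pc-relative
   load of (pool label + offset 0).  */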
12243 /* Routines to output assembly language. */
/* If the rtx is the correct value then return the string of the number.
   In this way we can ensure that valid double constants are generated even
   when cross compiling.  */
static const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}
/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (*r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}
/* Output the operands of a LDM/STM instruction to STREAM.
   MASK is the ARM register set mask of which only bits 0-15 are important.
   REG is the base register, either the frame pointer or the stack pointer,
   INSTR is the possibly suffixed load or store instruction.
   RFE is nonzero if the instruction should also copy spsr to cpsr.  */
static void
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
                 unsigned long mask, int rfe)
{
  int i;
  bool not_first = FALSE;

  gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
  fputc ('\t', stream);
  asm_fprintf (stream, instr, reg);
  fputc ('{', stream);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      {
        if (not_first)
          fprintf (stream, ", ");

        asm_fprintf (stream, "%r", i);
        not_first = TRUE;
      }

  if (rfe)
    fprintf (stream, "}^\n");
  else
    fprintf (stream, "}\n");
}
/* Output a FLDMD instruction to STREAM.
   BASE is the register containing the address.
   REG and COUNT specify the register range.
   Extra registers may be added to avoid hardware bugs.

   We output FLDMD even for ARMv5 VFP implementations.  Although
   FLDMD is technically not supported until ARMv6, it is believed
   that all VFP implementations support its use in this context.  */
static void
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
{
  int i;

  /* Workaround ARM10 VFPr1 bug.  */
  if (count == 2 && !arm_arch6)
    {
      if (reg == 15)
        reg--;
      count++;
    }

  /* FLDMD may not load more than 16 doubleword registers at a time.  Split the
     load into multiple parts if we have to handle more than 16 registers.  */
  if (count > 16)
    {
      vfp_output_fldmd (stream, base, reg, 16);
      vfp_output_fldmd (stream, base, reg + 16, count - 16);
      return;
    }

  fputc ('\t', stream);
  asm_fprintf (stream, "fldmfdd\t%r!, {", base);

  for (i = reg; i < reg + count; i++)
    {
      if (i > reg)
        fputs (", ", stream);
      asm_fprintf (stream, "d%d", i);
    }
  fputs ("}\n", stream);
}
/* Output the assembly for a store multiple.  */
const char *
vfp_output_fstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;

  strcpy (pattern, "fstmfdd\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (GET_CODE (operands[1]) == REG);

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */
static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
        base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
         counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
                   gen_frame_mem
                   (BLKmode,
                    gen_rtx_PRE_MODIFY (Pmode,
                                        stack_pointer_rtx,
                                        plus_constant
                                        (stack_pointer_rtx,
                                         - (count * 8)))
                    ),
                   gen_rtx_UNSPEC (BLKmode,
                                   gen_rtvec (1, reg),
                                   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                     plus_constant (stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
                     gen_frame_mem (DFmode, stack_pointer_rtx),
                     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
                         gen_frame_mem (DFmode,
                                        plus_constant (stack_pointer_rtx,
                                                       i * 8)),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
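/* Note on the RTL shape built above (illustration only): for count == 2
   starting at d8, the PARALLEL describes one store-multiple pushing
   {d8, d9}, while the attached REG_FRAME_RELATED_EXPR sequence spells
   out the equivalent for the DWARF machinery: sp = sp - 16, [sp] = d8,
   [sp + 8] = d9.  The caller is then told 16 bytes were pushed.  */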
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */
void
arm_emit_call_insn (rtx pat, rtx addr)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
          ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
          : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
         first instruction.  It's safe to use IP as the target of the
         load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (TARGET_INTERWORK || arm_arch4t)
        output_asm_insn ("bx%?\t%|ip", operands);
      else
        output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from ARM registers to an FPA register.
   OPERANDS[0] is an FPA register.
   OPERANDS[1] is the first register of an ARM register pair.  */
const char *
output_mov_long_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
  output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);

  return "";
}
/* Output a move from an FPA register to ARM registers.
   OPERANDS[0] is the first register of an ARM register pair.
   OPERANDS[1] is an FPA register.  */
const char *
output_mov_long_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);

  return "";
}
/* Output a move from ARM registers to ARM registers of a long double.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }
  else
    {
      for (i = 2; i >= 0; i--)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }

  return "";
}
/* Emit a MOVW/MOVT pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
        emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
                                             GEN_INT (16)),
                       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
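/* Illustrative expansion (not part of the original source), where "r0"
   stands for a hypothetical SImode hard register rtx:  */
#if 0
  arm_emit_movpair (r0, GEN_INT (0x12345678));
  /* emits:  movw r0, #0x5678
             movt r0, #0x1234  */
  arm_emit_movpair (r0, GEN_INT (0x00005678));
  /* emits:  movw r0, #0x5678  (the movt is skipped)  */
#endif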
/* Output a move from ARM registers to an FPA register.
   OPERANDS[0] is an FPA register.
   OPERANDS[1] is the first register of an ARM register pair.  */
const char *
output_mov_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
  output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);

  return "";
}
/* Output a move from an FPA register to ARM registers.
   OPERANDS[0] is the first register of an ARM register pair.
   OPERANDS[1] is an FPA register.  */
const char *
output_mov_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);

  return "";
}
/* Output a move between double words.
   It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
   or MEM<-REG and all MEMs must be offsettable addresses.  */
const char *
output_move_double (rtx *operands)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
        {
        case REG:
          if (TARGET_LDRD
              && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
            output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
          else
            output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
          break;

        case PRE_DEC:
          if (TARGET_LDRD)
            output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
          else
            output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
          break;

        case POST_INC:
          if (TARGET_LDRD)
            output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
          else
            output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          /* Autoincrement addressing modes should never have overlapping
             base and destination registers, and overlapping index registers
             are already prohibited, so this doesn't need to worry about
             fix_cm3_ldrd.  */
          otherops[0] = operands[0];
          otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

          if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
            {
              if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
                {
                  /* Registers overlap so split out the increment.  */
                  output_asm_insn ("add%?\t%1, %1, %2", otherops);
                  output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
                }
              else
                {
                  /* Use a single insn if we can.
                     FIXME: IWMMXT allows offsets larger than ldrd can
                     handle, fix these up with a pair of ldr.  */
                  if (TARGET_THUMB2
                      || GET_CODE (otherops[2]) != CONST_INT
                      || (INTVAL (otherops[2]) > -256
                          && INTVAL (otherops[2]) < 256))
                    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
                  else
                    {
                      output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                    }
                }
            }
          else
            {
              /* Use a single insn if we can.
                 FIXME: IWMMXT allows offsets larger than ldrd can handle,
                 fix these up with a pair of ldr.  */
              if (TARGET_THUMB2
                  || GET_CODE (otherops[2]) != CONST_INT
                  || (INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256))
                output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
              else
                {
                  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                  output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
                }
            }
          break;

        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             conditionals.  */
          otherops[1] = operands[1];
          output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (TARGET_LDRD)
            output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
          else
            output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
          break;

          /* ??? This needs checking for thumb2.  */
        default:
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (TARGET_LDRD
                      && (GET_CODE (otherops[2]) == REG
                          || TARGET_THUMB2
                          || (GET_CODE (otherops[2]) == CONST_INT
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0],
                                                   otherops[2]))
                        {
                          rtx tmp;
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          tmp = otherops[1];
                          otherops[1] = otherops[2];
                          otherops[2] = tmp;
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          output_asm_insn ("add%?\t%0, %1, %2", otherops);
                          output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
                        }
                      return "";
                    }

                  if (GET_CODE (otherops[2]) == CONST_INT)
                    {
                      if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                        output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                      else
                        output_asm_insn ("add%?\t%0, %1, %2", otherops);
                    }
                  else
                    output_asm_insn ("add%?\t%0, %1, %2", otherops);
                }
              else
                output_asm_insn ("sub%?\t%0, %1, %2", otherops);

              if (TARGET_LDRD)
                return "ldr%(d%)\t%0, [%1]";

              return "ldm%(ia%)\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  output_asm_insn ("ldr%?\t%0, %1", otherops);
                  output_asm_insn ("ldr%?\t%0, %1", operands);
                }
              else
                {
                  output_asm_insn ("ldr%?\t%0, %1", operands);
                  output_asm_insn ("ldr%?\t%0, %1", otherops);
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert (REGNO (operands[1]) != IP_REGNUM);

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (TARGET_LDRD)
            output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
          else
            output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (TARGET_LDRD)
            output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
          else
            output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
          break;

        case POST_INC:
          if (TARGET_LDRD)
            output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
          else
            output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than ldrd can handle,
             fix these up with a pair of ldr.  */
          if (!TARGET_THUMB2
              && GET_CODE (otherops[2]) == CONST_INT
              && (INTVAL (otherops[2]) <= -256
                  || INTVAL (otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                  output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                }
              else
                {
                  output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                  output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
          else
            output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
                  return "";
                }
            }
          if (TARGET_LDRD
              && (GET_CODE (otherops[2]) == REG
                  || TARGET_THUMB2
                  || (GET_CODE (otherops[2]) == CONST_INT
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          output_asm_insn ("str%?\t%1, %0", operands);
          output_asm_insn ("str%?\t%H1, %0", otherops);
        }
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
              || mode == DFmode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "f%s%c%%?\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? 'd' : 's',
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
13161 /* Output a Neon quad-word load or store, or a load or store for
13162 larger structure modes.
13164 WARNING: The ordering of elements is weird in big-endian mode,
13165 because we use VSTM, as required by the EABI. GCC RTL defines
   element ordering based on in-memory order.  This can differ
13167 from the architectural ordering of elements within a NEON register.
13168 The intrinsics defined in arm_neon.h use the NEON register element
13169 ordering, not the GCC RTL element ordering.
   For example, the in-memory ordering of a big-endian quadword
13172 vector with 16-bit elements when stored from register pair {d0,d1}
13173 will be (lowest address first, d0[N] is NEON register element N):
13175 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13177 When necessary, quadword registers (dN, dN+1) are moved to ARM
13178 registers from rN in the order:
13180 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13182 So that STM/LDM can be used on vectors in ARM registers, and the
13183 same memory layout will result as if VSTM/VLDM were used. */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      templ = "v%smia%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* FIXME: We should be using vld1/vst1 here in BE mode?  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case LABEL_REF:
    case PLUS:
      {
        int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
        int overlap = -1;
        int i;

        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      templ = "v%smia%%?\t%%m0, %%h1";
      ops[0] = mem;
      ops[1] = reg;
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx insn)
{
  rtx reg, mem, addr;
  int load;
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case EImode:
        case OImode:
          return 8;
        case CImode:
          return 12;
        case XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
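/* Worked example (illustration only): output_add_immediate with
   n == 0x10000004 walks the constant in even-aligned 8-bit chunks, each
   of which is encodable as an ARM rotated immediate, and emits:

        add     r0, r1, #4
        add     r0, r0, #268435456      @ 0x10000000  */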
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      abort ();
    }
}
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
static const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (GET_CODE (XEXP (op, 1)))
    {
    case REG:
    case SUBREG:
      *amountp = -1;
      break;

    case CONST_INT:
      *amountp = INTVAL (XEXP (op, 1));
      break;

    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case ROTATE:
      gcc_assert (*amountp != -1);
      *amountp = 32 - *amountp;
      code = ROTATERT;

      /* Fall through.  */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      gcc_assert (*amountp != -1);
      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      gcc_unreachable ();
    }

  if (*amountp != -1)
    {
      /* This is not 100% correct, but follows from the desire to merge
         multiplication by a power of 2 with the recognizer for a
         shift.  >=32 is not a valid shift for "lsl", so we must try and
         output a shift that produces the correct arithmetical result.
         Using lsr #32 is identical except for the fact that the carry bit
         is not set correctly if we set the flags; but we never use the
         carry bit from such an operation, so we can ignore that.  */
      if (code == ROTATERT)
        /* Rotate is just modulo 32.  */
        *amountp &= 31;
      else if (*amountp != (*amountp & 31))
        {
          if (code == ASHIFT)
            mnem = "lsr";
          *amountp = 32;
        }

      /* Shifts of 0 are no-ops.  */
      if (*amountp == 0)
        return NULL;
    }

  return mnem;
}
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
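/* Illustration (not in the original source): int_log2 recovers the shift
   count from a power-of-two multiplier.  */
#if 0
  gcc_assert (int_log2 (1) == 0);
  gcc_assert (int_log2 (8) == 3);
  gcc_assert (int_log2 ((HOST_WIDE_INT) 1 << 31) == 31);
#endif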
13545 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13546 because /bin/as is horribly restrictive. The judgement about
13547 whether or not each character is 'printable' (and can be output as
13548 is) or not (and must be printed with an octal escape) must be made
13549 with reference to the *host* character set -- the situation is
13550 similar to that discussed in the comments above pp_c_char in
13551 c-pretty-print.c. */
13553 #define MAX_ASCII_LEN 51
void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
13593 /* Compute the register save mask for registers 0 through 12
13594 inclusive. This code is used by arm_compute_save_reg_mask. */
13596 static unsigned long
13597 arm_compute_save_reg0_reg12_mask (void)
13599 unsigned long func_type = arm_current_func_type ();
13600 unsigned long save_reg_mask = 0;
13603 if (IS_INTERRUPT (func_type))
13605 unsigned int max_reg;
13606 /* Interrupt functions must not corrupt any registers,
13607 even call clobbered ones. If this is a leaf function
13608 we can just examine the registers used by the RTL, but
13609 otherwise we have to assume that whatever function is
13610 called might clobber anything, and so we have to save
13611 all the call-clobbered registers as well. */
13612 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13613 /* FIQ handlers have registers r8 - r12 banked, so
13614 we only need to check r0 - r7, Normal ISRs only
13615 bank r14 and r15, so we must check up to r12.
13616 r13 is the stack pointer which is always preserved,
13617 so we do not need to consider it here. */
13622 for (reg = 0; reg <= max_reg; reg++)
13623 if (df_regs_ever_live_p (reg)
13624 || (! current_function_is_leaf && call_used_regs[reg]))
13625 save_reg_mask |= (1 << reg);
13627 /* Also save the pic base register if necessary. */
13629 && !TARGET_SINGLE_PIC_BASE
13630 && arm_pic_register != INVALID_REGNUM
13631 && crtl->uses_pic_offset_table)
13632 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13634 else if (IS_VOLATILE(func_type))
13636 /* For noreturn functions we historically omitted register saves
13637 altogether. However this really messes up debugging. As a
13638 compromise save just the frame pointers. Combined with the link
13639 register saved elsewhere this should be sufficient to get
13641 if (frame_pointer_needed)
13642 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13643 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13644 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13645 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
13651 which are call saved and which are used by this function. */
13652 for (reg = 0; reg <= 11; reg++)
13653 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13654 save_reg_mask |= (1 << reg);
13656 /* Handle the frame pointer as a special case. */
13657 if (frame_pointer_needed)
13658 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13660 /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
13664 && arm_pic_register != INVALID_REGNUM
13665 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13666 || crtl->uses_pic_offset_table))
13667 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13669 /* The prologue will copy SP into R0, so save it. */
13670 if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;

	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
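/* Worked example (illustrative only): in a normal function where just
   r4 and r6 are live call-saved registers and no frame pointer or PIC
   base is needed, the loop above yields

     save_reg_mask == (1 << 4) | (1 << 6) == 0x50

   i.e. the prologue will push {r4, r6}.  */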
13692 /* Compute the number of bytes used to store the static chain register on the
13693 stack, above the stack frame. We need to know this accurately to get the
13694 alignment of the rest of the stack frame correct. */
static int arm_compute_static_chain_stack_bytes (void)
{
  unsigned long func_type = arm_current_func_type ();
  int static_chain_stack_bytes = 0;
13701 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13702 IS_NESTED (func_type) &&
13703 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13704 static_chain_stack_bytes = 4;
  return static_chain_stack_bytes;
}
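/* Illustrative note: with the current condition this returns 4 exactly
   for an ARM-mode APCS-frame nested function whose r3 is live and which
   has no pretend args; in every other case it returns 0.  */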
13710 /* Compute a bit mask of which registers need to be
13711 saved on the stack for the current function.
13712 This is used by arm_get_frame_offsets, which may add extra registers. */
static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;
  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;
13725 /* If we are creating a stack frame, then we must save the frame pointer,
13726 IP (which will hold the old stack pointer), LR and the PC. */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);
13734 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13736 /* Decide if we need to save the link register.
13737 Interrupt routines have their own banked link register,
13738 so they never need to save it.
13739 Otherwise if we do not use the link register we do not need to save
13740 it. If we are pushing other registers onto the stack however, we
13741 can save an instruction in the epilogue by pushing the link register
13742 now and then popping it back into the PC. This incurs extra memory
13743 accesses though, so we only do it when optimizing for size, and only
13744 if we know that we will not need a fancy return sequence. */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;
13752 if (cfun->machine->lr_save_eliminated)
13753 save_reg_mask &= ~ (1 << LR_REGNUM);
13755 if (TARGET_REALLY_IWMMXT
13756 && ((bit_count (save_reg_mask)
13757 + ARM_NUM_INTS (crtl->args.pretend_args_size +
			  arm_compute_static_chain_stack_bytes())
	  ) % 2) != 0)
    {
13761 /* The total number of registers that are going to be pushed
13762 onto the stack is odd. We need to ensure that the stack
13763 is 64-bit aligned before we start to save iWMMXt registers,
13764 and also before we start to create locals. (A local variable
13765 might be a double or long long which we will load/store using
13766 an iWMMXt instruction). Therefore we need to push another
13767 ARM register, so that the stack will be 64-bit aligned. We
13768 try to avoid using the arg registers (r0 -r3) as they might be
13769 used to pass values in a tail call. */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }
13783 /* We may need to push an additional register for use initializing the
13784 PIC base register. */
13785 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
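/* Worked example (illustrative only): if save_reg_mask covers
   {r4, r5, r6} and there are no pretend args, bit_count gives 3, an
   odd number of words, so the loop above adds the first free register
   in r4-r12 (here r7) purely to keep the stack 64-bit aligned for the
   iWMMXt saves and any doubleword locals.  */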
13797 /* Compute a bit mask of which registers need to be
13798 saved on the stack for the current function. */
13799 static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
13812 && arm_pic_register != INVALID_REGNUM
13813 && crtl->uses_pic_offset_table)
13814 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13816 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13817 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13818 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13820 /* LR will also be pushed if any lo regs are pushed. */
13821 if (mask & 0xff || thumb_force_lr_save ())
13822 mask |= (1 << LR_REGNUM);
13824 /* Make sure we have a low work register if we need one.
13825 We will need one if we are going to push a high register,
13826 but we are not currently intending to push a low register. */
13827 if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
13830 /* Use thumb_find_work_register to choose which register
13831 we will use. If the register is live then we will
13832 have to push it. Use LAST_LO_REGNUM as our fallback
13833 choice for the register to select. */
13834 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13835 /* Make sure the register returned by thumb_find_work_register is
13836 not part of the return value. */
13837 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13838 reg = LAST_LO_REGNUM;
      if (! call_used_regs[reg])
	mask |= 1 << reg;
    }
13844 /* The 504 below is 8 bytes less than 512 because there are two possible
13845 alignment words. We can't tell here if they will be present or not so we
13846 have to play it safe and assume that they are. */
13847 if ((CALLER_INTERWORKING_SLOT_SIZE +
13848 ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
13851 /* This is the same as the code in thumb1_expand_prologue() which
13852 determines which register to use for stack decrement. */
13853 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
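/* Arithmetic behind the 504 used above (illustrative): the Thumb-1
   stack decrement addressed here is limited to 512 bytes; allowing for
   the two possible 4-byte alignment words leaves 512 - 8 == 504, so
   any frame of at least that size forces a low register to be kept
   free for the decrement.  */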
13868 /* Return the number of bytes required to save VFP registers. */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
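/* Worked example (illustrative only): if d8-d11 are the only live
   call-saved VFP registers, the scan finds one block of four
   registers, so it returns 4 * 8 == 32 bytes.  On a pre-v6 core a
   block of exactly two registers is padded to three by the ARM10
   VFPr1 workaround, giving 24 bytes rather than 16.  */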
13911 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13912 everything bar the final return instruction. */
const char *
output_return_instruction (rtx operand, int really_return, int reverse)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
13919 unsigned long live_regs_mask;
13920 unsigned long func_type;
13921 arm_stack_offsets *offsets;
13923 func_type = arm_current_func_type ();
  if (IS_NAKED (func_type))
    return "";
  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }
13948 gcc_assert (!cfun->calls_alloca || really_return);
13950 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13952 cfun->machine->return_used_this_function = 1;
13954 offsets = arm_get_frame_offsets ();
13955 live_regs_mask = offsets->saved_regs_mask;
  if (live_regs_mask)
    {
      const char * return_reg;
13961 /* If we do not have any special requirements for function exit
13962 (e.g. interworking) then we can load the return address
13963 directly into the PC. Otherwise we must load it into LR. */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];
      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |= (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}
13987 /* On some ARM architectures it is faster to use LDR rather than
13988 LDM to load a single register. On other architectures, the
13989 cost is the same. In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;
14009 /* Generate the load multiple instruction to restore the
14010 registers. Note we can get here, even if
14011 frame_pointer_needed is true, but only if sp already
14012 points to the base of the saved core registers. */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;
14017 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14018 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		{
		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
		}
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    {
	      if (TARGET_UNIFIED_ASM)
		sprintf (instr, "pop%s\t{", conditional);
	      else
		sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
	    }
	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);
14077 /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  really_return = 0;
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;
	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}
      output_asm_insn (instr, & operand);
    }

  return "";
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx           x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
14165 /* Place some comments into the assembler stream
14166 describing the current function. */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  if (TARGET_THUMB1)
    {
      thumb1_output_function_prologue (f, frame_size);
      return;
    }
14178 /* Sanity check. */
14179 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14181 func_type = arm_current_func_type ();
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }
14202 if (IS_NAKED (func_type))
14203 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14205 if (IS_VOLATILE (func_type))
14206 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14208 if (IS_NESTED (func_type))
14209 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14210 if (IS_STACKALIGN (func_type))
14211 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);
14217 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14218 frame_pointer_needed,
14219 cfun->machine->uses_anonymous_args);
14221 if (cfun->machine->lr_save_eliminated)
14222 asm_fprintf (f, "\t%@ link register save eliminated.\n");
  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
const char *
arm_output_epilogue (rtx sibling)
{
  int reg;
  unsigned long saved_regs_mask;
  unsigned long func_type;
  /* Floats_offset is the offset from the "virtual" frame.  In an APCS
     frame that is $fp + 4 for a non-variadic function.  */
  int floats_offset = 0;
  rtx operands[3];
  FILE * f = asm_out_file;
  unsigned int lrm_count = 0;
  int really_return = (sibling == NULL);
  int start_reg;
  arm_stack_offsets *offsets;
14245 /* If we have already generated the return instruction
14246 then it is futile to generate anything else. */
  if (use_return_insn (FALSE, sibling)
      && (cfun->machine->return_used_this_function != 0))
    return "";

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    /* Naked functions don't have epilogues.  */
    return "";
  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      rtx op;

      /* A volatile function should never return.  Call abort.  */
      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
      assemble_external_libcall (op);
      output_asm_insn ("bl\t%a0", &op);

      return "";
    }
14269 /* If we are throwing an exception, then we really must be doing a
14270 return, so we can't tail-call. */
14271 gcc_assert (!crtl->calls_eh_return || really_return);
14273 offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  if (TARGET_IWMMXT)
    lrm_count = bit_count (saved_regs_mask);
14279 floats_offset = offsets->saved_args;
14280 /* Compute how far away the floats will be. */
14281 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14282 if (saved_regs_mask & (1 << reg))
14283 floats_offset += 4;
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
14288 int vfp_offset = offsets->frame;
      if (TARGET_FPA_EMU2)
	{
	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	      {
		floats_offset += 12;
		asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
			     reg, FP_REGNUM, floats_offset - vfp_offset);
	      }
	}
      else
	{
	  start_reg = LAST_FPA_REGNUM;
14304 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	    {
	      if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
		{
		  floats_offset += 12;

		  /* We can't unstack more than four registers at once.  */
		  if (start_reg - reg == 3)
		    {
		      asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
				   reg, FP_REGNUM, floats_offset - vfp_offset);
		      start_reg = reg - 1;
		    }
		}
	      else
		{
		  if (reg != start_reg)
		    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
				 reg + 1, start_reg - reg,
				 FP_REGNUM, floats_offset - vfp_offset);
		  start_reg = reg - 1;
		}
	    }

	  /* Just in case the last register checked also needs unstacking.  */
	  if (reg != start_reg)
	    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
			 reg + 1, start_reg - reg,
			 FP_REGNUM, floats_offset - vfp_offset);
	}
      if (TARGET_HARD_FLOAT && TARGET_VFP)
	{
	  int saved_size;

	  /* The fldmd insns do not have base+offset addressing
	     modes, so we use IP to hold the address.  */
	  saved_size = arm_get_vfp_saved_size ();

	  if (saved_size > 0)
	    {
	      floats_offset += saved_size;
	      asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
			   FP_REGNUM, floats_offset - vfp_offset);
	    }
	  start_reg = FIRST_VFP_REGNUM;
	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	    {
	      if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
		  && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
		{
		  if (start_reg != reg)
		    vfp_output_fldmd (f, IP_REGNUM,
				      (start_reg - FIRST_VFP_REGNUM) / 2,
				      (reg - start_reg) / 2);
		  start_reg = reg + 2;
		}
	    }
	  if (start_reg != reg)
	    vfp_output_fldmd (f, IP_REGNUM,
			      (start_reg - FIRST_VFP_REGNUM) / 2,
			      (reg - start_reg) / 2);
	}
      if (TARGET_IWMMXT)
	{
	  /* The frame pointer is guaranteed to be non-double-word aligned.
	     This is because it is set to (old_stack_pointer - 4) and the
	     old_stack_pointer was double word aligned.  Thus the offset to
	     the iWMMXt registers to be loaded must also be non-double-word
	     sized, so that the resultant address *is* double-word aligned.
	     We can ignore floats_offset since that was already included in
	     the live_regs_mask.  */
	  lrm_count += (lrm_count % 2 ? 2 : 1);

	  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	      {
		asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
			     reg, FP_REGNUM, lrm_count * 4);
		lrm_count += 2;
	      }
	}
14388 /* saved_regs_mask should contain the IP, which at the time of stack
14389 frame generation actually contains the old stack pointer. So a
14390 quick way to unwind the stack is just pop the IP register directly
14391 into the stack pointer. */
14392 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14393 saved_regs_mask &= ~ (1 << IP_REGNUM);
14394 saved_regs_mask |= (1 << SP_REGNUM);
14396 /* There are two registers left in saved_regs_mask - LR and PC. We
14397 only need to restore the LR register (the return address), but to
14398 save time we can load it directly into the PC, unless we need a
	 special function exit sequence, or we are not really returning.  */
      if (really_return
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return)
	/* Delete the LR from the register mask, so that the LR on
	   the stack is loaded into the PC in the register mask.  */
	saved_regs_mask &= ~ (1 << LR_REGNUM);
      else
	saved_regs_mask &= ~ (1 << PC_REGNUM);
14409 /* We must use SP as the base register, because SP is one of the
14410 registers being restored. If an interrupt or page fault
14411 happens in the ldm instruction, the SP might or might not
14412 have been restored. That would be bad, as then SP will no
14413 longer indicate the safe area of stack, and we can get stack
14414 corruption. Using SP as the base register means that it will
14415 be reset correctly to the original value, should an interrupt
14416 occur. If the stack pointer already points at the right
14417 place, then omit the subtraction. */
14418 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14419 || cfun->calls_alloca)
14420 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14421 4 * bit_count (saved_regs_mask));
14422 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14424 if (IS_INTERRUPT (func_type))
14425 /* Interrupt handlers will have pushed the
14426 IP onto the stack, so restore it now. */
	print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
    }
  else
    {
14431 /* This branch is executed for ARM mode (non-apcs frames) and
14432 Thumb-2 mode. Frame layout is essentially the same for those
14433 cases, except that in ARM mode frame pointer points to the
14434 first saved register, while in Thumb-2 mode the frame pointer points
14435 to the last saved register.
14437 It is possible to make frame pointer point to last saved
14438 register in both cases, and remove some conditionals below.
14439 That means that fp setup in prologue would be just "mov fp, sp"
14440 and sp restore in epilogue would be just "mov sp, fp", whereas
14441 now we have to use add/sub in those cases. However, the value
14442 of that would be marginal, as both mov and add/sub are 32-bit
14443 in ARM mode, and it would require extra conditionals
	 in arm_expand_prologue to distinguish ARM-apcs-frame case
14445 (where frame pointer is required to point at first register)
14446 and ARM-non-apcs-frame. Therefore, such change is postponed
14447 until real need arise. */
14448 unsigned HOST_WIDE_INT amount;
14450 /* Restore stack pointer if necessary. */
      if (TARGET_ARM && frame_pointer_needed)
	{
	  operands[0] = stack_pointer_rtx;
14454 operands[1] = hard_frame_pointer_rtx;
14456 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
	  output_add_immediate (operands);
	}
      else
	{
	  if (frame_pointer_needed)
	    {
	      /* For Thumb-2 restore sp from the frame pointer.
		 Operand restrictions mean we have to increment FP, then copy
		 to SP.  */
	      amount = offsets->locals_base - offsets->saved_regs;
	      operands[0] = hard_frame_pointer_rtx;
	    }
	  else
	    {
	      unsigned long count;
	      operands[0] = stack_pointer_rtx;
14473 amount = offsets->outgoing_args - offsets->saved_regs;
14474 /* pop call clobbered registers if it avoids a
14475 separate stack adjustment. */
	      count = offsets->saved_regs - offsets->saved_args;
	      if (optimize_size
		  && count != 0
		  && !crtl->calls_eh_return
		  && bit_count(saved_regs_mask) * 4 == count
		  && !IS_INTERRUPT (func_type)
		  && !crtl->tail_call_emit)
		{
		  unsigned long mask;
		  /* Preserve return values, of any size.  */
		  mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
		  mask ^= 0xf;
		  mask &= ~saved_regs_mask;
		  reg = 0;
		  while (bit_count (mask) * 4 > amount)
		    {
		      while ((mask & (1 << reg)) == 0)
			reg++;
		      mask &= ~(1 << reg);
		    }
		  if (bit_count (mask) * 4 == amount)
		    {
		      amount = 0;
		      saved_regs_mask |= mask;
		    }
		}
	    }

	  if (amount)
	    {
	      operands[1] = operands[0];
	      operands[2] = GEN_INT (amount);
	      output_add_immediate (operands);
	    }
14509 if (frame_pointer_needed)
14510 asm_fprintf (f, "\tmov\t%r, %r\n",
14511 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
	}

      if (TARGET_FPA_EMU2)
	{
	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	      asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
			   reg, SP_REGNUM);
	}
      else
	{
	  start_reg = FIRST_FPA_REGNUM;

	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
	    {
	      if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
		{
		  if (reg - start_reg == 3)
		    {
		      asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
				   start_reg, SP_REGNUM);
		      start_reg = reg + 1;
		    }
		}
	      else
		{
		  if (reg != start_reg)
		    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
				 start_reg, reg - start_reg,
				 SP_REGNUM);

		  start_reg = reg + 1;
		}
	    }

	  /* Just in case the last register checked also needs unstacking.  */
	  if (reg != start_reg)
	    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
			 start_reg, reg - start_reg, SP_REGNUM);
	}
      if (TARGET_HARD_FLOAT && TARGET_VFP)
	{
	  int end_reg = LAST_VFP_REGNUM + 1;

	  /* Scan the registers in reverse order.  We need to match
	     any groupings made in the prologue and generate matching
	     pop operations.  */
	  for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
	    {
	      if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
		  && (!df_regs_ever_live_p (reg + 1)
		      || call_used_regs[reg + 1]))
		{
		  if (end_reg > reg + 2)
		    vfp_output_fldmd (f, SP_REGNUM,
				      (reg + 2 - FIRST_VFP_REGNUM) / 2,
				      (end_reg - (reg + 2)) / 2);
		  end_reg = reg;
		}
	    }
	  if (end_reg > reg + 2)
	    vfp_output_fldmd (f, SP_REGNUM, 0,
			      (end_reg - (reg + 2)) / 2);
	}
      if (TARGET_IWMMXT)
	for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
	  if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	    asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
      /* If we can, restore the LR into the PC.  */
      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~ (1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  rfe = IS_INTERRUPT (func_type);
	}
      else
	rfe = 0;
14599 /* Load the registers off the stack. If we only have one register
14600 to load use the LDR instruction - it is faster. For Thumb-2
14601 always use pop and the assembler will pick the best instruction.*/
14602 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14603 && !IS_INTERRUPT(func_type))
	{
	  asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
	}
      else if (saved_regs_mask)
	{
	  if (saved_regs_mask & (1 << SP_REGNUM))
	    /* Note - write back to the stack register is not enabled
	       (i.e. "ldmfd sp!...").  We know that the stack pointer is
	       in the list of registers and if we add writeback the
	       instruction becomes UNPREDICTABLE.  */
	    print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
			     rfe);
	  else if (TARGET_ARM)
	    print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
			     rfe);
	  else
	    print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
	}
      if (crtl->args.pretend_args_size)
	{
	  /* Unwind the pre-pushed regs.  */
	  operands[0] = operands[1] = stack_pointer_rtx;
	  operands[2] = GEN_INT (crtl->args.pretend_args_size);
	  output_add_immediate (operands);
	}
    }
14632 /* We may have already restored PC directly from the stack. */
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
    return "";
14636 /* Stack adjustment for exception handler. */
14637 if (crtl->calls_eh_return)
14638 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14639 ARM_EH_STACKADJ_REGNUM);
14641 /* Generate the return instruction. */
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    case ARM_FT_ISR:
    case ARM_FT_FIQ:
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      break;

    default:
      if (IS_STACKALIGN (func_type))
	{
	  /* See comment in arm_expand_prologue.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
	}
      if (arm_arch5 || arm_arch4t)
	asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      else
	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;
    }

  return "";
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;
14683 /* Emit any call-via-reg trampolines that are needed for v4t support
14684 of call_reg and call_value_reg type insns. */
14685 for (regno = 0; regno < LR_REGNUM; regno++)
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}
14698 /* ??? Probably not safe to set this here, since it assumes that a
14699 function will be emitted as assembly immediately after we generate
14700 RTL for it. This does not happen for inline functions. */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
14705 /* We need to take into account any stack-frame rounding. */
14706 offsets = arm_get_frame_offsets ();
14708 gcc_assert (!use_return_insn (FALSE, NULL)
14709 || (cfun->machine->return_used_this_function != 0)
14710 || offsets->saved_regs == offsets->outgoing_args
14711 || frame_pointer_needed);
14713 /* Reset the ARM-specific per-function variables. */
      after_arm_reorg = 0;
    }
}
14718 /* Generate and emit an insn that we will recognize as a push_multi.
14719 Unfortunately, since this insn does not reflect very well the actual
14720 semantics of the operation, we need to annotate the insn for the benefit
14721 of DWARF2 frame unwind information. */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);
14739 /* We don't record the PC in the dwarf frame information. */
14740 num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;
14744 /* For the body of the insn we are going to generate an UNSPEC in
14745 parallel with several USEs. This allows the insn to be recognized
14746 by the push_multi pattern in the arm.md file.
     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	  ])
14759 For the frame note however, we try to be more explicit and actually
14760 show each register being stored into the stack frame, plus a (single)
14761 decrement of the stack pointer. We do it this way in order to be
14762 friendly to the stack unwinding code, which only wants to see a single
14763 stack decrement per instruction. The RTL we generate for the note looks
     something like this:

      (sequence [
	 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	 (set (mem:SI (reg:SI sp)) (reg:SI r4))
	 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	 ...
	])
14774 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14775 instead we'd have a parallel expression detailing all
14776 the stores to the various memory addresses so that debug
14777 information is more up-to-date. Remember however while writing
14778 this to take care of the constraints with the push instruction.
14780 Note also that this has to be taken care of for the VFP registers.
14782 For more see PR43399. */
14784 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14785 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14786 dwarf_par_index = 1;
14788 for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (VOIDmode,
			   gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (i != PC_REGNUM)
	    {
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
	      dwarf_par_index++;
	    }

	  break;
	}
    }
  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (i != PC_REGNUM)
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem
			       (SImode,
				plus_constant (stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }
  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
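/* Illustrative example (not from the original sources): for
   mask == (1 << 4) | (1 << 5) | (1 << LR_REGNUM), i.e. push {r4, r5, lr},
   the REG_FRAME_RELATED_EXPR note built above describes one stack
   decrement and three stores:

     (set (reg sp) (plus (reg sp) (const_int -12)))
     (set (mem (reg sp)) (reg r4))
     (set (mem (plus (reg sp) (const_int 4))) (reg r5))
     (set (mem (plus (reg sp) (const_int 8))) (reg lr))  */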
14860 /* Calculate the size of the return value that is passed in registers. */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
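/* Illustrative example: a function returning 'long long' has a DImode
   return rtx, so the routine above returns GET_MODE_SIZE (DImode) == 8,
   i.e. the value comes back in the two core registers r0 and r1.  */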
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (XFmode, base_reg++);
  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(stack_pointer_rtx,
					 -12 * count))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));
14900 tmp = gen_rtx_SET (VOIDmode,
14901 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14902 RTX_FRAME_RELATED_P (tmp) = 1;
14903 XVECEXP (dwarf, 0, 1) = tmp;
  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (XFmode, base_reg++);
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (XFmode,
					plus_constant (stack_pointer_rtx,
						       i * 12)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -12 * count));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
14933 /* Return true if the current function needs to save/restore LR. */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
14994 /* Calculate stack offsets. These are used to calculate register elimination
14995 offsets and in prologue/epilogue code. Also calculates which registers
14996 should be saved. */
14998 static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;
15011 /* We need to know if we are a leaf function. Unfortunately, it
15012 is possible to be called after start_sequence has been called,
15013 which causes get_insns to return the insns for the sequence,
15014 not the function, which will cause leaf_function_p to return
     the incorrect result.

     Fortunately, we only need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
15026 frame_size = ROUND_UP_WORD (get_frame_size ());
15028 leaf = leaf_function_p ();
15030 /* Space for variadic functions. */
15031 offsets->saved_args = crtl->args.pretend_args_size;
15033 /* In Thumb mode this is incorrect, but never used. */
15034 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15035 arm_compute_static_chain_stack_bytes();
  if (TARGET_32BIT)
    {
      unsigned int regno;
15041 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15042 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15043 saved = core_saved;
15045 /* We know that SP will be doubleword aligned on entry, and we must
15046 preserve that condition at any subroutine call. We also require the
15047 soft frame pointer to be doubleword aligned. */
      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
15052 for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}
15059 func_type = arm_current_func_type ();
      if (! IS_VOLATILE (func_type))
	{
	  /* Space for saved FPA registers.  */
	  for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 12;

	  /* Space for saved VFP registers.  */
	  if (TARGET_HARD_FLOAT && TARGET_VFP)
	    saved += arm_get_vfp_saved_size ();
	}
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15075 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15076 saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }
15081 /* Saved registers include the stack frame. */
15082 offsets->saved_regs = offsets->saved_args + saved +
15083 arm_compute_static_chain_stack_bytes();
15084 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }
15094 /* Ensure SFP has the correct alignment. */
15095 if (ARM_DOUBLEWORD_ALIGN
15096 && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
15099 /* Try to align stack by pushing an extra reg. Don't bother doing this
15100 when there is a stack frame as the alignment will be rolled into
15101 the normal stack adjustment. */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (!crtl->tail_call_emit
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0)
	    {
	      reg = 3;
	    }
	  else
	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
	      {
		if ((offsets->saved_regs_mask & (1 << i)) == 0)
		  {
		    reg = i;
		    break;
		  }
	      }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }
15133 offsets->locals_base = offsets->soft_frame + frame_size;
15134 offsets->outgoing_args = (offsets->locals_base
15135 + crtl->outgoing_args_size);
  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
15149 /* Calculate the relative offsets for the different stack pointers. Positive
15150 offsets are in the direction of stack growth. */
unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
15172 /* This is the reverse of the soft frame pointer
15173 to hard frame pointer elimination below. */
15174 return offsets->soft_frame - offsets->saved_args;
15176 case ARM_HARD_FRAME_POINTER_REGNUM:
15177 /* This is only non-zero in the case where the static chain register
15178 is stored above the frame. */
15179 return offsets->frame - offsets->saved_args - 4;
15181 case STACK_POINTER_REGNUM:
15182 /* If nothing has been pushed on the stack at all
15183 then this will return -4. This *is* correct! */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
15198 /* The hard frame pointer points to the top entry in the
15199 stack frame. The soft frame pointer to the bottom entry
15200 in the stack frame. If there is no stack frame at all,
15201 then they are identical. */
15203 return offsets->frame - offsets->soft_frame;
15205 case STACK_POINTER_REGNUM:
15206 return offsets->outgoing_args - offsets->soft_frame;
	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
15215 In theory you could eliminate from the hard frame
15216 pointer to the stack pointer, but this will never
15217 happen, since if a stack frame is not needed the
15218 hard frame pointer will never be used. */
      gcc_unreachable ();
    }
}
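/* Worked example (illustrative only): take a frame with
   saved_args == 0, 16 bytes of saved registers (saved_regs == 16,
   soft_frame == 16), 8 bytes of locals and no outgoing arguments, so
   outgoing_args == 24.  Then eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM returns 24 - (0 + 4) == 20, and eliminating
   FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM returns
   24 - 16 == 8.  */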
15223 /* Given FROM and TO register numbers, say whether this elimination is
15224 allowed. Frame pointer elimination is automatically handled.
15226 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15227 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15228 pointer, we must eliminate FRAME_POINTER_REGNUM into
15229 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15230 ARG_POINTER_REGNUM. */
static bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	  true);
}
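/* The rules above, written out as a table (illustrative):

     TO                                allowed when
     FRAME_POINTER_REGNUM              FROM is not ARG_POINTER_REGNUM
     STACK_POINTER_REGNUM              no frame pointer is needed
     ARM_HARD_FRAME_POINTER_REGNUM     compiling ARM (not Thumb) code
     THUMB_HARD_FRAME_POINTER_REGNUM   compiling Thumb (not ARM) code
     anything else                     always  */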
15242 /* Emit RTL to save coprocessor registers on function entry. Returns the
15243 number of bytes pushed. */
static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;
  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }
  /* Save any floating point call-saved registers used by this
     function.  */
  if (TARGET_FPA_EMU2)
    {
      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	  {
	    insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	    insn = gen_rtx_MEM (XFmode, insn);
	    insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
	    RTX_FRAME_RELATED_P (insn) = 1;
	    saved_size += 12;
	  }
    }
  else
    {
      start_reg = LAST_FPA_REGNUM;
      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	{
	  if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	    {
	      if (start_reg - reg == 3)
		{
		  insn = emit_sfm (reg, 4);
		  RTX_FRAME_RELATED_P (insn) = 1;
		  saved_size += 48;
		  start_reg = reg - 1;
		}
	    }
	  else
	    {
	      if (start_reg != reg)
		{
		  insn = emit_sfm (reg + 1, start_reg - reg);
		  RTX_FRAME_RELATED_P (insn) = 1;
		  saved_size += (start_reg - reg) * 12;
		}
	      start_reg = reg - 1;
	    }
	}

      if (start_reg != reg)
	{
	  insn = emit_sfm (reg + 1, start_reg - reg);
	  saved_size += (start_reg - reg) * 12;
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
15335 /* Set the Thumb frame pointer from the stack pointer. */
static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			   plus_constant (stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;
15389 func_type = arm_current_func_type ();
15391 /* Naked functions don't have prologues. */
  if (IS_NAKED (func_type))
    return;
15395 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15396 args_to_push = crtl->args.pretend_args_size;
15398 /* Compute which register we will have to save onto the stack. */
15399 offsets = arm_get_frame_offsets ();
15400 live_regs_mask = offsets->saved_regs_mask;
15402 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
  if (IS_STACKALIGN (func_type))
    {
      rtx dwarf;
      rtx r0;
      rtx r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
15420 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15422 r0 = gen_rtx_REG (SImode, 0);
15423 r1 = gen_rtx_REG (SImode, 1);
15424 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15425 compiler won't choke. */
15426 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15427 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15428 insn = gen_movsi (r0, stack_pointer_rtx);
15429 RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      emit_insn (insn);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }
15436 /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
15443 /* Interrupt functions must not corrupt any registers.
15444 Creating a frame pointer however, corrupts the IP
15445 register, so we must push it first. */
15446 insn = emit_multi_reg_push (1 << IP_REGNUM);
15448 /* Do not set RTX_FRAME_RELATED_P on this insn.
15449 The dwarf stack unwinding code only wants to see one
15450 stack decrement per function, and this is not it. If
15451 this instruction is labeled as being part of the frame
15452 creation sequence then dwarf2out_frame_debug_expr will
15453 die when it encounters the assignment of IP to FP
15454 later on, since the use of SP here establishes SP as
15455 the CFA register and not IP.
15457 Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}
      else if (IS_NESTED (func_type))
	{
15462 /* The Static chain register is the same as the IP register
15463 used as a scratch register during stack frame creation.
15464 To get around this need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register.
15469 2. A slot on the stack above the frame. (This only
15470 works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
		  onto the stack.

	     Note - we only need to tell the dwarf2 backend about the SP
15475 adjustment in the second variant; the static chain register
15476 doesn't need to be unwound, as it doesn't contain a value
15477 inherited from the caller. */
	  if (df_regs_ever_live_p (3) == false)
	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx dwarf;

	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	      saved_regs += 4;

	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		insn = emit_multi_reg_push
		  ((0xf0 >> (args_to_push / 4)) & 0xf);
	      else
		insn = emit_insn
		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (- args_to_push)));

	      RTX_FRAME_RELATED_P (insn) = 1;

	      saved_pretend_args = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;

	      /* Now reuse r3 to preserve IP.  */
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	    }
	}
15521 insn = emit_set_insn (ip_rtx,
15522 plus_constant (stack_pointer_rtx, fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
15539 /* If this is an interrupt service routine, and the link register
15540 is going to be pushed, and we're not generating extra
15541 push of IP (needed when frame is needed and frame layout if apcs),
15542 subtracting four from LR now will mean that the function return
15543 can be done with a single instruction. */
15544 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15545 && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (lr, -4));
    }
  if (live_regs_mask)
    {
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;

	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      insn = emit_multi_reg_push (live_regs_mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
15581 if (! IS_VOLATILE (func_type))
15582 saved_regs += arm_save_coproc_regs ();
  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (IS_NESTED (func_type))
	    {
	      /* Recover the static chain register.  */
15596 if (!df_regs_ever_live_p (3)
15597 || saved_pretend_args)
15598 insn = gen_rtx_REG (SImode, 3);
15599 else /* if (crtl->args.pretend_args_size == 0) */
		{
		  insn = plus_constant (hard_frame_pointer_rtx, 4);
		  insn = gen_frame_mem (SImode, insn);
		}
	      emit_set_insn (ip_rtx, insn);
	      /* Add a USE to stop propagate_one_insn() from barfing.  */
	      emit_insn (gen_prologue_use (ip_rtx));
	    }
	}
      else
	{
	  insn = GEN_INT (saved_regs - 4);
15612 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15613 stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
15621 need to get tricky. */
15622 rtx last = get_last_insn ();
15624 amount = GEN_INT (offsets->saved_args + saved_regs
15625 - offsets->outgoing_args);
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);
15636 /* If the frame pointer is needed, emit a special barrier that
15637 will prevent the scheduler from moving stores to the frame
15638 before the stack adjustment. */
15639 if (frame_pointer_needed)
15640 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }
15645 if (frame_pointer_needed && TARGET_THUMB2)
15646 thumb_set_frame_pointer (offsets);
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;
15652 mask = live_regs_mask;
15653 mask &= THUMB2_WORK_REGS;
15654 if (!IS_NESTED (func_type))
15655 mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }
15659 /* If we are profiling, make sure no instructions are scheduled before
15660 the call to mcount. Similarly if the user has requested no
15661 scheduling in the prolog. Similarly if we want non-call exceptions
15662 using the EABI unwinder, to prevent faulting instructions from being
15663 swapped with a stack adjustment. */
15664 if (crtl->profile || !TARGET_SCHED_PROLOG
15665 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
15666 emit_insn (gen_blockage ());
15668 /* If the link register is being kept alive, with the return address in it,
15669 then make sure that it does not get reused by the ce2 pass. */
15670 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
15674 /* Print condition code to STREAM. Helper function for arm_print_operand. */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB && !TARGET_THUMB2)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
15711 /* If CODE is 'd', then the X is a condition operand and the instruction
15712 should only be executed if the condition is true.
15713 if CODE is 'D', then the X is a condition operand and the instruction
15714 should only be executed if the condition is false: however, if the mode
15715 of the comparison is CCFPEmode, then always execute the instruction -- we
15716 do this because in these circumstances !GE does not necessarily imply LT;
15717 in these cases the instruction pattern will take care to make sure that
15718 an instruction containing %d will follow, thereby undoing the effects of
15719 doing this instruction unconditionally.
15720 If CODE is 'N' then X is a floating point operand that must be negated before output.
15722 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15723 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
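/* Worked examples, added for clarity (not in the original source): for
   (const_int 5), %B prints the bitwise inverse -6; for (reg:DI r4),
   %M prints the ldm/stm register range {r4-r5}. */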
15724 static void
15725 arm_print_operand (FILE *stream, rtx x, int code)
15730 fputs (ASM_COMMENT_START, stream);
15734 fputs (user_label_prefix, stream);
15738 fputs (REGISTER_PREFIX, stream);
15742 arm_print_condition (stream);
15746 /* Nothing in unified syntax, otherwise the current condition code. */
15747 if (!TARGET_UNIFIED_ASM)
15748 arm_print_condition (stream);
15752 /* The current condition code in unified syntax, otherwise nothing. */
15753 if (TARGET_UNIFIED_ASM)
15754 arm_print_condition (stream);
15758 /* The current condition code for a condition code setting instruction.
15759 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15760 if (TARGET_UNIFIED_ASM)
15762 fputc('s', stream);
15763 arm_print_condition (stream);
15767 arm_print_condition (stream);
15768 fputc('s', stream);
15773 /* If the instruction is conditionally executed then print
15774 the current condition code, otherwise print 's'. */
15775 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15776 if (current_insn_predicate)
15777 arm_print_condition (stream);
15779 fputc('s', stream);
15782 /* %# is a "break" sequence. It doesn't output anything, but is used to
15783 separate e.g. operand numbers from following text, if that text consists
15784 of further digits which we don't want to be part of the operand number. */
15792 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15793 r = real_value_negate (&r);
15794 fprintf (stream, "%s", fp_const_from_val (&r));
15798 /* An integer or symbol address without a preceding # sign. */
15800 switch (GET_CODE (x))
15803 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15807 output_addr_const (stream, x);
15811 gcc_unreachable ();
15816 if (GET_CODE (x) == CONST_INT)
15819 val = ARM_SIGN_EXTEND (~INTVAL (x));
15820 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15824 putc ('~', stream);
15825 output_addr_const (stream, x);
15830 /* The low 16 bits of an immediate constant. */
15831 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15835 fprintf (stream, "%s", arithmetic_instr (x, 1));
15838 /* Truncate Cirrus shift counts. */
15840 if (GET_CODE (x) == CONST_INT)
15842 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15845 arm_print_operand (stream, x, 0);
15849 fprintf (stream, "%s", arithmetic_instr (x, 0));
15857 if (!shift_operator (x, SImode))
15859 output_operand_lossage ("invalid shift operand");
15863 shift = shift_op (x, &val);
15867 fprintf (stream, ", %s ", shift);
15869 arm_print_operand (stream, XEXP (x, 1), 0);
15871 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15876 /* An explanation of the 'Q', 'R' and 'H' register operands:
15878 In a pair of registers containing a DI or DF value the 'Q'
15879 operand returns the register number of the register containing
15880 the least significant part of the value. The 'R' operand returns
15881 the register number of the register containing the most
15882 significant part of the value.
15884 The 'H' operand returns the higher of the two register numbers.
15885 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15886 same as the 'Q' operand, since the most significant part of the
15887 value is held in the lower number register. The reverse is true
15888 on systems where WORDS_BIG_ENDIAN is false.
15890 The purpose of these operands is to distinguish between cases
15891 where the endian-ness of the values is important (for example
15892 when they are added together), and cases where the endian-ness
15893 is irrelevant, but the order of register operations is important.
15894 For example when loading a value from memory into a register
15895 pair, the endian-ness does not matter. Provided that the value
15896 from the lower memory address is put into the lower numbered
15897 register, and the value from the higher address is put into the
15898 higher numbered register, the load will work regardless of whether
15899 the value being loaded is big-wordian or little-wordian. The
15900 order of the two register loads can matter however, if the address
15901 of the memory location is actually held in one of the registers
15902 being overwritten by the load.
15904 The 'Q' and 'R' constraints are also available for 64-bit constants. */
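/* Worked example, added for clarity (not in the original source): with a
   DImode value held in r0/r1 on a little-endian (WORDS_BIG_ENDIAN false)
   target, %Q prints r0 (least significant word), %R prints r1 (most
   significant word) and %H prints r1 (the higher register number). */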
15907 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15909 rtx part = gen_lowpart (SImode, x);
15910 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15914 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15916 output_operand_lossage ("invalid operand for code '%c'", code);
15920 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15924 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15926 enum machine_mode mode = GET_MODE (x);
15929 if (mode == VOIDmode)
15931 part = gen_highpart_mode (SImode, mode, x);
15932 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15936 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15938 output_operand_lossage ("invalid operand for code '%c'", code);
15942 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15946 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15948 output_operand_lossage ("invalid operand for code '%c'", code);
15952 asm_fprintf (stream, "%r", REGNO (x) + 1);
15956 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15958 output_operand_lossage ("invalid operand for code '%c'", code);
15962 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15966 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15968 output_operand_lossage ("invalid operand for code '%c'", code);
15972 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15976 asm_fprintf (stream, "%r",
15977 GET_CODE (XEXP (x, 0)) == REG
15978 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15982 asm_fprintf (stream, "{%r-%r}",
15984 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15987 /* Like 'M', but writing doubleword vector registers, for use by Neon insns. */
15991 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15992 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15994 asm_fprintf (stream, "{d%d}", regno);
15996 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16001 /* CONST_TRUE_RTX means always -- that's the default. */
16002 if (x == const_true_rtx)
16005 if (!COMPARISON_P (x))
16007 output_operand_lossage ("invalid operand for code '%c'", code);
16011 fputs (arm_condition_codes[get_arm_condition_code (x)],
16016 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16017 want to do that. */
16018 if (x == const_true_rtx)
16020 output_operand_lossage ("instruction never executed");
16023 if (!COMPARISON_P (x))
16025 output_operand_lossage ("invalid operand for code '%c'", code);
16029 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16030 (get_arm_condition_code (x))],
16034 /* Cirrus registers can be accessed in a variety of ways:
16035 single floating point (f)
16036 double floating point (d)
16037 32bit integer (fx)
16038 64bit integer (dx). */
16039 case 'W': /* Cirrus register in F mode. */
16040 case 'X': /* Cirrus register in D mode. */
16041 case 'Y': /* Cirrus register in FX mode. */
16042 case 'Z': /* Cirrus register in DX mode. */
16043 gcc_assert (GET_CODE (x) == REG
16044 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16046 fprintf (stream, "mv%s%s",
16048 : code == 'X' ? "d"
16049 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16053 /* Print cirrus register in the mode specified by the register's mode. */
16056 int mode = GET_MODE (x);
16058 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16060 output_operand_lossage ("invalid operand for code '%c'", code);
16064 fprintf (stream, "mv%s%s",
16065 mode == DFmode ? "d"
16066 : mode == SImode ? "fx"
16067 : mode == DImode ? "dx"
16068 : "f", reg_names[REGNO (x)] + 2);
16074 if (GET_CODE (x) != REG
16075 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16076 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16077 /* Bad value for wCG register number. */
16079 output_operand_lossage ("invalid operand for code '%c'", code);
16084 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16087 /* Print an iWMMXt control register name. */
16089 if (GET_CODE (x) != CONST_INT
16090 || INTVAL (x) < 0
16091 || INTVAL (x) >= 16)
16092 /* Bad value for wC register number. */
16094 output_operand_lossage ("invalid operand for code '%c'", code);
16100 static const char * wc_reg_names [16] =
16102 "wCID", "wCon", "wCSSF", "wCASF",
16103 "wC4", "wC5", "wC6", "wC7",
16104 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16105 "wC12", "wC13", "wC14", "wC15"
16108 fputs (wc_reg_names [INTVAL (x)], stream); /* Not a format string. */
16112 /* Print the high single-precision register of a VFP double-precision register. */
16116 int mode = GET_MODE (x);
16119 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16121 output_operand_lossage ("invalid operand for code '%c'", code);
16126 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16128 output_operand_lossage ("invalid operand for code '%c'", code);
16132 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16136 /* Print a VFP/Neon double precision or quad precision register name. */
16140 int mode = GET_MODE (x);
16141 int is_quad = (code == 'q');
16144 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16146 output_operand_lossage ("invalid operand for code '%c'", code);
16150 if (GET_CODE (x) != REG
16151 || !IS_VFP_REGNUM (REGNO (x)))
16153 output_operand_lossage ("invalid operand for code '%c'", code);
16158 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16159 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16161 output_operand_lossage ("invalid operand for code '%c'", code);
16165 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16166 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
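/* Worked example, added for clarity (not in the original source): VFP
   register numbers count S registers, so an offset of 10 from
   FIRST_VFP_REGNUM prints as d5 (s10/s11), while an offset of 8 with a
   quad-size mode prints as q2 (d4-d5). */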
16170 /* These two codes print the low/high doubleword register of a Neon quad
16171 register, respectively. For pair-structure types, can also print
16172 low/high quadword registers. */
16176 int mode = GET_MODE (x);
16179 if ((GET_MODE_SIZE (mode) != 16
16180 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16182 output_operand_lossage ("invalid operand for code '%c'", code);
16187 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16189 output_operand_lossage ("invalid operand for code '%c'", code);
16193 if (GET_MODE_SIZE (mode) == 16)
16194 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16195 + (code == 'f' ? 1 : 0));
16197 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16198 + (code == 'f' ? 1 : 0));
16202 /* Print a VFPv3 floating-point constant, represented as an integer index. */
16206 int index = vfp3_const_double_index (x);
16207 gcc_assert (index != -1);
16208 fprintf (stream, "%d", index);
16212 /* Print bits representing opcode features for Neon.
16214 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16215 and polynomials as unsigned.
16217 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16219 Bit 2 is 1 for rounding functions, 0 otherwise. */
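/* Worked example, added for clarity (not in the original source):
   bits == 3 (signed, float) selects 'f' from "uspf"; bits == 2
   (unsigned, polynomial) selects 'p', which the "usuf" variant below
   prints as 'u' instead. */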
16221 /* Identify the type as 's', 'u', 'p' or 'f'. */
16224 HOST_WIDE_INT bits = INTVAL (x);
16225 fputc ("uspf"[bits & 3], stream);
16229 /* Likewise, but signed and unsigned integers are both 'i'. */
16232 HOST_WIDE_INT bits = INTVAL (x);
16233 fputc ("iipf"[bits & 3], stream);
16237 /* As for 'T', but emit 'u' instead of 'p'. */
16240 HOST_WIDE_INT bits = INTVAL (x);
16241 fputc ("usuf"[bits & 3], stream);
16245 /* Bit 2: rounding (vs none). */
16248 HOST_WIDE_INT bits = INTVAL (x);
16249 fputs ((bits & 4) != 0 ? "r" : "", stream);
16253 /* Memory operand for vld1/vst1 instruction. */
16257 bool postinc = FALSE;
16258 gcc_assert (GET_CODE (x) == MEM);
16259 addr = XEXP (x, 0);
16260 if (GET_CODE (addr) == POST_INC)
16263 addr = XEXP (addr, 0);
16265 asm_fprintf (stream, "[%r]", REGNO (addr));
16267 fputs("!", stream);
16275 gcc_assert (GET_CODE (x) == MEM);
16276 addr = XEXP (x, 0);
16277 gcc_assert (GET_CODE (addr) == REG);
16278 asm_fprintf (stream, "[%r]", REGNO (addr));
16282 /* Translate an S register number into a D register number and element index. */
16285 int mode = GET_MODE (x);
16288 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16290 output_operand_lossage ("invalid operand for code '%c'", code);
16295 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16297 output_operand_lossage ("invalid operand for code '%c'", code);
16301 regno = regno - FIRST_VFP_REGNUM;
16302 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
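/* Worked example, added for clarity (not in the original source): an
   offset of 5 from FIRST_VFP_REGNUM (i.e. s5) prints as d2[1], the odd
   half of d2. */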
16306 /* Register specifier for vld1.16/vst1.16. Translate the S register
16307 number into a D register number and element index. */
16310 int mode = GET_MODE (x);
16313 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16315 output_operand_lossage ("invalid operand for code '%c'", code);
16320 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16322 output_operand_lossage ("invalid operand for code '%c'", code);
16326 regno = regno - FIRST_VFP_REGNUM;
16327 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16334 output_operand_lossage ("missing operand");
16338 switch (GET_CODE (x))
16341 asm_fprintf (stream, "%r", REGNO (x));
16345 output_memory_reference_mode = GET_MODE (x);
16346 output_address (XEXP (x, 0));
16353 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16354 sizeof (fpstr), 0, 1);
16355 fprintf (stream, "#%s", fpstr);
16358 fprintf (stream, "#%s", fp_immediate_constant (x));
16362 gcc_assert (GET_CODE (x) != NEG);
16363 fputc ('#', stream);
16364 if (GET_CODE (x) == HIGH)
16366 fputs (":lower16:", stream);
16370 output_addr_const (stream, x);
16376 /* Target hook for printing a memory address. */
16377 static void
16378 arm_print_operand_address (FILE *stream, rtx x)
16382 int is_minus = GET_CODE (x) == MINUS;
16384 if (GET_CODE (x) == REG)
16385 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16386 else if (GET_CODE (x) == PLUS || is_minus)
16388 rtx base = XEXP (x, 0);
16389 rtx index = XEXP (x, 1);
16390 HOST_WIDE_INT offset = 0;
16391 if (GET_CODE (base) != REG
16392 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16394 /* Ensure that BASE is a register (one of them must be),
16396 and that the SP is not used as an index register. */
16397 rtx temp = base;
16398 base = index;
16399 index = temp;
16401 switch (GET_CODE (index))
16404 offset = INTVAL (index);
16407 asm_fprintf (stream, "[%r, #%wd]",
16408 REGNO (base), offset);
16412 asm_fprintf (stream, "[%r, %s%r]",
16413 REGNO (base), is_minus ? "-" : "",
16423 asm_fprintf (stream, "[%r, %s%r",
16424 REGNO (base), is_minus ? "-" : "",
16425 REGNO (XEXP (index, 0)));
16426 arm_print_operand (stream, index, 'S');
16427 fputs ("]", stream);
16432 gcc_unreachable ();
16435 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16436 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16438 extern enum machine_mode output_memory_reference_mode;
16440 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16442 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16443 asm_fprintf (stream, "[%r, #%s%d]!",
16444 REGNO (XEXP (x, 0)),
16445 GET_CODE (x) == PRE_DEC ? "-" : "",
16446 GET_MODE_SIZE (output_memory_reference_mode));
16448 asm_fprintf (stream, "[%r], #%s%d",
16449 REGNO (XEXP (x, 0)),
16450 GET_CODE (x) == POST_DEC ? "-" : "",
16451 GET_MODE_SIZE (output_memory_reference_mode));
16453 else if (GET_CODE (x) == PRE_MODIFY)
16455 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16456 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16457 asm_fprintf (stream, "#%wd]!",
16458 INTVAL (XEXP (XEXP (x, 1), 1)));
16460 asm_fprintf (stream, "%r]!",
16461 REGNO (XEXP (XEXP (x, 1), 1)));
16463 else if (GET_CODE (x) == POST_MODIFY)
16465 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16466 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16467 asm_fprintf (stream, "#%wd",
16468 INTVAL (XEXP (XEXP (x, 1), 1)));
16470 asm_fprintf (stream, "%r",
16471 REGNO (XEXP (XEXP (x, 1), 1)));
16473 else output_addr_const (stream, x);
16477 if (GET_CODE (x) == REG)
16478 asm_fprintf (stream, "[%r]", REGNO (x));
16479 else if (GET_CODE (x) == POST_INC)
16480 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16481 else if (GET_CODE (x) == PLUS)
16483 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16484 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16485 asm_fprintf (stream, "[%r, #%wd]",
16486 REGNO (XEXP (x, 0)),
16487 INTVAL (XEXP (x, 1)));
16489 asm_fprintf (stream, "[%r, %r]",
16490 REGNO (XEXP (x, 0)),
16491 REGNO (XEXP (x, 1)));
16494 output_addr_const (stream, x);
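/* Worked examples for the 32-bit forms above, added for clarity (not in
   the original source): a plain register prints as [r3, #0]; (plus r1
   (const_int 4)) as [r1, #4]; a POST_INC of an SImode reference as
   [r2], #4; and a PRE_DEC of a DFmode reference as [r5, #-8]!. */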
16498 /* Target hook for indicating whether a punctuation character for
16499 TARGET_PRINT_OPERAND is valid. */
16500 static bool
16501 arm_print_operand_punct_valid_p (unsigned char code)
16503 return (code == '@' || code == '|' || code == '.'
16504 || code == '(' || code == ')' || code == '#'
16505 || (TARGET_32BIT && (code == '?'))
16506 || (TARGET_THUMB2 && (code == '!'))
16507 || (TARGET_THUMB && (code == '_')));
16510 /* Target hook for assembling integer objects. The ARM version needs to
16511 handle word-sized values specially. */
16512 static bool
16513 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16515 enum machine_mode mode;
16517 if (size == UNITS_PER_WORD && aligned_p)
16519 fputs ("\t.word\t", asm_out_file);
16520 output_addr_const (asm_out_file, x);
16522 /* Mark symbols as position independent. We only do this in the
16523 .text segment, not in the .data segment. */
16524 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16525 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16527 /* See legitimize_pic_address for an explanation of the
16528 TARGET_VXWORKS_RTP check. */
16529 if (TARGET_VXWORKS_RTP
16530 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16531 fputs ("(GOT)", asm_out_file);
16533 fputs ("(GOTOFF)", asm_out_file);
16535 fputc ('\n', asm_out_file);
16539 mode = GET_MODE (x);
16541 if (arm_vector_mode_supported_p (mode))
16545 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16547 units = CONST_VECTOR_NUNITS (x);
16548 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16550 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16551 for (i = 0; i < units; i++)
16553 rtx elt = CONST_VECTOR_ELT (x, i);
16554 assemble_integer
16555 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16558 for (i = 0; i < units; i++)
16560 rtx elt = CONST_VECTOR_ELT (x, i);
16561 REAL_VALUE_TYPE rval;
16563 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16565 assemble_real
16566 (rval, GET_MODE_INNER (mode),
16567 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16573 return default_assemble_integer (x, size, aligned_p);
16577 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16581 if (!TARGET_AAPCS_BASED)
16584 default_named_section_asm_out_constructor
16585 : default_named_section_asm_out_destructor) (symbol, priority);
16589 /* Put these in the .init_array section, using a special relocation. */
16590 if (priority != DEFAULT_INIT_PRIORITY)
16593 sprintf (buf, "%s.%.5u",
16594 is_ctor ? ".init_array" : ".fini_array",
16595 priority);
16596 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16603 switch_to_section (s);
16604 assemble_align (POINTER_SIZE);
16605 fputs ("\t.word\t", asm_out_file);
16606 output_addr_const (asm_out_file, symbol);
16607 fputs ("(target1)\n", asm_out_file);
16610 /* Add a function to the list of static constructors. */
16613 arm_elf_asm_constructor (rtx symbol, int priority)
16615 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16618 /* Add a function to the list of static destructors. */
16621 arm_elf_asm_destructor (rtx symbol, int priority)
16623 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16626 /* A finite state machine takes care of noticing whether or not instructions
16627 can be conditionally executed, and thus decrease execution time and code
16628 size by deleting branch instructions. The fsm is controlled by
16629 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16631 /* The states of the fsm controlling condition codes are:
16632 0: normal, do nothing special
16633 1: make ASM_OUTPUT_OPCODE not output this instruction
16634 2: make ASM_OUTPUT_OPCODE not output this instruction
16635 3: make instructions conditional
16636 4: make instructions conditional
16638 State transitions (state->state by whom under condition):
16639 0 -> 1 final_prescan_insn if the `target' is a label
16640 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16641 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16642 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16643 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16644 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16645 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16646 (the target insn is arm_target_insn).
16648 If the jump clobbers the conditions then we use states 2 and 4.
16650 A similar thing can be done with conditional return insns.
16652 XXX In case the `target' is an unconditional branch, this conditionalising
16653 of the instructions always reduces code size, but not always execution
16654 time. But then, I want to reduce the code size to somewhere near what
16655 /bin/cc produces. */
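/* Worked example, added for clarity (not in the original source): a
   sequence such as

        cmp r0, #0
        bne .L1
        mov r1, #1
   .L1:

   is rewritten by this fsm as a single conditional instruction,
   moveq r1, #1, deleting the branch. */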
16657 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16658 instructions. When a COND_EXEC instruction is seen the subsequent
16659 instructions are scanned so that multiple conditional instructions can be
16660 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16661 specify the length and true/false mask for the IT block. These will be
16662 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
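/* Worked example, added for clarity (not in the original source): two
   COND_EXEC insns, the first predicated on eq and the second on ne,
   give arm_condexec_masklen == 2 and arm_condexec_mask == 0b01, so
   thumb2_asm_output_opcode below emits "ite eq" ahead of the pair. */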
16664 /* Returns the index of the ARM condition code string in
16665 `arm_condition_codes'. COMPARISON should be an rtx like
16666 `(eq (...) (...))'. */
16667 static enum arm_cond_code
16668 get_arm_condition_code (rtx comparison)
16670 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16671 enum arm_cond_code code;
16672 enum rtx_code comp_code = GET_CODE (comparison);
16674 if (GET_MODE_CLASS (mode) != MODE_CC)
16675 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16676 XEXP (comparison, 1));
16680 case CC_DNEmode: code = ARM_NE; goto dominance;
16681 case CC_DEQmode: code = ARM_EQ; goto dominance;
16682 case CC_DGEmode: code = ARM_GE; goto dominance;
16683 case CC_DGTmode: code = ARM_GT; goto dominance;
16684 case CC_DLEmode: code = ARM_LE; goto dominance;
16685 case CC_DLTmode: code = ARM_LT; goto dominance;
16686 case CC_DGEUmode: code = ARM_CS; goto dominance;
16687 case CC_DGTUmode: code = ARM_HI; goto dominance;
16688 case CC_DLEUmode: code = ARM_LS; goto dominance;
16689 case CC_DLTUmode: code = ARM_CC;
16692 gcc_assert (comp_code == EQ || comp_code == NE);
16694 if (comp_code == EQ)
16695 return ARM_INVERSE_CONDITION_CODE (code);
16701 case NE: return ARM_NE;
16702 case EQ: return ARM_EQ;
16703 case GE: return ARM_PL;
16704 case LT: return ARM_MI;
16705 default: gcc_unreachable ();
16711 case NE: return ARM_NE;
16712 case EQ: return ARM_EQ;
16713 default: gcc_unreachable ();
16719 case NE: return ARM_MI;
16720 case EQ: return ARM_PL;
16721 default: gcc_unreachable ();
16726 /* These encodings assume that AC=1 in the FPA system control
16727 byte. This allows us to handle all cases except UNEQ and LTGT. */
16731 case GE: return ARM_GE;
16732 case GT: return ARM_GT;
16733 case LE: return ARM_LS;
16734 case LT: return ARM_MI;
16735 case NE: return ARM_NE;
16736 case EQ: return ARM_EQ;
16737 case ORDERED: return ARM_VC;
16738 case UNORDERED: return ARM_VS;
16739 case UNLT: return ARM_LT;
16740 case UNLE: return ARM_LE;
16741 case UNGT: return ARM_HI;
16742 case UNGE: return ARM_PL;
16743 /* UNEQ and LTGT do not have a representation. */
16744 case UNEQ: /* Fall through. */
16745 case LTGT: /* Fall through. */
16746 default: gcc_unreachable ();
16752 case NE: return ARM_NE;
16753 case EQ: return ARM_EQ;
16754 case GE: return ARM_LE;
16755 case GT: return ARM_LT;
16756 case LE: return ARM_GE;
16757 case LT: return ARM_GT;
16758 case GEU: return ARM_LS;
16759 case GTU: return ARM_CC;
16760 case LEU: return ARM_CS;
16761 case LTU: return ARM_HI;
16762 default: gcc_unreachable ();
16768 case LTU: return ARM_CS;
16769 case GEU: return ARM_CC;
16770 default: gcc_unreachable ();
16776 case NE: return ARM_NE;
16777 case EQ: return ARM_EQ;
16778 case GEU: return ARM_CS;
16779 case GTU: return ARM_HI;
16780 case LEU: return ARM_LS;
16781 case LTU: return ARM_CC;
16782 default: gcc_unreachable ();
16788 case GE: return ARM_GE;
16789 case LT: return ARM_LT;
16790 case GEU: return ARM_CS;
16791 case LTU: return ARM_CC;
16792 default: gcc_unreachable ();
16798 case NE: return ARM_NE;
16799 case EQ: return ARM_EQ;
16800 case GE: return ARM_GE;
16801 case GT: return ARM_GT;
16802 case LE: return ARM_LE;
16803 case LT: return ARM_LT;
16804 case GEU: return ARM_CS;
16805 case GTU: return ARM_HI;
16806 case LEU: return ARM_LS;
16807 case LTU: return ARM_CC;
16808 default: gcc_unreachable ();
16811 default: gcc_unreachable ();
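/* Worked example, added for clarity (not in the original source): for
   (eq (reg:CC CC_REGNUM) (const_int 0)) the plain CCmode table applies
   and ARM_EQ is returned, which arm_print_condition renders as "eq";
   the CC_D*mode entries instead jump to the dominance handling, where
   an EQ comparison returns the inverse of the tabulated code. */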
16815 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions. */
16817 static void
16818 thumb2_final_prescan_insn (rtx insn)
16820 rtx first_insn = insn;
16821 rtx body = PATTERN (insn);
16823 enum arm_cond_code code;
16827 /* Remove the previous insn from the count of insns to be output. */
16828 if (arm_condexec_count)
16829 arm_condexec_count--;
16831 /* Nothing to do if we are already inside a conditional block. */
16832 if (arm_condexec_count)
16833 return;
16835 if (GET_CODE (body) != COND_EXEC)
16836 return;
16838 /* Conditional jumps are implemented directly. */
16839 if (GET_CODE (insn) == JUMP_INSN)
16840 return;
16842 predicate = COND_EXEC_TEST (body);
16843 arm_current_cc = get_arm_condition_code (predicate);
16845 n = get_attr_ce_count (insn);
16846 arm_condexec_count = 1;
16847 arm_condexec_mask = (1 << n) - 1;
16848 arm_condexec_masklen = n;
16849 /* See if subsequent instructions can be combined into the same block. */
16852 insn = next_nonnote_insn (insn);
16854 /* Jumping into the middle of an IT block is illegal, so a label or
16855 barrier terminates the block. */
16856 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
16857 break;
16859 body = PATTERN (insn);
16860 /* USE and CLOBBER aren't really insns, so just skip them. */
16861 if (GET_CODE (body) == USE
16862 || GET_CODE (body) == CLOBBER)
16863 continue;
16865 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16866 if (GET_CODE (body) != COND_EXEC)
16867 break;
16868 /* Allow up to 4 conditionally executed instructions in a block. */
16869 n = get_attr_ce_count (insn);
16870 if (arm_condexec_masklen + n > 4)
16871 break;
16873 predicate = COND_EXEC_TEST (body);
16874 code = get_arm_condition_code (predicate);
16875 mask = (1 << n) - 1;
16876 if (arm_current_cc == code)
16877 arm_condexec_mask |= (mask << arm_condexec_masklen);
16878 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
16879 break;
16881 arm_condexec_count++;
16882 arm_condexec_masklen += n;
16884 /* A jump must be the last instruction in a conditional block. */
16885 if (GET_CODE (insn) == JUMP_INSN)
16886 break;
16888 /* Restore recog_data (getting the attributes of other insns can
16889 destroy this array, but final.c assumes that it remains intact
16890 across this call). */
16891 extract_constrain_insn_cached (first_insn);
16895 arm_final_prescan_insn (rtx insn)
16897 /* BODY will hold the body of INSN. */
16898 rtx body = PATTERN (insn);
16900 /* This will be 1 if trying to repeat the trick, and things need to be
16901 reversed if it appears to fail. */
16904 /* If we start with a return insn, we only succeed if we find another one. */
16905 int seeking_return = 0;
16907 /* START_INSN will hold the insn from where we start looking. This is the
16908 first insn after the following code_label if REVERSE is true. */
16909 rtx start_insn = insn;
16911 /* If in state 4, check if the target branch is reached, in order to
16912 change back to state 0. */
16913 if (arm_ccfsm_state == 4)
16915 if (insn == arm_target_insn)
16917 arm_target_insn = NULL;
16918 arm_ccfsm_state = 0;
16920 return;
16923 /* If in state 3, it is possible to repeat the trick, if this insn is an
16924 unconditional branch to a label, and immediately following this branch
16925 is the previous target label which is only used once, and the label this
16926 branch jumps to is not too far off. */
16927 if (arm_ccfsm_state == 3)
16929 if (simplejump_p (insn))
16931 start_insn = next_nonnote_insn (start_insn);
16932 if (GET_CODE (start_insn) == BARRIER)
16934 /* XXX Isn't this always a barrier? */
16935 start_insn = next_nonnote_insn (start_insn);
16937 if (GET_CODE (start_insn) == CODE_LABEL
16938 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16939 && LABEL_NUSES (start_insn) == 1)
16940 reverse = TRUE;
16941 else
16942 return;
16944 else if (GET_CODE (body) == RETURN)
16946 start_insn = next_nonnote_insn (start_insn);
16947 if (GET_CODE (start_insn) == BARRIER)
16948 start_insn = next_nonnote_insn (start_insn);
16949 if (GET_CODE (start_insn) == CODE_LABEL
16950 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16951 && LABEL_NUSES (start_insn) == 1)
16953 reverse = TRUE;
16954 seeking_return = 1;
16956 else
16957 return;
16963 gcc_assert (!arm_ccfsm_state || reverse);
16964 if (GET_CODE (insn) != JUMP_INSN)
16965 return;
16967 /* This jump might be paralleled with a clobber of the condition codes;
16968 the jump should always come first. */
16969 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16970 body = XVECEXP (body, 0, 0);
16972 if (reverse
16973 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16974 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16977 int fail = FALSE, succeed = FALSE;
16978 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16979 int then_not_else = TRUE;
16980 rtx this_insn = start_insn, label = 0;
16982 /* Register the insn jumped to. */
16983 if (reverse)
16984 {
16985 if (!seeking_return)
16986 label = XEXP (SET_SRC (body), 0);
16987 }
16988 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16989 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16990 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16992 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16993 then_not_else = FALSE;
16995 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16996 seeking_return = 1;
16997 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16999 seeking_return = 1;
17000 then_not_else = FALSE;
17003 gcc_unreachable ();
17005 /* See how many insns this branch skips, and what kind of insns. If all
17006 insns are okay, and the label or unconditional branch to the same
17007 label is not too far away, succeed. */
17008 for (insns_skipped = 0;
17009 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17013 this_insn = next_nonnote_insn (this_insn);
17017 switch (GET_CODE (this_insn))
17020 /* Succeed if it is the target label, otherwise fail since
17021 control falls in from somewhere else. */
17022 if (this_insn == label)
17024 arm_ccfsm_state = 1;
17032 /* Succeed if the following insn is the target label.
17034 If return insns are used then the last insn in a function
17035 will be a barrier. */
17036 this_insn = next_nonnote_insn (this_insn);
17037 if (this_insn && this_insn == label)
17039 arm_ccfsm_state = 1;
17047 /* The AAPCS says that conditional calls should not be
17048 used since they make interworking inefficient (the
17049 linker can't transform BL<cond> into BLX). That's
17050 only a problem if the machine has BLX. */
17057 /* Succeed if the following insn is the target label, or
17058 if the following two insns are a barrier and the target label. */
17060 this_insn = next_nonnote_insn (this_insn);
17061 if (this_insn && GET_CODE (this_insn) == BARRIER)
17062 this_insn = next_nonnote_insn (this_insn);
17064 if (this_insn && this_insn == label
17065 && insns_skipped < max_insns_skipped)
17067 arm_ccfsm_state = 1;
17075 /* If this is an unconditional branch to the same label, succeed.
17076 If it is to another label, do nothing. If it is conditional, fail. */
17078 /* XXX Probably, the tests for SET and the PC are unnecessary. */
17081 scanbody = PATTERN (this_insn);
17082 if (GET_CODE (scanbody) == SET
17083 && GET_CODE (SET_DEST (scanbody)) == PC)
17085 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17086 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17088 arm_ccfsm_state = 2;
17089 succeed = TRUE;
17091 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17092 fail = TRUE;
17094 /* Fail if a conditional return is undesirable (e.g. on a
17095 StrongARM), but still allow this if optimizing for size. */
17096 else if (GET_CODE (scanbody) == RETURN
17097 && !use_return_insn (TRUE, NULL)
17098 && !optimize_size)
17099 fail = TRUE;
17100 else if (GET_CODE (scanbody) == RETURN
17101 && seeking_return)
17103 arm_ccfsm_state = 2;
17104 succeed = TRUE;
17106 else if (GET_CODE (scanbody) == PARALLEL)
17108 switch (get_attr_conds (this_insn))
17118 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17123 /* Instructions using or affecting the condition codes make it fail. */
17125 scanbody = PATTERN (this_insn);
17126 if (!(GET_CODE (scanbody) == SET
17127 || GET_CODE (scanbody) == PARALLEL)
17128 || get_attr_conds (this_insn) != CONDS_NOCOND)
17131 /* A conditional cirrus instruction must be followed by
17132 a non Cirrus instruction. However, since we
17133 conditionalize instructions in this function and by
17134 the time we get here we can't add instructions
17135 (nops), because shorten_branches() has already been
17136 called, we will disable conditionalizing Cirrus
17137 instructions to be safe. */
17138 if (GET_CODE (scanbody) != USE
17139 && GET_CODE (scanbody) != CLOBBER
17140 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17150 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17151 arm_target_label = CODE_LABEL_NUMBER (label);
17154 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17156 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17158 this_insn = next_nonnote_insn (this_insn);
17159 gcc_assert (!this_insn
17160 || (GET_CODE (this_insn) != BARRIER
17161 && GET_CODE (this_insn) != CODE_LABEL));
17165 /* Oh, dear! We ran off the end; give up. */
17166 extract_constrain_insn_cached (insn);
17167 arm_ccfsm_state = 0;
17168 arm_target_insn = NULL;
17171 arm_target_insn = this_insn;
17174 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17175 what it was. */
17176 if (!reverse)
17177 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17179 if (reverse || then_not_else)
17180 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17183 /* Restore recog_data (getting the attributes of other insns can
17184 destroy this array, but final.c assumes that it remains intact
17185 across this call). */
17186 extract_constrain_insn_cached (insn);
17190 /* Output IT instructions. */
17192 thumb2_asm_output_opcode (FILE * stream)
17194 char buff[5];
17195 int n;
17197 if (arm_condexec_mask)
17199 for (n = 0; n < arm_condexec_masklen; n++)
17200 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17201 buff[n] = 0; /* Terminate the IT-mask string. */
17202 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17203 arm_condition_codes[arm_current_cc]);
17204 arm_condexec_mask = 0;
17208 /* Returns true if REGNO is a valid register
17209 for holding a quantity of type MODE. */
17211 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17213 if (GET_MODE_CLASS (mode) == MODE_CC)
17214 return (regno == CC_REGNUM
17215 || (TARGET_HARD_FLOAT && TARGET_VFP
17216 && regno == VFPCC_REGNUM));
17219 /* For the Thumb we only allow values bigger than SImode in
17220 registers 0 - 6, so that there is always a second low
17221 register available to hold the upper part of the value.
17222 We probably ought to ensure that the register is the
17223 start of an even numbered register pair. */
17224 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17226 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17227 && IS_CIRRUS_REGNUM (regno))
17228 /* We have outlawed SI values in Cirrus registers because they
17229 reside in the lower 32 bits, but SF values reside in the
17230 upper 32 bits. This causes gcc all sorts of grief. We can't
17231 even split the registers into pairs because Cirrus SI values
17232 get sign extended to 64 bits -- aldyh. */
17233 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17235 if (TARGET_HARD_FLOAT && TARGET_VFP
17236 && IS_VFP_REGNUM (regno))
17238 if (mode == SFmode || mode == SImode)
17239 return VFP_REGNO_OK_FOR_SINGLE (regno);
17241 if (mode == DFmode)
17242 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17244 /* VFP registers can hold HFmode values, but there is no point in
17245 putting them there unless we have hardware conversion insns. */
17246 if (mode == HFmode)
17247 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17250 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17251 || (VALID_NEON_QREG_MODE (mode)
17252 && NEON_REGNO_OK_FOR_QUAD (regno))
17253 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17254 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17255 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17256 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17257 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17262 if (TARGET_REALLY_IWMMXT)
17264 if (IS_IWMMXT_GR_REGNUM (regno))
17265 return mode == SImode;
17267 if (IS_IWMMXT_REGNUM (regno))
17268 return VALID_IWMMXT_REG_MODE (mode);
17271 /* We allow almost any value to be stored in the general registers.
17272 Restrict doubleword quantities to even register pairs so that we can
17273 use ldrd. Do not allow very large Neon structure opaque modes in
17274 general registers; they would use too many. */
17275 if (regno <= LAST_ARM_REGNUM)
17276 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17277 && ARM_NUM_REGS (mode) <= 4;
17279 if (regno == FRAME_POINTER_REGNUM
17280 || regno == ARG_POINTER_REGNUM)
17281 /* We only allow integers in the fake hard registers. */
17282 return GET_MODE_CLASS (mode) == MODE_INT;
17284 /* The only registers left are the FPA registers
17285 which we only allow to hold FP values. */
17286 return (TARGET_HARD_FLOAT && TARGET_FPA
17287 && GET_MODE_CLASS (mode) == MODE_FLOAT
17288 && regno >= FIRST_FPA_REGNUM
17289 && regno <= LAST_FPA_REGNUM);
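/* Worked example for the general-register rule above, added for clarity
   (not in the original source): with TARGET_LDRD, a DImode value is
   rejected in r1 (odd regno, 8-byte mode) but accepted in r2, keeping
   doubleword values in even/odd pairs that ldrd can load. */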
17292 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17293 not used in arm mode. */
17296 arm_regno_class (int regno)
17300 if (regno == STACK_POINTER_REGNUM)
17302 if (regno == CC_REGNUM)
17309 if (TARGET_THUMB2 && regno < 8)
17312 if ( regno <= LAST_ARM_REGNUM
17313 || regno == FRAME_POINTER_REGNUM
17314 || regno == ARG_POINTER_REGNUM)
17315 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17317 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17318 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17320 if (IS_CIRRUS_REGNUM (regno))
17321 return CIRRUS_REGS;
17323 if (IS_VFP_REGNUM (regno))
17325 if (regno <= D7_VFP_REGNUM)
17326 return VFP_D0_D7_REGS;
17327 else if (regno <= LAST_LO_VFP_REGNUM)
17328 return VFP_LO_REGS;
17330 return VFP_HI_REGS;
17333 if (IS_IWMMXT_REGNUM (regno))
17334 return IWMMXT_REGS;
17336 if (IS_IWMMXT_GR_REGNUM (regno))
17337 return IWMMXT_GR_REGS;
17342 /* Handle a special case when computing the offset
17343 of an argument from the frame pointer. */
17345 arm_debugger_arg_offset (int value, rtx addr)
17349 /* We are only interested if dbxout_parms() failed to compute the offset. */
17353 /* We can only cope with the case where the address is held in a register. */
17354 if (GET_CODE (addr) != REG)
17357 /* If we are using the frame pointer to point at the argument, then
17358 an offset of 0 is correct. */
17359 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17362 /* If we are using the stack pointer to point at the
17363 argument, then an offset of 0 is correct. */
17364 /* ??? Check this is consistent with thumb2 frame layout. */
17365 if ((TARGET_THUMB || !frame_pointer_needed)
17366 && REGNO (addr) == SP_REGNUM)
17369 /* Oh dear. The argument is pointed to by a register rather
17370 than being held in a register, or being stored at a known
17371 offset from the frame pointer. Since GDB only understands
17372 those two kinds of argument we must translate the address
17373 held in the register into an offset from the frame pointer.
17374 We do this by searching through the insns for the function
17375 looking to see where this register gets its value. If the
17376 register is initialized from the frame pointer plus an offset
17377 then we are in luck and we can continue, otherwise we give up.
17379 This code is exercised by producing debugging information
17380 for a function with arguments like this:
17382 double func (double a, double b, int c, double d) {return d;}
17384 Without this code the stab for parameter 'd' will be set to
17385 an offset of 0 from the frame pointer, rather than 8. */
17387 /* The if() statement says:
17389 If the insn is a normal instruction
17390 and if the insn is setting the value in a register
17391 and if the register being set is the register holding the address of the argument
17392 and if the address is computed by an addition
17393 that involves adding to a register
17394 which is the frame pointer
17395 a constant integer
17397 then... */
17399 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17401 if ( GET_CODE (insn) == INSN
17402 && GET_CODE (PATTERN (insn)) == SET
17403 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17404 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17405 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17406 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17407 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17410 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17419 warning (0, "unable to compute real location of stacked parameter");
17420 value = 8; /* XXX magic hack */
17426 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17429 if ((MASK) & insn_flags) \
17430 add_builtin_function ((NAME), (TYPE), (CODE), \
17431 BUILT_IN_MD, NULL, NULL_TREE); \
17435 struct builtin_description
17437 const unsigned int mask;
17438 const enum insn_code icode;
17439 const char * const name;
17440 const enum arm_builtins code;
17441 const enum rtx_code comparison;
17442 const unsigned int flag;
17445 static const struct builtin_description bdesc_2arg[] =
17447 #define IWMMXT_BUILTIN(code, string, builtin) \
17448 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17449 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
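/* For illustration (added, not in the original source): the first entry
   below, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to
   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
     ARM_BUILTIN_WADDB, UNKNOWN, 0 }. */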
17451 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17452 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17453 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17454 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17455 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17456 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17457 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17458 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17459 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17460 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17461 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17462 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17463 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17464 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17465 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17466 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17467 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17468 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17469 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17470 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17471 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17472 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17473 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17474 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17475 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17476 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17477 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17478 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17479 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17480 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17481 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17482 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17483 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17484 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17485 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17486 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17487 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17488 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17489 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17490 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17491 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17492 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17493 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17494 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17495 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17496 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17497 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17498 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17499 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17500 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17501 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17502 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17503 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17504 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17505 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17506 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17507 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17508 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17510 #define IWMMXT_BUILTIN2(code, builtin) \
17511 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17513 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17514 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17515 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17516 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17517 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17518 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17519 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17520 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17521 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17522 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17523 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17524 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17525 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17526 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17527 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17528 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17529 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17530 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17531 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17532 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17533 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17534 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17535 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17536 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17537 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17538 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17539 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17540 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17541 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17542 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17543 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17544 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17547 static const struct builtin_description bdesc_1arg[] =
17549 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17550 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17551 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17552 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17553 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17554 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17555 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17556 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17557 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17558 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17559 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17560 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17561 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17562 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17563 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17564 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17565 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17566 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17569 /* Set up all the iWMMXt builtins. This is
17570 not called if TARGET_IWMMXT is zero. */
17573 arm_init_iwmmxt_builtins (void)
17575 const struct builtin_description * d;
17577 tree endlink = void_list_node;
17579 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17580 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17581 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17584 = build_function_type (integer_type_node,
17585 tree_cons (NULL_TREE, integer_type_node, endlink));
17586 tree v8qi_ftype_v8qi_v8qi_int
17587 = build_function_type (V8QI_type_node,
17588 tree_cons (NULL_TREE, V8QI_type_node,
17589 tree_cons (NULL_TREE, V8QI_type_node,
17590 tree_cons (NULL_TREE,
17593 tree v4hi_ftype_v4hi_int
17594 = build_function_type (V4HI_type_node,
17595 tree_cons (NULL_TREE, V4HI_type_node,
17596 tree_cons (NULL_TREE, integer_type_node,
17598 tree v2si_ftype_v2si_int
17599 = build_function_type (V2SI_type_node,
17600 tree_cons (NULL_TREE, V2SI_type_node,
17601 tree_cons (NULL_TREE, integer_type_node,
17603 tree v2si_ftype_di_di
17604 = build_function_type (V2SI_type_node,
17605 tree_cons (NULL_TREE, long_long_integer_type_node,
17606 tree_cons (NULL_TREE, long_long_integer_type_node,
17608 tree di_ftype_di_int
17609 = build_function_type (long_long_integer_type_node,
17610 tree_cons (NULL_TREE, long_long_integer_type_node,
17611 tree_cons (NULL_TREE, integer_type_node,
17613 tree di_ftype_di_int_int
17614 = build_function_type (long_long_integer_type_node,
17615 tree_cons (NULL_TREE, long_long_integer_type_node,
17616 tree_cons (NULL_TREE, integer_type_node,
17617 tree_cons (NULL_TREE,
17620 tree int_ftype_v8qi
17621 = build_function_type (integer_type_node,
17622 tree_cons (NULL_TREE, V8QI_type_node,
17624 tree int_ftype_v4hi
17625 = build_function_type (integer_type_node,
17626 tree_cons (NULL_TREE, V4HI_type_node,
17628 tree int_ftype_v2si
17629 = build_function_type (integer_type_node,
17630 tree_cons (NULL_TREE, V2SI_type_node,
17632 tree int_ftype_v8qi_int
17633 = build_function_type (integer_type_node,
17634 tree_cons (NULL_TREE, V8QI_type_node,
17635 tree_cons (NULL_TREE, integer_type_node,
17637 tree int_ftype_v4hi_int
17638 = build_function_type (integer_type_node,
17639 tree_cons (NULL_TREE, V4HI_type_node,
17640 tree_cons (NULL_TREE, integer_type_node,
17642 tree int_ftype_v2si_int
17643 = build_function_type (integer_type_node,
17644 tree_cons (NULL_TREE, V2SI_type_node,
17645 tree_cons (NULL_TREE, integer_type_node,
17647 tree v8qi_ftype_v8qi_int_int
17648 = build_function_type (V8QI_type_node,
17649 tree_cons (NULL_TREE, V8QI_type_node,
17650 tree_cons (NULL_TREE, integer_type_node,
17651 tree_cons (NULL_TREE,
17654 tree v4hi_ftype_v4hi_int_int
17655 = build_function_type (V4HI_type_node,
17656 tree_cons (NULL_TREE, V4HI_type_node,
17657 tree_cons (NULL_TREE, integer_type_node,
17658 tree_cons (NULL_TREE,
17661 tree v2si_ftype_v2si_int_int
17662 = build_function_type (V2SI_type_node,
17663 tree_cons (NULL_TREE, V2SI_type_node,
17664 tree_cons (NULL_TREE, integer_type_node,
17665 tree_cons (NULL_TREE,
17668 /* Miscellaneous. */
17669 tree v8qi_ftype_v4hi_v4hi
17670 = build_function_type (V8QI_type_node,
17671 tree_cons (NULL_TREE, V4HI_type_node,
17672 tree_cons (NULL_TREE, V4HI_type_node,
17674 tree v4hi_ftype_v2si_v2si
17675 = build_function_type (V4HI_type_node,
17676 tree_cons (NULL_TREE, V2SI_type_node,
17677 tree_cons (NULL_TREE, V2SI_type_node,
17679 tree v2si_ftype_v4hi_v4hi
17680 = build_function_type (V2SI_type_node,
17681 tree_cons (NULL_TREE, V4HI_type_node,
17682 tree_cons (NULL_TREE, V4HI_type_node,
17684 tree v2si_ftype_v8qi_v8qi
17685 = build_function_type (V2SI_type_node,
17686 tree_cons (NULL_TREE, V8QI_type_node,
17687 tree_cons (NULL_TREE, V8QI_type_node,
17689 tree v4hi_ftype_v4hi_di
17690 = build_function_type (V4HI_type_node,
17691 tree_cons (NULL_TREE, V4HI_type_node,
17692 tree_cons (NULL_TREE,
17693 long_long_integer_type_node,
17695 tree v2si_ftype_v2si_di
17696 = build_function_type (V2SI_type_node,
17697 tree_cons (NULL_TREE, V2SI_type_node,
17698 tree_cons (NULL_TREE,
17699 long_long_integer_type_node,
17701 tree void_ftype_int_int
17702 = build_function_type (void_type_node,
17703 tree_cons (NULL_TREE, integer_type_node,
17704 tree_cons (NULL_TREE, integer_type_node,
17707 = build_function_type (long_long_unsigned_type_node, endlink);
17709 = build_function_type (long_long_integer_type_node,
17710 tree_cons (NULL_TREE, V8QI_type_node,
17713 = build_function_type (long_long_integer_type_node,
17714 tree_cons (NULL_TREE, V4HI_type_node,
17717 = build_function_type (long_long_integer_type_node,
17718 tree_cons (NULL_TREE, V2SI_type_node,
17720 tree v2si_ftype_v4hi
17721 = build_function_type (V2SI_type_node,
17722 tree_cons (NULL_TREE, V4HI_type_node,
17724 tree v4hi_ftype_v8qi
17725 = build_function_type (V4HI_type_node,
17726 tree_cons (NULL_TREE, V8QI_type_node,
17729 tree di_ftype_di_v4hi_v4hi
17730 = build_function_type (long_long_unsigned_type_node,
17731 tree_cons (NULL_TREE,
17732 long_long_unsigned_type_node,
17733 tree_cons (NULL_TREE, V4HI_type_node,
17734 tree_cons (NULL_TREE,
17738 tree di_ftype_v4hi_v4hi
17739 = build_function_type (long_long_unsigned_type_node,
17740 tree_cons (NULL_TREE, V4HI_type_node,
17741 tree_cons (NULL_TREE, V4HI_type_node,
17744 /* Normal vector binops. */
17745 tree v8qi_ftype_v8qi_v8qi
17746 = build_function_type (V8QI_type_node,
17747 tree_cons (NULL_TREE, V8QI_type_node,
17748 tree_cons (NULL_TREE, V8QI_type_node,
17750 tree v4hi_ftype_v4hi_v4hi
17751 = build_function_type (V4HI_type_node,
17752 tree_cons (NULL_TREE, V4HI_type_node,
17753 tree_cons (NULL_TREE, V4HI_type_node,
17755 tree v2si_ftype_v2si_v2si
17756 = build_function_type (V2SI_type_node,
17757 tree_cons (NULL_TREE, V2SI_type_node,
17758 tree_cons (NULL_TREE, V2SI_type_node,
17760 tree di_ftype_di_di
17761 = build_function_type (long_long_unsigned_type_node,
17762 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17763 tree_cons (NULL_TREE,
17764 long_long_unsigned_type_node,
17767 /* Add all builtins that are more or less simple operations on two
17769 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17771 /* Use one of the operands; the target can have a different mode for
17772 mask-generating compares. */
17773 enum machine_mode mode;
17779 mode = insn_data[d->icode].operand[1].mode;
17784 type = v8qi_ftype_v8qi_v8qi;
17787 type = v4hi_ftype_v4hi_v4hi;
17790 type = v2si_ftype_v2si_v2si;
17793 type = di_ftype_di_di;
17797 gcc_unreachable ();
17800 def_mbuiltin (d->mask, d->name, type, d->code);
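/* For illustration (added, not in the original source): the addv8qi3
   entry has V8QImode for operand 1, so the loop above picks
   v8qi_ftype_v8qi_v8qi and registers __builtin_arm_waddb as a
   two-argument v8qi builtin. */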
17803 /* Add the remaining MMX insns with somewhat more complicated types. */
17804 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
17805 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
17806 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
17808 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
17809 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
17810 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
17811 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
17812 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
17813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
17815 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
17816 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
17817 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
17818 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
17819 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
17820 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
17822 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
17823 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
17824 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
17825 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
17826 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
17827 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
17829 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
17830 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
17831 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
17832 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
17833 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
17834 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
17836 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
17838 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
17839 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
17840 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
17841 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
17843 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
17844 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
17845 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17846 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17847 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17848 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17849 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17850 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17851 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17853 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17854 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17855 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17857 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17858 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17859 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17861 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17862 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17863 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17864 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17865 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17866 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17868 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17869 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17870 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17871 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17872 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17873 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17874 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17875 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17876 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17877 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17878 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17879 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17881 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17882 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17883 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17884 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17886 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17887 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17888 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17889 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17890 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17891 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17892 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
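
/* An informal usage sketch (not part of the original sources): with an
   iWMMXt-enabled compiler the definitions above expose the coprocessor
   directly to C code, e.g.

     long long acc  = __builtin_arm_wzero ();
     int       ctrl = __builtin_arm_getwcx (0);

   which arm_expand_builtin below turns into the WZERO and TMRC
   instruction patterns.  */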
static void
arm_init_tls_builtins (void)
{
  tree ftype;
  tree decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};
#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP

#define T_MAX 13
typedef enum {
  NEON_BINOP,
  NEON_TERNOP,
  NEON_UNOP,
  NEON_GETLANE,
  NEON_SETLANE,
  NEON_CREATE,
  NEON_DUP,
  NEON_DUPLANE,
  NEON_COMBINE,
  NEON_SPLIT,
  NEON_LANEMUL,
  NEON_LANEMULL,
  NEON_LANEMULH,
  NEON_LANEMAC,
  NEON_SCALARMUL,
  NEON_SCALARMULL,
  NEON_SCALARMULH,
  NEON_SCALARMAC,
  NEON_CONVERT,
  NEON_FIXCONV,
  NEON_SELECT,
  NEON_RESULTPAIR,
  NEON_REINTERP,
  NEON_VTBL,
  NEON_VTBX,
  NEON_LOAD1,
  NEON_LOAD1LANE,
  NEON_STORE1,
  NEON_STORE1LANE,
  NEON_LOADSTRUCT,
  NEON_LOADSTRUCTLANE,
  NEON_STORESTRUCT,
  NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP,
  NEON_SHIFTINSERT,
  NEON_SHIFTIMM,
  NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0

/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */
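
/* Two informal examples (not from the original sources) of how the macros
   and the "key"-type convention above play out.  A table entry such as

     VAR2 (BINOP, vqdmull, v4hi, v2si)

   expands to the neon_builtin_datum initializer

     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   and the v4hi variant there is keyed by the 64-bit input vectors, matching
   the "vqdmull.s16" assembler form even though the result is a quadword
   vector.  */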
static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
18042 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18043 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18044 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18045 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18046 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18047 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18048 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18049 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18050 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18051 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18052 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18053 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18054 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18055 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18056 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18057 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18058 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18059 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18060 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18061 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18062 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18063 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18064 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18065 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18066 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18067 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18068 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18069 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18070 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18071 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18072 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18073 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18074 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18075 { VAR10 (BINOP, vsub,
18076 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18077 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18078 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18079 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18080 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18081 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18082 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18083 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18084 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18085 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18086 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18087 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18088 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18089 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18090 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18091 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18092 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18093 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18094 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18095 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18096 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18097 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18098 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18099 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18100 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18101 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18102 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18103 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18104 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18105 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18106 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18107 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18108 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18109 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18110 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18111 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18112 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18113 /* FIXME: vget_lane supports more variants than this! */
18114 { VAR10 (GETLANE, vget_lane,
18115 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18116 { VAR10 (SETLANE, vset_lane,
18117 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18118 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18119 { VAR10 (DUP, vdup_n,
18120 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18121 { VAR10 (DUPLANE, vdup_lane,
18122 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18123 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18124 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18125 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18126 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18127 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18128 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18129 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18130 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18131 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18132 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18133 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18134 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18135 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18136 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18137 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18138 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18139 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18140 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18141 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18142 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18143 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18144 { VAR10 (BINOP, vext,
18145 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18146 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18147 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18148 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18149 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18150 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18151 { VAR10 (SELECT, vbsl,
18152 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18153 { VAR1 (VTBL, vtbl1, v8qi) },
18154 { VAR1 (VTBL, vtbl2, v8qi) },
18155 { VAR1 (VTBL, vtbl3, v8qi) },
18156 { VAR1 (VTBL, vtbl4, v8qi) },
18157 { VAR1 (VTBX, vtbx1, v8qi) },
18158 { VAR1 (VTBX, vtbx2, v8qi) },
18159 { VAR1 (VTBX, vtbx3, v8qi) },
18160 { VAR1 (VTBX, vtbx4, v8qi) },
18161 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18162 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18163 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18164 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18165 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18166 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18167 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18168 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18169 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18170 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18171 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18172 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18173 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18174 { VAR10 (LOAD1, vld1,
18175 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18176 { VAR10 (LOAD1LANE, vld1_lane,
18177 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18178 { VAR10 (LOAD1, vld1_dup,
18179 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18180 { VAR10 (STORE1, vst1,
18181 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18182 { VAR10 (STORE1LANE, vst1_lane,
18183 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18184 { VAR9 (LOADSTRUCT,
18185 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18186 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18187 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18188 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18189 { VAR9 (STORESTRUCT, vst2,
18190 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18191 { VAR7 (STORESTRUCTLANE, vst2_lane,
18192 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18193 { VAR9 (LOADSTRUCT,
18194 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18195 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18196 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18197 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18198 { VAR9 (STORESTRUCT, vst3,
18199 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18200 { VAR7 (STORESTRUCTLANE, vst3_lane,
18201 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18202 { VAR9 (LOADSTRUCT, vld4,
18203 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18204 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18205 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18206 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18207 { VAR9 (STORESTRUCT, vst4,
18208 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18209 { VAR7 (STORESTRUCTLANE, vst4_lane,
18210 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18211 { VAR10 (LOGICBINOP, vand,
18212 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18213 { VAR10 (LOGICBINOP, vorr,
18214 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18215 { VAR10 (BINOP, veor,
18216 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18217 { VAR10 (LOGICBINOP, vbic,
18218 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18219 { VAR10 (LOGICBINOP, vorn,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};

static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

18240 tree neon_intQI_type_node;
18241 tree neon_intHI_type_node;
18242 tree neon_polyQI_type_node;
18243 tree neon_polyHI_type_node;
18244 tree neon_intSI_type_node;
18245 tree neon_intDI_type_node;
18246 tree neon_float_type_node;
18248 tree intQI_pointer_node;
18249 tree intHI_pointer_node;
18250 tree intSI_pointer_node;
18251 tree intDI_pointer_node;
18252 tree float_pointer_node;
18254 tree const_intQI_node;
18255 tree const_intHI_node;
18256 tree const_intSI_node;
18257 tree const_intDI_node;
18258 tree const_float_node;
18260 tree const_intQI_pointer_node;
18261 tree const_intHI_pointer_node;
18262 tree const_intSI_pointer_node;
18263 tree const_intDI_pointer_node;
18264 tree const_float_pointer_node;
18266 tree V8QI_type_node;
18267 tree V4HI_type_node;
18268 tree V2SI_type_node;
18269 tree V2SF_type_node;
18270 tree V16QI_type_node;
18271 tree V8HI_type_node;
18272 tree V4SI_type_node;
18273 tree V4SF_type_node;
18274 tree V2DI_type_node;
18276 tree intUQI_type_node;
18277 tree intUHI_type_node;
18278 tree intUSI_type_node;
18279 tree intUDI_type_node;
18281 tree intEI_type_node;
18282 tree intOI_type_node;
18283 tree intCI_type_node;
18284 tree intXI_type_node;
18286 tree V8QI_pointer_node;
18287 tree V4HI_pointer_node;
18288 tree V2SI_pointer_node;
18289 tree V2SF_pointer_node;
18290 tree V16QI_pointer_node;
18291 tree V8HI_pointer_node;
18292 tree V4SI_pointer_node;
18293 tree V4SF_pointer_node;
18294 tree V2DI_pointer_node;
18296 tree void_ftype_pv8qi_v8qi_v8qi;
18297 tree void_ftype_pv4hi_v4hi_v4hi;
18298 tree void_ftype_pv2si_v2si_v2si;
18299 tree void_ftype_pv2sf_v2sf_v2sf;
18300 tree void_ftype_pdi_di_di;
18301 tree void_ftype_pv16qi_v16qi_v16qi;
18302 tree void_ftype_pv8hi_v8hi_v8hi;
18303 tree void_ftype_pv4si_v4si_v4si;
18304 tree void_ftype_pv4sf_v4sf_v4sf;
18305 tree void_ftype_pv2di_v2di_v2di;
18307 tree reinterp_ftype_dreg[5][5];
18308 tree reinterp_ftype_qreg[5][5];
18309 tree dreg_types[5], qreg_types[5];
18311 /* Create distinguished type nodes for NEON vector element types,
18312 and pointers to values of such types, so we can detect them later. */
18313 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18314 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18315 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18316 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18317 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18318 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18319 neon_float_type_node = make_node (REAL_TYPE);
18320 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18321 layout_type (neon_float_type_node);
18323 /* Define typedefs which exactly correspond to the modes we are basing vector
18324 types on. If you change these names you'll need to change
18325 the table used by arm_mangle_type too. */
18326 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18327 "__builtin_neon_qi");
18328 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18329 "__builtin_neon_hi");
18330 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18331 "__builtin_neon_si");
18332 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18333 "__builtin_neon_sf");
18334 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18335 "__builtin_neon_di");
18336 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18337 "__builtin_neon_poly8");
18338 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18339 "__builtin_neon_poly16");
18341 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18342 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18343 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18344 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18345 float_pointer_node = build_pointer_type (neon_float_type_node);
  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);
18359 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18360 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18361 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18362 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18363 const_float_pointer_node = build_pointer_type (const_float_node);
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
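
/* An informal note: these vector type nodes are, loosely, what the
   arm_neon.h typedefs such as int8x8_t, int16x4_t and float32x4_t are
   built from, via the __builtin_neon_* element types registered above.  */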
18387 /* Unsigned integer types for various mode sizes. */
18388 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18389 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18390 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18391 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18393 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18394 "__builtin_neon_uqi");
18395 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18396 "__builtin_neon_uhi");
18397 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18398 "__builtin_neon_usi");
18399 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18400 "__builtin_neon_udi");
18402 /* Opaque integer types for structures of vectors. */
18403 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18404 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18405 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18406 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18408 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18409 "__builtin_neon_ti");
18410 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18411 "__builtin_neon_ei");
18412 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18413 "__builtin_neon_oi");
18414 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18415 "__builtin_neon_ci");
18416 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18417 "__builtin_neon_xi");
18419 /* Pointers to vector types. */
18420 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18421 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18422 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18423 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18424 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18425 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18426 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18427 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18428 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18430 /* Operations which return results as pairs. */
18431 void_ftype_pv8qi_v8qi_v8qi =
18432 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18433 V8QI_type_node, NULL);
18434 void_ftype_pv4hi_v4hi_v4hi =
18435 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18436 V4HI_type_node, NULL);
18437 void_ftype_pv2si_v2si_v2si =
18438 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18439 V2SI_type_node, NULL);
18440 void_ftype_pv2sf_v2sf_v2sf =
18441 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18442 V2SF_type_node, NULL);
18443 void_ftype_pdi_di_di =
18444 build_function_type_list (void_type_node, intDI_pointer_node,
18445 neon_intDI_type_node, neon_intDI_type_node, NULL);
18446 void_ftype_pv16qi_v16qi_v16qi =
18447 build_function_type_list (void_type_node, V16QI_pointer_node,
18448 V16QI_type_node, V16QI_type_node, NULL);
18449 void_ftype_pv8hi_v8hi_v8hi =
18450 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18451 V8HI_type_node, NULL);
18452 void_ftype_pv4si_v4si_v4si =
18453 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18454 V4SI_type_node, NULL);
18455 void_ftype_pv4sf_v4sf_v4sf =
18456 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18457 V4SF_type_node, NULL);
18458 void_ftype_pv2di_v2di_v2di =
18459 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18460 V2DI_type_node, NULL);
18462 dreg_types[0] = V8QI_type_node;
18463 dreg_types[1] = V4HI_type_node;
18464 dreg_types[2] = V2SI_type_node;
18465 dreg_types[3] = V2SF_type_node;
18466 dreg_types[4] = neon_intDI_type_node;
18468 qreg_types[0] = V16QI_type_node;
18469 qreg_types[1] = V8HI_type_node;
18470 qreg_types[2] = V4SI_type_node;
18471 qreg_types[3] = V4SF_type_node;
18472 qreg_types[4] = V2DI_type_node;
  for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
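
/* For instance (illustrative only): reinterp_ftype_dreg[0][4] built above
   is the signature "V8QI (DI)", which the loop below attaches to the
   __builtin_neon_vreinterpretv8qidi builtin.  */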
  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
	{
	  const char* const modenames[] = {
	    "v8qi", "v4hi", "v2si", "v2sf", "di",
	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
	  };
	  char namebuf[60];
	  tree ftype = NULL;
	  enum insn_code icode;
	  int is_load = 0, is_store = 0;

	  if ((d->bits & (1 << j)) == 0)
	    continue;

	  icode = d->codes[codeidx++];

	  switch (d->itype)
	    {
	    case NEON_LOAD1:
	    case NEON_LOAD1LANE:
	    case NEON_LOADSTRUCT:
	    case NEON_LOADSTRUCTLANE:
	      is_load = 1;
	      /* Fall through.  */
	    case NEON_STORE1:
	    case NEON_STORE1LANE:
	    case NEON_STORESTRUCT:
	    case NEON_STORESTRUCTLANE:
	      if (!is_load)
		is_store = 1;
	      /* Fall through.  */
	    case NEON_UNOP:
	    case NEON_BINOP:
	    case NEON_LOGICBINOP:
	    case NEON_SHIFTINSERT:
	    case NEON_TERNOP:
	    case NEON_GETLANE:
	    case NEON_SETLANE:
	    case NEON_CREATE:
	    case NEON_DUP:
	    case NEON_DUPLANE:
	    case NEON_SHIFTIMM:
	    case NEON_SHIFTACC:
	    case NEON_COMBINE:
	    case NEON_SPLIT:
	    case NEON_CONVERT:
	    case NEON_FIXCONV:
	    case NEON_LANEMUL:
	    case NEON_LANEMULL:
	    case NEON_LANEMULH:
	    case NEON_LANEMAC:
	    case NEON_SCALARMUL:
	    case NEON_SCALARMULL:
	    case NEON_SCALARMULH:
	    case NEON_SCALARMAC:
	    case NEON_SELECT:
	    case NEON_VTBL:
	    case NEON_VTBX:
	      {
		int k;
		tree return_type = void_type_node, args = void_list_node;

		/* Build a function type directly from the insn_data for this
		   builtin.  The build_function_type() function takes care of
		   removing duplicates for us.  */
		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
		  {
		    tree eltype;

		    if (is_load && k == 1)
		      {
			/* Neon load patterns always have the memory operand
			   (a SImode pointer) in the operand 1 position.  We
			   want a const pointer to the element type in that
			   position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI: case T_V16QI:
			    eltype = const_intQI_pointer_node;
			    break;
			  case T_V4HI: case T_V8HI:
			    eltype = const_intHI_pointer_node;
			    break;
			  case T_V2SI: case T_V4SI:
			    eltype = const_intSI_pointer_node;
			    break;
			  case T_V2SF: case T_V4SF:
			    eltype = const_float_pointer_node;
			    break;
			  case T_DI: case T_V2DI:
			    eltype = const_intDI_pointer_node;
			    break;
			  default: gcc_unreachable ();
			  }
		      }
		    else if (is_store && k == 0)
		      {
			/* Similarly, Neon store patterns use operand 0 as
			   the memory location to store to (a SImode pointer).
			   Use a pointer to the element type of the store in
			   that position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI: case T_V16QI:
			    eltype = intQI_pointer_node;
			    break;
			  case T_V4HI: case T_V8HI:
			    eltype = intHI_pointer_node;
			    break;
			  case T_V2SI: case T_V4SI:
			    eltype = intSI_pointer_node;
			    break;
			  case T_V2SF: case T_V4SF:
			    eltype = float_pointer_node;
			    break;
			  case T_DI: case T_V2DI:
			    eltype = intDI_pointer_node;
			    break;
			  default: gcc_unreachable ();
			  }
		      }
		    else
		      switch (insn_data[icode].operand[k].mode)
			{
			case VOIDmode: eltype = void_type_node; break;
			case QImode: eltype = neon_intQI_type_node; break;
			case HImode: eltype = neon_intHI_type_node; break;
			case SImode: eltype = neon_intSI_type_node; break;
			case SFmode: eltype = neon_float_type_node; break;
			case DImode: eltype = neon_intDI_type_node; break;
			case TImode: eltype = intTI_type_node; break;
			case EImode: eltype = intEI_type_node; break;
			case OImode: eltype = intOI_type_node; break;
			case CImode: eltype = intCI_type_node; break;
			case XImode: eltype = intXI_type_node; break;
			/* 64-bit vectors.  */
			case V8QImode: eltype = V8QI_type_node; break;
			case V4HImode: eltype = V4HI_type_node; break;
			case V2SImode: eltype = V2SI_type_node; break;
			case V2SFmode: eltype = V2SF_type_node; break;
			/* 128-bit vectors.  */
			case V16QImode: eltype = V16QI_type_node; break;
			case V8HImode: eltype = V8HI_type_node; break;
			case V4SImode: eltype = V4SI_type_node; break;
			case V4SFmode: eltype = V4SF_type_node; break;
			case V2DImode: eltype = V2DI_type_node; break;
			default: gcc_unreachable ();
			}

		    if (k == 0 && !is_store)
		      return_type = eltype;
		    else
		      args = tree_cons (NULL_TREE, eltype, args);
		  }

		ftype = build_function_type (return_type, args);
	      }
	      break;

	    case NEON_RESULTPAIR:
	      {
		switch (insn_data[icode].operand[1].mode)
		  {
		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
		  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
		  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
		  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
		  case DImode: ftype = void_ftype_pdi_di_di; break;
		  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
		  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
		  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
		  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
		  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    case NEON_REINTERP:
	      {
		/* We iterate over 5 doubleword types, then 5 quadword
		   types.  */
		int rhs = j % 5;
		switch (insn_data[icode].operand[0].mode)
		  {
		  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
		  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
		  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
		  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
		  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
		  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
		  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
		  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
		  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  gcc_assert (ftype != NULL);

	  sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

	  add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
				NULL_TREE);
	}
    }
}

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
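
/* A user-level sketch of the __fp16 type registered above (not from the
   original sources; assumes -mfp16-format=ieee or -mfp16-format=alternative):

     __fp16 h = 1.0f;     stored in half precision
     float  f = h + h;    operands promoted to float first

   The promotion is performed via arm_promoted_type below.  */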
static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}

/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go through an intermediate conversion
   to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);

  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
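
/* Illustrative only: under this hook a direct __fp16 <-> double conversion
   such as "double d = h;" is rewritten as "double d = (double)(float) h;",
   so only the single __fp16 <-> float conversion needs target support.  */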
18805 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
18806 This simply adds HFmode as a supported mode; even though we don't
18807 implement arithmetic on this type directly, it's supported by
18808 optabs conversions, much the way the double-word arithmetic is
18809 special-cased in the default hook. */
static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}
18820 /* Errors in the source file can cause expand_expr to return const0_rtx
18821 where we expect a vector. To avoid crashing, use one of the vector
18822 clear instructions. */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}

static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key, *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
	     sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int) found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}

typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;
#define NEON_MAX_BUILTIN_ARGS 5

/* Expand a Neon builtin.  */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  int argc = 0;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  op[argc] = expand_normal (arg[argc]);
	  mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		    (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		    (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
19073 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19074 constants defined per-instruction or per instruction-variant. Instead, the
19075 required info is looked up in the table neon_builtin_data. */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_itype itype;
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    default:
      gcc_unreachable ();
    }
}
19187 /* Emit code to reinterpret one Neon type as another, without altering bits. */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
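
/* An informal note: this is why the arm_neon.h vreinterpret_* intrinsics
   cost no instructions; the reinterpretation is only a change of machine
   mode on the same register.  */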
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */

void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2, op2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
19212 /* Set up operands for a register copy from src to dest, taking care not to
19213 clobber registers in the process.
19214 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19215 be called with a large N, so that should be OK. */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;
  unsigned int i, j;

  while (copied != done)
    {
      for (i = 0; i < count; i++)
	{
	  int good = 1;

	  for (j = 0; good && j < count; j++)
	    if (i != j && (copied & (1 << j)) == 0
		&& reg_overlap_mentioned_p (src[j], dest[i]))
	      good = 0;

	  if (good && ((copied & (1 << i)) == 0))
	    {
	      operands[opctr++] = dest[i];
	      operands[opctr++] = src[i];
	      copied |= 1 << i;
	    }
	}
    }

  gcc_assert (opctr == count * 2);
}
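
/* A worked example (not from the original sources): for dest = {d0, d1}
   and src = {d1, d2} the loop above orders the copies d0 <- d1, then
   d1 <- d2, so d1 is read before it is overwritten.  A genuinely cyclic
   dependency would never terminate, which callers are assumed to avoid.  */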
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_SETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
      return 0;

    case ARM_BUILTIN_GETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));
      return target;

    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);

    /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : CODE_FOR_iwmmxt_walign);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static int
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0;
       (mask & (1 << bit)) == 0;
       ++bit)
    continue;

  return bit;
}
19495 /* Emit code to push or pop registers to or from the stack. F is the
19496 assembly file. MASK is the registers to push or pop. PUSH is
19497 nonzero if we should push, and zero if we should pop. For debugging
19498 output, if pushing, adjust CFA_OFFSET by the amount of space added
19499 to the stack. REAL_REGS should have the same number of bits set as
19500 MASK, and will be used instead (in the same order) to describe which
19501 registers were saved - this is used to mark the save slots when we
19502 push high registers after moving them to low registers. */
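
/* For example (informal): pushing r4, r5 and lr in a prologue emits

     push {r4, r5, lr}

   and, when unwind tables are enabled, a matching ".save {r4, r5, lr}"
   directive before it.  */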
static void
thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
	       unsigned long real_regs)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  if (ARM_EABI_UNWIND_TABLES && push)
    {
      fprintf (f, "\t.save\t{");
      for (regno = 0; regno < 15; regno++)
	{
	  if (real_regs & (1 << regno))
	    {
	      if (real_regs & ((1 << regno) - 1))
		fprintf (f, ", ");
	      asm_fprintf (f, "%r", regno);
	    }
	}
      fprintf (f, "}\n");
    }

  fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (push && (mask & (1 << LR_REGNUM)))
    {
      /* Catch pushing the LR.  */
      if (mask & 0xFF)
	fprintf (f, ", ");

      asm_fprintf (f, "%r", LR_REGNUM);

      pushed_words++;
    }
  else if (!push && (mask & (1 << PC_REGNUM)))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");

  if (push && pushed_words && dwarf2out_do_frame ())
    {
      char *l = dwarf2out_cfi_label (false);
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_words = 0;
      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
	{
	  if (pushed_mask & 1)
	    dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
	}
    }
}
19605 /* Generate code to return from a thumb function.
19606 If 'reg_containing_return_addr' is -1, then the return address is
19607 actually on the stack, at the stack pointer. */
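/* For instance, in a void function whose return address has been left
   on the stack, thumb_exit (f, -1) pops the address into the
   lowest-numbered corruptible argument register and branches through
   it, emitting something like (register choice illustrative):

	pop	{r0}
	bx	r0
*/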
19609 thumb_exit (FILE *f, int reg_containing_return_addr)
19611 unsigned regs_available_for_popping;
19612 unsigned regs_to_pop;
19614 unsigned available;
19618 int restore_a4 = FALSE;
19620 /* Compute the registers we need to pop. */
19624 if (reg_containing_return_addr == -1)
19626 regs_to_pop |= 1 << LR_REGNUM;
19630 if (TARGET_BACKTRACE)
19632 /* Restore the (ARM) frame pointer and stack pointer. */
19633 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19637 /* If there is nothing to pop then just emit the BX instruction and return.  */
19639 if (pops_needed == 0)
19641 if (crtl->calls_eh_return)
19642 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19644 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19647 /* Otherwise if we are not supporting interworking and we have not created
19648 a backtrace structure and the function was not entered in ARM mode then
19649 just pop the return address straight into the PC. */
19650 else if (!TARGET_INTERWORK
19651 && !TARGET_BACKTRACE
19652 && !is_called_in_ARM_mode (current_function_decl)
19653 && !crtl->calls_eh_return)
19655 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19659 /* Find out how many of the (return) argument registers we can corrupt. */
19660 regs_available_for_popping = 0;
19662 /* If returning via __builtin_eh_return, the bottom three registers
19663 all contain information needed for the return. */
19664 if (crtl->calls_eh_return)
19668 /* We can deduce the registers used from the function's
19669 return value.  This is more reliable than examining
19670 df_regs_ever_live_p () because that will be set if the register is
19671 ever used in the function, not just if the register is used
19672 to hold a return value. */
19674 if (crtl->return_rtx != 0)
19675 mode = GET_MODE (crtl->return_rtx);
19677 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19679 size = GET_MODE_SIZE (mode);
19683 /* In a void function we can use any argument register.
19684 In a function that returns a structure on the stack
19685 we can use the second and third argument registers. */
19686 if (mode == VOIDmode)
19687 regs_available_for_popping =
19688 (1 << ARG_REGISTER (1))
19689 | (1 << ARG_REGISTER (2))
19690 | (1 << ARG_REGISTER (3));
19692 regs_available_for_popping =
19693 (1 << ARG_REGISTER (2))
19694 | (1 << ARG_REGISTER (3));
19696 else if (size <= 4)
19697 regs_available_for_popping =
19698 (1 << ARG_REGISTER (2))
19699 | (1 << ARG_REGISTER (3));
19700 else if (size <= 8)
19701 regs_available_for_popping =
19702 (1 << ARG_REGISTER (3));
19705 /* Match registers to be popped with registers into which we pop them. */
19706 for (available = regs_available_for_popping,
19707 required = regs_to_pop;
19708 required != 0 && available != 0;
19709 available &= ~(available & - available),
19710 required &= ~(required & - required))
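/* The X & -X idiom in the loop header isolates the lowest set bit of X,
   so X &= ~(X & -X) clears exactly that bit.  A worked instance, with
   hypothetical masks: available == 0x0c and required == 0x06 pair off
   bit 2 with bit 1 on the first iteration and bit 3 with bit 2 on the
   second, after which both masks are zero and the loop exits.  */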
19713 /* If we have any popping registers left over, remove them. */
19715 regs_available_for_popping &= ~available;
19717 /* Otherwise if we need another popping register we can use
19718 the fourth argument register. */
19719 else if (pops_needed)
19721 /* If we have not found any free argument registers and
19722 reg a4 contains the return address, we must move it. */
19723 if (regs_available_for_popping == 0
19724 && reg_containing_return_addr == LAST_ARG_REGNUM)
19726 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19727 reg_containing_return_addr = LR_REGNUM;
19729 else if (size > 12)
19731 /* Register a4 is being used to hold part of the return value,
19732 but we have dire need of a free, low register. */
19735 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
19738 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19740 /* The fourth argument register is available. */
19741 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19747 /* Pop as many registers as we can. */
19748 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19749 regs_available_for_popping);
19751 /* Process the registers we popped. */
19752 if (reg_containing_return_addr == -1)
19754 /* The return address was popped into the lowest numbered register. */
19755 regs_to_pop &= ~(1 << LR_REGNUM);
19757 reg_containing_return_addr =
19758 number_of_first_bit_set (regs_available_for_popping);
19760 /* Remove this register from the mask of available registers, so that
19761 the return address will not be corrupted by further pops. */
19762 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19765 /* If we popped other registers then handle them here. */
19766 if (regs_available_for_popping)
19770 /* Work out which register currently contains the frame pointer. */
19771 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
19773 /* Move it into the correct place. */
19774 asm_fprintf (f, "\tmov\t%r, %r\n",
19775 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
19777 /* (Temporarily) remove it from the mask of popped registers. */
19778 regs_available_for_popping &= ~(1 << frame_pointer);
19779 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
19781 if (regs_available_for_popping)
19785 /* We popped the stack pointer as well,
19786 find the register that contains it. */
19787 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
19789 /* Move it into the stack register. */
19790 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
19792 /* At this point we have popped all necessary registers, so
19793 do not worry about restoring regs_available_for_popping
19794 to its correct value:
19796 assert (pops_needed == 0)
19797 assert (regs_available_for_popping == (1 << frame_pointer))
19798 assert (regs_to_pop == (1 << STACK_POINTER)) */
19802 /* Since we have just moved the popped value into the frame
19803 pointer, the popping register is available for reuse, and
19804 we know that we still have the stack pointer left to pop. */
19805 regs_available_for_popping |= (1 << frame_pointer);
19809 /* If we still have registers left on the stack, but we no longer have
19810 any registers into which we can pop them, then we must move the return
19811 address into the link register and make available the register that we used to hold it.  */
19813 if (regs_available_for_popping == 0 && pops_needed > 0)
19815 regs_available_for_popping |= 1 << reg_containing_return_addr;
19817 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
19818 reg_containing_return_addr);
19820 reg_containing_return_addr = LR_REGNUM;
19823 /* If we have registers left on the stack then pop some more.
19824 We know that at most we will want to pop FP and SP. */
19825 if (pops_needed > 0)
19830 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19831 regs_available_for_popping);
19833 /* We have popped either FP or SP.
19834 Move whichever one it is into the correct register. */
19835 popped_into = number_of_first_bit_set (regs_available_for_popping);
19836 move_to = number_of_first_bit_set (regs_to_pop);
19838 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
19840 regs_to_pop &= ~(1 << move_to);
19845 /* If we still have not popped everything then we must have only
19846 had one register available to us and we are now popping the SP. */
19847 if (pops_needed > 0)
19851 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19852 regs_available_for_popping);
19854 popped_into = number_of_first_bit_set (regs_available_for_popping);
19856 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19858 assert (regs_to_pop == (1 << STACK_POINTER))
19859 assert (pops_needed == 1)
19863 /* If necessary restore the a4 register. */
19866 if (reg_containing_return_addr != LR_REGNUM)
19868 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19869 reg_containing_return_addr = LR_REGNUM;
19872 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19875 if (crtl->calls_eh_return)
19876 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19878 /* Return to caller. */
19879 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19882 /* Scan INSN just before assembler is output for it.
19883 For Thumb-1, we track the status of the condition codes; this
19884 information is used in the cbranchsi4_insn pattern. */
19886 thumb1_final_prescan_insn (rtx insn)
19888 if (flag_print_asm_name)
19889 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19890 INSN_ADDRESSES (INSN_UID (insn)));
19891 /* Don't overwrite the previous setter when we get to a cbranch. */
19892 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19894 enum attr_conds conds;
19896 if (cfun->machine->thumb1_cc_insn)
19898 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
19899 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
19902 conds = get_attr_conds (insn);
19903 if (conds == CONDS_SET)
19905 rtx set = single_set (insn);
19906 cfun->machine->thumb1_cc_insn = insn;
19907 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
19908 cfun->machine->thumb1_cc_op1 = const0_rtx;
19909 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
19910 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
19912 rtx src1 = XEXP (SET_SRC (set), 1);
19913 if (src1 == const0_rtx)
19914 cfun->machine->thumb1_cc_mode = CCmode;
19917 else if (conds != CONDS_NOCOND)
19918 cfun->machine->thumb1_cc_insn = NULL_RTX;
19923 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19925 unsigned HOST_WIDE_INT mask = 0xff;
19928 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19929 if (val == 0) /* XXX */
19932 for (i = 0; i < 25; i++)
19933 if ((val & (mask << i)) == val)
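/* Worked instances, assuming the elided returns follow the usual
   accept/reject pattern: 0xff000 is accepted, since the 8-bit window
   shifted left by 12 covers every set bit; 0x80000001 is rejected,
   because no contiguous 8-bit window can cover both bit 0 and bit 31.  */
#if 0
gcc_assert (thumb_shiftable_const (0xff000) != 0);
gcc_assert (thumb_shiftable_const (0x80000001) == 0);
#endif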
19939 /* Returns nonzero if the current function contains,
19940 or might contain a far jump. */
19942 thumb_far_jump_used_p (void)
19946 /* This test is only important for leaf functions. */
19947 /* assert (!leaf_function_p ()); */
19949 /* If we have already decided that far jumps may be used,
19950 do not bother checking again, and always return true even if
19951 it turns out that they are not being used. Once we have made
19952 the decision that far jumps are present (and hence that the link
19953 register will be pushed onto the stack) we cannot go back on it. */
19954 if (cfun->machine->far_jump_used)
19957 /* If this function is not being called from the prologue/epilogue
19958 generation code then it must be being called from the
19959 INITIAL_ELIMINATION_OFFSET macro. */
19960 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19962 /* In this case we know that we are being asked about the elimination
19963 of the arg pointer register. If that register is not being used,
19964 then there are no arguments on the stack, and we do not have to
19965 worry that a far jump might force the prologue to push the link
19966 register, changing the stack offsets. In this case we can just
19967 return false, since the presence of far jumps in the function will
19968 not affect stack offsets.
19970 If the arg pointer is live (or if it was live, but has now been
19971 eliminated and so set to dead) then we do have to test to see if
19972 the function might contain a far jump. This test can lead to some
19973 false negatives, since before reload is completed, the length of
19974 branch instructions is not known, so gcc defaults to returning their
19975 longest length, which in turn sets the far jump attribute to true.
19977 A false negative will not result in bad code being generated, but it
19978 will result in a needless push and pop of the link register. We
19979 hope that this does not occur too often.
19981 If we need doubleword stack alignment this could affect the other
19982 elimination offsets so we can't risk getting it wrong. */
19983 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19984 cfun->machine->arg_pointer_live = 1;
19985 else if (!cfun->machine->arg_pointer_live)
19989 /* Check to see if the function contains a branch
19990 insn with the far jump attribute set. */
19991 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19993 if (GET_CODE (insn) == JUMP_INSN
19994 /* Ignore tablejump patterns. */
19995 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19996 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19997 && get_attr_far_jump (insn) == FAR_JUMP_YES
20000 /* Record the fact that we have decided that
20001 the function does use far jumps. */
20002 cfun->machine->far_jump_used = 1;
20010 /* Return nonzero if FUNC must be entered in ARM mode. */
20012 is_called_in_ARM_mode (tree func)
20014 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20016 /* Ignore the problem about functions whose address is taken. */
20017 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20021 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20027 /* Given the stack offsets and register mask in OFFSETS, decide how
20028 many additional registers to push instead of subtracting a constant
20029 from SP. For epilogues the principle is the same except we use pop.
20030 FOR_PROLOGUE indicates which we're generating. */
20032 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20034 HOST_WIDE_INT amount;
20035 unsigned long live_regs_mask = offsets->saved_regs_mask;
20036 /* Extract a mask of the ones we can give to the Thumb's push/pop instructions.  */
20038 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20039 /* Then count how many other high registers will need to be pushed. */
20040 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20041 int n_free, reg_base;
20043 if (!for_prologue && frame_pointer_needed)
20044 amount = offsets->locals_base - offsets->saved_regs;
20046 amount = offsets->outgoing_args - offsets->saved_regs;
20048 /* If the stack frame size is 512 exactly, we can save one load
20049 instruction, which should make this a win even when optimizing for speed.  */
20051 if (!optimize_size && amount != 512)
20054 /* Can't do this if there are high registers to push. */
20055 if (high_regs_pushed != 0)
20058 /* Shouldn't do it in the prologue if no registers would normally
20059 be pushed at all. In the epilogue, also allow it if we'll have
20060 a pop insn for the PC. */
20063 || TARGET_BACKTRACE
20064 || (live_regs_mask & 1 << LR_REGNUM) == 0
20065 || TARGET_INTERWORK
20066 || crtl->args.pretend_args_size != 0))
20069 /* Don't do this if thumb_expand_prologue wants to emit instructions
20070 between the push and the stack frame allocation. */
20072 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20073 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20080 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20081 live_regs_mask >>= reg_base;
20084 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20085 && (for_prologue || call_used_regs[reg_base + n_free]))
20087 live_regs_mask >>= 1;
20093 gcc_assert (amount / 4 * 4 == amount);
20095 if (amount >= 512 && (amount - n_free * 4) < 512)
20096 return (amount - 508) / 4;
20097 if (amount <= n_free * 4)
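/* A worked instance of the arithmetic above, with hypothetical values:
   amount == 516 and n_free == 2 give amount - n_free * 4 == 508 < 512,
   so we return (516 - 508) / 4 == 2; pushing two extra registers
   shrinks the remaining decrement to 508, the largest immediate a
   single Thumb "sub sp" instruction can encode.  */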
20102 /* The bits which aren't usefully expanded as rtl. */
20104 thumb_unexpanded_epilogue (void)
20106 arm_stack_offsets *offsets;
20108 unsigned long live_regs_mask = 0;
20109 int high_regs_pushed = 0;
20111 int had_to_push_lr;
20114 if (cfun->machine->return_used_this_function != 0)
20117 if (IS_NAKED (arm_current_func_type ()))
20120 offsets = arm_get_frame_offsets ();
20121 live_regs_mask = offsets->saved_regs_mask;
20122 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20124 /* We can deduce the registers used from the function's return value.
20125 This is more reliable than examining df_regs_ever_live_p () because that
20126 will be set if the register is ever used in the function, not just if
20127 the register is used to hold a return value. */
20128 size = arm_size_return_regs ();
20130 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20133 unsigned long extra_mask = (1 << extra_pop) - 1;
20134 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20137 /* The prolog may have pushed some high registers to use as
20138 work registers, e.g. the testsuite file:
20139 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20140 compiles to produce:
20141 push {r4, r5, r6, r7, lr}
20145 as part of the prolog. We have to undo that pushing here. */
20147 if (high_regs_pushed)
20149 unsigned long mask = live_regs_mask & 0xff;
20152 /* The available low registers depend on the size of the value we are returning.  */
20160 /* Oh dear! We have no low registers into which we can pop
20163 ("no low registers available for popping high registers");
20165 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20166 if (live_regs_mask & (1 << next_hi_reg))
20169 while (high_regs_pushed)
20171 /* Find lo register(s) into which the high register(s) can be popped.  */
20173 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20175 if (mask & (1 << regno))
20176 high_regs_pushed--;
20177 if (high_regs_pushed == 0)
20181 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20183 /* Pop the values into the low register(s). */
20184 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20186 /* Move the value(s) into the high registers. */
20187 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20189 if (mask & (1 << regno))
20191 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20194 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20195 if (live_regs_mask & (1 << next_hi_reg))
20200 live_regs_mask &= ~0x0f00;
20203 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20204 live_regs_mask &= 0xff;
20206 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20208 /* Pop the return address into the PC. */
20209 if (had_to_push_lr)
20210 live_regs_mask |= 1 << PC_REGNUM;
20212 /* Either no argument registers were pushed or a backtrace
20213 structure was created which includes an adjusted stack
20214 pointer, so just pop everything. */
20215 if (live_regs_mask)
20216 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20219 /* We have either just popped the return address into the
20220 PC, or it was kept in LR for the entire function.
20221 Note that thumb_pushpop has already called thumb_exit if the
20222 PC was in the list. */
20223 if (!had_to_push_lr)
20224 thumb_exit (asm_out_file, LR_REGNUM);
20228 /* Pop everything but the return address. */
20229 if (live_regs_mask)
20230 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20233 if (had_to_push_lr)
20237 /* We have no free low regs, so save one. */
20238 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20242 /* Get the return address into a temporary register. */
20243 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20244 1 << LAST_ARG_REGNUM);
20248 /* Move the return address to lr. */
20249 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20251 /* Restore the low register. */
20252 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20257 regno = LAST_ARG_REGNUM;
20262 /* Remove the argument registers that were pushed onto the stack. */
20263 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20264 SP_REGNUM, SP_REGNUM,
20265 crtl->args.pretend_args_size);
20267 thumb_exit (asm_out_file, regno);
20273 /* Functions to save and restore machine-specific function data. */
20274 static struct machine_function *
20275 arm_init_machine_status (void)
20277 struct machine_function *machine;
20278 machine = ggc_alloc_cleared_machine_function ();
20280 #if ARM_FT_UNKNOWN != 0
20281 machine->func_type = ARM_FT_UNKNOWN;
20286 /* Return an RTX indicating where the return address to the
20287 calling function can be found. */
20289 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20294 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20297 /* Do anything needed before RTL is emitted for each function. */
20299 arm_init_expanders (void)
20301 /* Arrange to initialize and mark the machine per-function status. */
20302 init_machine_status = arm_init_machine_status;
20304 /* This is to stop the combine pass optimizing away the alignment
20305 adjustment of va_arg. */
20306 /* ??? It is claimed that this should not be necessary. */
20308 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20312 /* Like arm_compute_initial_elimination offset. Simpler because there
20313 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20314 to point at the base of the local variables after static stack
20315 space for a function has been allocated. */
20318 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20320 arm_stack_offsets *offsets;
20322 offsets = arm_get_frame_offsets ();
20326 case ARG_POINTER_REGNUM:
20329 case STACK_POINTER_REGNUM:
20330 return offsets->outgoing_args - offsets->saved_args;
20332 case FRAME_POINTER_REGNUM:
20333 return offsets->soft_frame - offsets->saved_args;
20335 case ARM_HARD_FRAME_POINTER_REGNUM:
20336 return offsets->saved_regs - offsets->saved_args;
20338 case THUMB_HARD_FRAME_POINTER_REGNUM:
20339 return offsets->locals_base - offsets->saved_args;
20342 gcc_unreachable ();
20346 case FRAME_POINTER_REGNUM:
20349 case STACK_POINTER_REGNUM:
20350 return offsets->outgoing_args - offsets->soft_frame;
20352 case ARM_HARD_FRAME_POINTER_REGNUM:
20353 return offsets->saved_regs - offsets->soft_frame;
20355 case THUMB_HARD_FRAME_POINTER_REGNUM:
20356 return offsets->locals_base - offsets->soft_frame;
20359 gcc_unreachable ();
20364 gcc_unreachable ();
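/* Worked example with a hypothetical frame layout: saved_args == 0,
   saved_regs == 16, soft_frame == 24, outgoing_args == 40.  Then
   eliminating ARG_POINTER into STACK_POINTER yields 40 - 0 == 40,
   and FRAME_POINTER into STACK_POINTER yields 40 - 24 == 16.  */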
20368 /* Generate the rest of a function's prologue. */
20370 thumb1_expand_prologue (void)
20374 HOST_WIDE_INT amount;
20375 arm_stack_offsets *offsets;
20376 unsigned long func_type;
20378 unsigned long live_regs_mask;
20380 func_type = arm_current_func_type ();
20382 /* Naked functions don't have prologues. */
20383 if (IS_NAKED (func_type))
20386 if (IS_INTERRUPT (func_type))
20388 error ("interrupt Service Routines cannot be coded in Thumb mode");
20392 offsets = arm_get_frame_offsets ();
20393 live_regs_mask = offsets->saved_regs_mask;
20394 /* Load the pic register before setting the frame pointer,
20395 so we can use r7 as a temporary work register. */
20396 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20397 arm_load_pic_register (live_regs_mask);
20399 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20400 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20401 stack_pointer_rtx);
20403 amount = offsets->outgoing_args - offsets->saved_regs;
20404 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20409 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20410 GEN_INT (- amount)));
20411 RTX_FRAME_RELATED_P (insn) = 1;
20417 /* The stack decrement is too big for an immediate value in a single
20418 insn. In theory we could issue multiple subtracts, but after
20419 three of them it becomes more space efficient to place the full
20420 value in the constant pool and load into a register. (Also the
20421 ARM debugger really likes to see only one stack decrement per
20422 function). So instead we look for a scratch register into which
20423 we can load the decrement, and then we subtract this from the
20424 stack pointer. Unfortunately on the thumb the only available
20425 scratch registers are the argument registers, and we cannot use
20426 these as they may hold arguments to the function. Instead we
20427 attempt to locate a call preserved register which is used by this
20428 function. If we can find one, then we know that it will have
20429 been pushed at the start of the prologue and so we can corrupt it now.  */
20431 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20432 if (live_regs_mask & (1 << regno))
20435 gcc_assert (regno <= LAST_LO_REGNUM);
20437 reg = gen_rtx_REG (SImode, regno);
20439 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20441 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20442 stack_pointer_rtx, reg));
20443 RTX_FRAME_RELATED_P (insn) = 1;
20444 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20445 plus_constant (stack_pointer_rtx,
20447 RTX_FRAME_RELATED_P (dwarf) = 1;
20448 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
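/* A sketch of what this path emits for, say, amount == 4096 with r4
   among the saved registers (register choice illustrative):

	ldr	r4, .Lnnn	@ r4 = -4096, typically via the pool
	add	sp, sp, r4

   together with a REG_FRAME_RELATED_EXPR note recording sp = sp - 4096,
   since the DWARF machinery cannot recover the constant from the
   register-register add alone.  */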
20452 if (frame_pointer_needed)
20453 thumb_set_frame_pointer (offsets);
20455 /* If we are profiling, make sure no instructions are scheduled before
20456 the call to mcount. Similarly if the user has requested no
20457 scheduling in the prolog. Similarly if we want non-call exceptions
20458 using the EABI unwinder, to prevent faulting instructions from being
20459 swapped with a stack adjustment. */
20460 if (crtl->profile || !TARGET_SCHED_PROLOG
20461 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
20462 emit_insn (gen_blockage ());
20464 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20465 if (live_regs_mask & 0xff)
20466 cfun->machine->lr_save_eliminated = 0;
20471 thumb1_expand_epilogue (void)
20473 HOST_WIDE_INT amount;
20474 arm_stack_offsets *offsets;
20477 /* Naked functions don't have epilogues.  */
20478 if (IS_NAKED (arm_current_func_type ()))
20481 offsets = arm_get_frame_offsets ();
20482 amount = offsets->outgoing_args - offsets->saved_regs;
20484 if (frame_pointer_needed)
20486 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20487 amount = offsets->locals_base - offsets->saved_regs;
20489 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20491 gcc_assert (amount >= 0);
20495 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20496 GEN_INT (amount)));
20499 /* r3 is always free in the epilogue. */
20500 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20502 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20503 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20507 /* Emit a USE (stack_pointer_rtx), so that
20508 the stack adjustment will not be deleted. */
20509 emit_insn (gen_prologue_use (stack_pointer_rtx));
20511 if (crtl->profile || !TARGET_SCHED_PROLOG)
20512 emit_insn (gen_blockage ());
20514 /* Emit a clobber for each insn that will be restored in the epilogue,
20515 so that flow2 will get register lifetimes correct. */
20516 for (regno = 0; regno < 13; regno++)
20517 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20518 emit_clobber (gen_rtx_REG (SImode, regno));
20520 if (! df_regs_ever_live_p (LR_REGNUM))
20521 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20525 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20527 arm_stack_offsets *offsets;
20528 unsigned long live_regs_mask = 0;
20529 unsigned long l_mask;
20530 unsigned high_regs_pushed = 0;
20531 int cfa_offset = 0;
20534 if (IS_NAKED (arm_current_func_type ()))
20537 if (is_called_in_ARM_mode (current_function_decl))
20541 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20542 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20544 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20546 /* Generate code sequence to switch us into Thumb mode. */
20547 /* The .code 32 directive has already been emitted by
20548 ASM_DECLARE_FUNCTION_NAME. */
20549 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20550 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20552 /* Generate a label, so that the debugger will notice the
20553 change in instruction sets. This label is also used by
20554 the assembler to bypass the ARM code when this function
20555 is called from a Thumb encoded function elsewhere in the
20556 same file. Hence the definition of STUB_NAME here must
20557 agree with the definition in gas/config/tc-arm.c. */
20559 #define STUB_NAME ".real_start_of"
20561 fprintf (f, "\t.code\t16\n");
20563 if (arm_dllexport_name_p (name))
20564 name = arm_strip_name_encoding (name);
20566 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20567 fprintf (f, "\t.thumb_func\n");
20568 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20571 if (crtl->args.pretend_args_size)
20573 /* Output unwind directive for the stack adjustment. */
20574 if (ARM_EABI_UNWIND_TABLES)
20575 fprintf (f, "\t.pad #%d\n",
20576 crtl->args.pretend_args_size);
20578 if (cfun->machine->uses_anonymous_args)
20582 fprintf (f, "\tpush\t{");
20584 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20586 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20587 regno <= LAST_ARG_REGNUM;
20589 asm_fprintf (f, "%r%s", regno,
20590 regno == LAST_ARG_REGNUM ? "" : ", ");
20592 fprintf (f, "}\n");
20595 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20596 SP_REGNUM, SP_REGNUM,
20597 crtl->args.pretend_args_size);
20599 /* We don't need to record the stores for unwinding (would it
20600 help the debugger any if we did?), but record the change in
20601 the stack pointer. */
20602 if (dwarf2out_do_frame ())
20604 char *l = dwarf2out_cfi_label (false);
20606 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20607 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20611 /* Get the registers we are going to push. */
20612 offsets = arm_get_frame_offsets ();
20613 live_regs_mask = offsets->saved_regs_mask;
20614 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20615 l_mask = live_regs_mask & 0x40ff;
20616 /* Then count how many other high registers will need to be pushed. */
20617 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20619 if (TARGET_BACKTRACE)
20622 unsigned work_register;
20624 /* We have been asked to create a stack backtrace structure.
20625 The code looks like this:
20629 0 sub SP, #16 Reserve space for 4 registers.
20630 2 push {R7} Push low registers.
20631 4 add R7, SP, #20 Get the stack pointer before the push.
20632 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20633 8 mov R7, PC Get hold of the start of this code plus 12.
20634 10 str R7, [SP, #16] Store it.
20635 12 mov R7, FP Get hold of the current frame pointer.
20636 14 str R7, [SP, #4] Store it.
20637 16 mov R7, LR Get hold of the current return address.
20638 18 str R7, [SP, #12] Store it.
20639 20 add R7, SP, #16 Point at the start of the backtrace structure.
20640 22 mov FP, R7 Put this value into the frame pointer. */
20642 work_register = thumb_find_work_register (live_regs_mask);
20644 if (ARM_EABI_UNWIND_TABLES)
20645 asm_fprintf (f, "\t.pad #16\n");
20648 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20649 SP_REGNUM, SP_REGNUM);
20651 if (dwarf2out_do_frame ())
20653 char *l = dwarf2out_cfi_label (false);
20655 cfa_offset = cfa_offset + 16;
20656 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20661 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20662 offset = bit_count (l_mask) * UNITS_PER_WORD;
20667 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20668 offset + 16 + crtl->args.pretend_args_size);
20670 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20673 /* Make sure that the instruction fetching the PC is in the right place
20674 to calculate "start of backtrace creation code + 12". */
20677 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20678 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20680 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20681 ARM_HARD_FRAME_POINTER_REGNUM);
20682 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20687 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20688 ARM_HARD_FRAME_POINTER_REGNUM);
20689 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20691 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20692 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20696 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20697 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20699 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20701 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20702 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20704 /* Optimization: If we are not pushing any low registers but we are going
20705 to push some high registers then delay our first push. This will just
20706 be a push of LR and we can combine it with the push of the first high register.  */
20708 else if ((l_mask & 0xff) != 0
20709 || (high_regs_pushed == 0 && l_mask))
20711 unsigned long mask = l_mask;
20712 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20713 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20716 if (high_regs_pushed)
20718 unsigned pushable_regs;
20719 unsigned next_hi_reg;
20721 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20722 if (live_regs_mask & (1 << next_hi_reg))
20725 pushable_regs = l_mask & 0xff;
20727 if (pushable_regs == 0)
20728 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20730 while (high_regs_pushed > 0)
20732 unsigned long real_regs_mask = 0;
20734 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20736 if (pushable_regs & (1 << regno))
20738 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20740 high_regs_pushed --;
20741 real_regs_mask |= (1 << next_hi_reg);
20743 if (high_regs_pushed)
20745 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20747 if (live_regs_mask & (1 << next_hi_reg))
20752 pushable_regs &= ~((1 << regno) - 1);
20758 /* If we had to find a work register and we have not yet
20759 saved the LR then add it to the list of regs to push. */
20760 if (l_mask == (1 << LR_REGNUM))
20762 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20764 real_regs_mask | (1 << LR_REGNUM));
20768 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
20773 /* Handle the case of a double word load into a low register from
20774 a computed memory address. The computed address may involve a
20775 register which is overwritten by the load. */
20777 thumb_load_double_from_address (rtx *operands)
20785 gcc_assert (GET_CODE (operands[0]) == REG);
20786 gcc_assert (GET_CODE (operands[1]) == MEM);
20788 /* Get the memory address. */
20789 addr = XEXP (operands[1], 0);
20791 /* Work out how the memory address is computed. */
20792 switch (GET_CODE (addr))
20795 operands[2] = adjust_address (operands[1], SImode, 4);
20797 if (REGNO (operands[0]) == REGNO (addr))
20799 output_asm_insn ("ldr\t%H0, %2", operands);
20800 output_asm_insn ("ldr\t%0, %1", operands);
20804 output_asm_insn ("ldr\t%0, %1", operands);
20805 output_asm_insn ("ldr\t%H0, %2", operands);
20810 /* Compute <address> + 4 for the high order load. */
20811 operands[2] = adjust_address (operands[1], SImode, 4);
20813 output_asm_insn ("ldr\t%0, %1", operands);
20814 output_asm_insn ("ldr\t%H0, %2", operands);
20818 arg1 = XEXP (addr, 0);
20819 arg2 = XEXP (addr, 1);
20821 if (CONSTANT_P (arg1))
20822 base = arg2, offset = arg1;
20824 base = arg1, offset = arg2;
20826 gcc_assert (GET_CODE (base) == REG);
20828 /* Catch the case of <address> = <reg> + <reg> */
20829 if (GET_CODE (offset) == REG)
20831 int reg_offset = REGNO (offset);
20832 int reg_base = REGNO (base);
20833 int reg_dest = REGNO (operands[0]);
20835 /* Add the base and offset registers together into the
20836 higher destination register. */
20837 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20838 reg_dest + 1, reg_base, reg_offset);
20840 /* Load the lower destination register from the address in
20841 the higher destination register. */
20842 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20843 reg_dest, reg_dest + 1);
20845 /* Load the higher destination register from its own address plus 4.  */
20847 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
20848 reg_dest + 1, reg_dest + 1);
20852 /* Compute <address> + 4 for the high order load. */
20853 operands[2] = adjust_address (operands[1], SImode, 4);
20855 /* If the computed address is held in the low order register
20856 then load the high order register first, otherwise always
20857 load the low order register first. */
20858 if (REGNO (operands[0]) == REGNO (base))
20860 output_asm_insn ("ldr\t%H0, %2", operands);
20861 output_asm_insn ("ldr\t%0, %1", operands);
20865 output_asm_insn ("ldr\t%0, %1", operands);
20866 output_asm_insn ("ldr\t%H0, %2", operands);
20872 /* With no registers to worry about we can just load the value directly.  */
20874 operands[2] = adjust_address (operands[1], SImode, 4);
20876 output_asm_insn ("ldr\t%H0, %2", operands);
20877 output_asm_insn ("ldr\t%0, %1", operands);
20881 gcc_unreachable ();
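/* A worked instance of the REG-plus-REG case above: loading a
   doubleword at r0 + r1 into r2/r3 emits

	add	r3, r0, r1
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]

   The address is formed in the high half of the destination so the
   low-half load cannot clobber it, and the final load may safely
   overwrite its own base register.  */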
20888 thumb_output_move_mem_multiple (int n, rtx *operands)
20895 if (REGNO (operands[4]) > REGNO (operands[5]))
20898 operands[4] = operands[5];
20901 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
20902 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
20906 if (REGNO (operands[4]) > REGNO (operands[5]))
20909 operands[4] = operands[5];
20912 if (REGNO (operands[5]) > REGNO (operands[6]))
20915 operands[5] = operands[6];
20918 if (REGNO (operands[4]) > REGNO (operands[5]))
20921 operands[4] = operands[5];
20925 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
20926 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
20930 gcc_unreachable ();
20936 /* Output a call-via instruction for thumb state. */
20938 thumb_call_via_reg (rtx reg)
20940 int regno = REGNO (reg);
20943 gcc_assert (regno < LR_REGNUM);
20945 /* If we are in the normal text section we can use a single instance
20946 per compilation unit. If we are doing function sections, then we need
20947 an entry per section, since we can't rely on reachability. */
20948 if (in_section == text_section)
20950 thumb_call_reg_needed = 1;
20952 if (thumb_call_via_label[regno] == NULL)
20953 thumb_call_via_label[regno] = gen_label_rtx ();
20954 labelp = thumb_call_via_label + regno;
20958 if (cfun->machine->call_via[regno] == NULL)
20959 cfun->machine->call_via[regno] = gen_label_rtx ();
20960 labelp = cfun->machine->call_via + regno;
20963 output_asm_insn ("bl\t%a0", labelp);
20967 /* Routines for generating rtl. */
20969 thumb_expand_movmemqi (rtx *operands)
20971 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20972 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20973 HOST_WIDE_INT len = INTVAL (operands[2]);
20974 HOST_WIDE_INT offset = 0;
20978 emit_insn (gen_movmem12b (out, in, out, in));
20984 emit_insn (gen_movmem8b (out, in, out, in));
20990 rtx reg = gen_reg_rtx (SImode);
20991 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20992 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20999 rtx reg = gen_reg_rtx (HImode);
21000 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21001 plus_constant (in, offset))));
21002 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21010 rtx reg = gen_reg_rtx (QImode);
21011 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21012 plus_constant (in, offset))));
21013 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
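/* Expansion sketch: a 15-byte copy becomes one movmem12b (a 12-byte
   block move through post-incremented pointers), then an HImode move
   and a QImode move for the 3-byte tail at offsets 12 and 14.  */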
21019 thumb_reload_out_hi (rtx *operands)
21021 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21024 /* Handle reading a half-word from memory during reload. */
21026 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21028 gcc_unreachable ();
21031 /* Return the length of a function name prefix
21032 that starts with the character 'c'. */
21034 arm_get_strip_length (int c)
21038 ARM_NAME_ENCODING_LENGTHS
21043 /* Return a pointer to a function's name with any
21044 and all prefix encodings stripped from it. */
21046 arm_strip_name_encoding (const char *name)
21050 while ((skip = arm_get_strip_length (* name)))
21056 /* If there is a '*' anywhere in the name's prefix, then
21057 emit the stripped name verbatim, otherwise prepend an
21058 underscore if leading underscores are being used. */
21060 arm_asm_output_labelref (FILE *stream, const char *name)
21065 while ((skip = arm_get_strip_length (* name)))
21067 verbatim |= (*name == '*');
21072 fputs (name, stream);
21074 asm_fprintf (stream, "%U%s", name);
21078 arm_file_start (void)
21082 if (TARGET_UNIFIED_ASM)
21083 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21087 const char *fpu_name;
21088 if (arm_selected_arch)
21089 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21091 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21093 if (TARGET_SOFT_FLOAT)
21096 fpu_name = "softvfp";
21098 fpu_name = "softfpa";
21102 fpu_name = arm_fpu_desc->name;
21103 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21105 if (TARGET_HARD_FLOAT)
21106 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21107 if (TARGET_HARD_FLOAT_ABI)
21108 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21111 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21113 /* Some of these attributes only apply when the corresponding features
21114 are used.  However, we don't have any easy way of figuring this out.
21115 Conservatively record the setting that would have been used. */
21117 /* Tag_ABI_FP_rounding. */
21118 if (flag_rounding_math)
21119 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21120 if (!flag_unsafe_math_optimizations)
21122 /* Tag_ABI_FP_denormal.  */
21123 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21124 /* Tag_ABI_FP_exceptions. */
21125 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21127 /* Tag_ABI_FP_user_exceptions. */
21128 if (flag_signaling_nans)
21129 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21130 /* Tag_ABI_FP_number_model. */
21131 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21132 flag_finite_math_only ? 1 : 3);
21134 /* Tag_ABI_align8_needed. */
21135 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21136 /* Tag_ABI_align8_preserved. */
21137 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21138 /* Tag_ABI_enum_size. */
21139 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21140 flag_short_enums ? 1 : 2);
21142 /* Tag_ABI_optimization_goals. */
21145 else if (optimize >= 2)
21151 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21153 /* Tag_ABI_FP_16bit_format. */
21154 if (arm_fp16_format)
21155 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21156 (int)arm_fp16_format);
21158 if (arm_lang_output_object_attributes_hook)
21159 arm_lang_output_object_attributes_hook ();
21161 default_file_start ();
21165 arm_file_end (void)
21169 if (NEED_INDICATE_EXEC_STACK)
21170 /* Add .note.GNU-stack. */
21171 file_end_indicate_exec_stack ();
21173 if (! thumb_call_reg_needed)
21176 switch_to_section (text_section);
21177 asm_fprintf (asm_out_file, "\t.code 16\n");
21178 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21180 for (regno = 0; regno < LR_REGNUM; regno++)
21182 rtx label = thumb_call_via_label[regno];
21186 targetm.asm_out.internal_label (asm_out_file, "L",
21187 CODE_LABEL_NUMBER (label));
21188 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21194 /* Symbols in the text segment can be accessed without indirecting via the
21195 constant pool; it may take an extra binary operation, but this is still
21196 faster than indirecting via memory. Don't do this when not optimizing,
21197 since we won't be calculating all of the offsets necessary to do this anyway.  */
21201 arm_encode_section_info (tree decl, rtx rtl, int first)
21203 if (optimize > 0 && TREE_CONSTANT (decl))
21204 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21206 default_encode_section_info (decl, rtl, first);
21208 #endif /* !ARM_PE */
21211 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21213 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21214 && !strcmp (prefix, "L"))
21216 arm_ccfsm_state = 0;
21217 arm_target_insn = NULL;
21219 default_internal_label (stream, prefix, labelno);
21222 /* Output code to add DELTA to the first argument, and then jump
21223 to FUNCTION. Used for C++ multiple inheritance. */
21225 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21226 HOST_WIDE_INT delta,
21227 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21230 static int thunk_label = 0;
21233 int mi_delta = delta;
21234 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21236 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21239 mi_delta = - mi_delta;
21243 int labelno = thunk_label++;
21244 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21245 /* Thunks are entered in ARM mode when available.  */
21246 if (TARGET_THUMB1_ONLY)
21248 /* push r3 so we can use it as a temporary. */
21249 /* TODO: Omit this save if r3 is not used. */
21250 fputs ("\tpush {r3}\n", file);
21251 fputs ("\tldr\tr3, ", file);
21255 fputs ("\tldr\tr12, ", file);
21257 assemble_name (file, label);
21258 fputc ('\n', file);
21261 /* If we are generating PIC, the ldr instruction below loads
21262 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21263 the address of the add + 8, so we have:
21265 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21268 Note that we have "+ 1" because some versions of GNU ld
21269 don't set the low bit of the result for R_ARM_REL32
21270 relocations against thumb function symbols.
21271 On ARMv6M this is +4, not +8. */
21272 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21273 assemble_name (file, labelpc);
21274 fputs (":\n", file);
21275 if (TARGET_THUMB1_ONLY)
21277 /* This is 2 insns after the start of the thunk, so we know it
21278 is 4-byte aligned. */
21279 fputs ("\tadd\tr3, pc, r3\n", file);
21280 fputs ("\tmov r12, r3\n", file);
21283 fputs ("\tadd\tr12, pc, r12\n", file);
21285 else if (TARGET_THUMB1_ONLY)
21286 fputs ("\tmov r12, r3\n", file);
21288 if (TARGET_THUMB1_ONLY)
21290 if (mi_delta > 255)
21292 fputs ("\tldr\tr3, ", file);
21293 assemble_name (file, label);
21294 fputs ("+4\n", file);
21295 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21296 mi_op, this_regno, this_regno);
21298 else if (mi_delta != 0)
21300 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21301 mi_op, this_regno, this_regno,
21307 /* TODO: Use movw/movt for large constants when available. */
21308 while (mi_delta != 0)
21310 if ((mi_delta & (3 << shift)) == 0)
21314 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21315 mi_op, this_regno, this_regno,
21316 mi_delta & (0xff << shift));
21317 mi_delta &= ~(0xff << shift);
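/* A worked instance of the chunked addition above (operand register
   illustrative): mi_delta == 1025 first emits

	add	r0, r0, #1

   for the low window, then advances SHIFT past the zero bits and emits

	add	r0, r0, #1024

   so every immediate is an 8-bit value at an even shift, which is what
   the ARM rotated-immediate encoding expects.  */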
21324 if (TARGET_THUMB1_ONLY)
21325 fputs ("\tpop\t{r3}\n", file);
21327 fprintf (file, "\tbx\tr12\n");
21328 ASM_OUTPUT_ALIGN (file, 2);
21329 assemble_name (file, label);
21330 fputs (":\n", file);
21333 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21334 rtx tem = XEXP (DECL_RTL (function), 0);
21335 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21336 tem = gen_rtx_MINUS (GET_MODE (tem),
21338 gen_rtx_SYMBOL_REF (Pmode,
21339 ggc_strdup (labelpc)));
21340 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21343 /* Output ".word .LTHUNKn". */
21344 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21346 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21347 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
21351 fputs ("\tb\t", file);
21352 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21353 if (NEED_PLT_RELOC)
21354 fputs ("(PLT)", file);
21355 fputc ('\n', file);
21360 arm_emit_vector_const (FILE *file, rtx x)
21363 const char * pattern;
21365 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21367 switch (GET_MODE (x))
21369 case V2SImode: pattern = "%08x"; break;
21370 case V4HImode: pattern = "%04x"; break;
21371 case V8QImode: pattern = "%02x"; break;
21372 default: gcc_unreachable ();
21375 fprintf (file, "0x");
21376 for (i = CONST_VECTOR_NUNITS (x); i--;)
21380 element = CONST_VECTOR_ELT (x, i);
21381 fprintf (file, pattern, INTVAL (element));
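/* Output sketch: a V4HImode vector {1, 2, 3, 4} prints as the single
   constant 0x0004000300020001 -- lanes are emitted from the highest
   numbered down, four hex digits apiece.  */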
21387 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
21388 HFmode constant pool entries are actually loaded with ldr. */
21390 arm_emit_fp16_const (rtx c)
21395 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21396 bits = real_to_target (NULL, &r, HFmode);
21397 if (WORDS_BIG_ENDIAN)
21398 assemble_zeros (2);
21399 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21400 if (!WORDS_BIG_ENDIAN)
21401 assemble_zeros (2);
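/* Example: HFmode 1.0 has the bit pattern 0x3c00, so on a little-endian
   target this emits the halfword 0x3c00 followed by two bytes of zero
   padding, letting the word-sized ldr mentioned above see a full
   4-byte pool entry.  */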
21405 arm_output_load_gr (rtx *operands)
21412 if (GET_CODE (operands [1]) != MEM
21413 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21414 || GET_CODE (reg = XEXP (sum, 0)) != REG
21415 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21416 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21417 return "wldrw%?\t%0, %1";
21419 /* Fix up an out-of-range load of a GR register. */
21420 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21421 wcgr = operands[0];
21423 output_asm_insn ("ldr%?\t%0, %1", operands);
21425 operands[0] = wcgr;
21427 output_asm_insn ("tmcr%?\t%0, %1", operands);
21428 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21433 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21435 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21436 named arg and all anonymous args onto the stack.
21437 XXX I know the prologue shouldn't be pushing registers, but it is faster that way.  */
21441 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21442 enum machine_mode mode,
21445 int second_time ATTRIBUTE_UNUSED)
21449 cfun->machine->uses_anonymous_args = 1;
21450 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21452 nregs = pcum->aapcs_ncrn;
21453 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21457 nregs = pcum->nregs;
21459 if (nregs < NUM_ARG_REGS)
21460 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21463 /* Return nonzero if the CONSUMER instruction (a store) does not need
21464 PRODUCER's value to calculate the address. */
21467 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21469 rtx value = PATTERN (producer);
21470 rtx addr = PATTERN (consumer);
21472 if (GET_CODE (value) == COND_EXEC)
21473 value = COND_EXEC_CODE (value);
21474 if (GET_CODE (value) == PARALLEL)
21475 value = XVECEXP (value, 0, 0);
21476 value = XEXP (value, 0);
21477 if (GET_CODE (addr) == COND_EXEC)
21478 addr = COND_EXEC_CODE (addr);
21479 if (GET_CODE (addr) == PARALLEL)
21480 addr = XVECEXP (addr, 0, 0);
21481 addr = XEXP (addr, 0);
21483 return !reg_overlap_mentioned_p (value, addr);
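/* Illustrative pair: if PRODUCER sets r1 and CONSUMER is
   "str r4, [r1]", the store needs r1 to form its address and we return
   0; if CONSUMER is "str r1, [r5]", the dependency is only through the
   stored value and we return nonzero.  */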
21486 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21487 have an early register shift value or amount dependency on the
21488 result of PRODUCER. */
21491 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21493 rtx value = PATTERN (producer);
21494 rtx op = PATTERN (consumer);
21497 if (GET_CODE (value) == COND_EXEC)
21498 value = COND_EXEC_CODE (value);
21499 if (GET_CODE (value) == PARALLEL)
21500 value = XVECEXP (value, 0, 0);
21501 value = XEXP (value, 0);
21502 if (GET_CODE (op) == COND_EXEC)
21503 op = COND_EXEC_CODE (op);
21504 if (GET_CODE (op) == PARALLEL)
21505 op = XVECEXP (op, 0, 0);
21508 early_op = XEXP (op, 0);
21509 /* This is either an actual independent shift, or a shift applied to
21510 the first operand of another operation.  We want the whole shift operation.  */
21512 if (GET_CODE (early_op) == REG)
21515 return !reg_overlap_mentioned_p (value, early_op);
21518 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21520 have an early register shift value dependency on the result of PRODUCER.  */
21523 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21525 rtx value = PATTERN (producer);
21526 rtx op = PATTERN (consumer);
21529 if (GET_CODE (value) == COND_EXEC)
21530 value = COND_EXEC_CODE (value);
21531 if (GET_CODE (value) == PARALLEL)
21532 value = XVECEXP (value, 0, 0);
21533 value = XEXP (value, 0);
21534 if (GET_CODE (op) == COND_EXEC)
21535 op = COND_EXEC_CODE (op);
21536 if (GET_CODE (op) == PARALLEL)
21537 op = XVECEXP (op, 0, 0);
21540 early_op = XEXP (op, 0);
21542 /* This is either an actual independent shift, or a shift applied to
21543 the first operand of another operation. We want the value being
21544 shifted, in either case. */
21545 if (GET_CODE (early_op) != REG)
21546 early_op = XEXP (early_op, 0);
21548 return !reg_overlap_mentioned_p (value, early_op);
21551 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21552 have an early register mult dependency on the result of PRODUCER.  */
21556 arm_no_early_mul_dep (rtx producer, rtx consumer)
21558 rtx value = PATTERN (producer);
21559 rtx op = PATTERN (consumer);
21561 if (GET_CODE (value) == COND_EXEC)
21562 value = COND_EXEC_CODE (value);
21563 if (GET_CODE (value) == PARALLEL)
21564 value = XVECEXP (value, 0, 0);
21565 value = XEXP (value, 0);
21566 if (GET_CODE (op) == COND_EXEC)
21567 op = COND_EXEC_CODE (op);
21568 if (GET_CODE (op) == PARALLEL)
21569 op = XVECEXP (op, 0, 0);
21572 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21574 if (GET_CODE (XEXP (op, 0)) == MULT)
21575 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21577 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21583 /* We can't rely on the caller doing the proper promotion when
21584 using APCS or ATPCS. */
21587 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21589 return !TARGET_AAPCS_BASED;
21592 static enum machine_mode
21593 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21594 enum machine_mode mode,
21595 int *punsignedp ATTRIBUTE_UNUSED,
21596 const_tree fntype ATTRIBUTE_UNUSED,
21597 int for_return ATTRIBUTE_UNUSED)
21599 if (GET_MODE_CLASS (mode) == MODE_INT
21600 && GET_MODE_SIZE (mode) < 4)
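/* e.g. a QImode or HImode integer argument or return value is widened
   to SImode by this hook; wider and non-integer modes pass through
   unchanged.  */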
21606 /* AAPCS based ABIs use short enums by default. */
21609 arm_default_short_enums (void)
21611 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21615 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21618 arm_align_anon_bitfield (void)
21620 return TARGET_AAPCS_BASED;
21624 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21627 arm_cxx_guard_type (void)
21629 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21632 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21633 has an accumulator dependency on the result of the producer (a
21634 multiplication instruction) and no other dependency on that result. */
21636 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21638 rtx mul = PATTERN (producer);
21639 rtx mac = PATTERN (consumer);
21641 rtx mac_op0, mac_op1, mac_acc;
21643 if (GET_CODE (mul) == COND_EXEC)
21644 mul = COND_EXEC_CODE (mul);
21645 if (GET_CODE (mac) == COND_EXEC)
21646 mac = COND_EXEC_CODE (mac);
21648 /* Check that mul is of the form (set (...) (mult ...))
21649 and mla is of the form (set (...) (plus (mult ...) (...))). */
21650 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21651 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21652 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21655 mul_result = XEXP (mul, 0);
21656 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21657 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21658 mac_acc = XEXP (XEXP (mac, 1), 1);
21660 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21661 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21662 && !reg_overlap_mentioned_p (mul_result, mac_op1));
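/* A satisfying pair (registers illustrative): "mul r1, r2, r3" followed
   by "mla r4, r5, r6, r1" -- r1 feeds the accumulator and nothing else.
   "mla r4, r1, r6, r1" fails the test, since r1 also feeds a multiply
   operand.  */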
21666 /* The EABI says test the least significant bit of a guard variable. */
21669 arm_cxx_guard_mask_bit (void)
21671 return TARGET_AAPCS_BASED;
21675 /* The EABI specifies that all array cookies are 8 bytes long. */
21678 arm_get_cookie_size (tree type)
21682 if (!TARGET_AAPCS_BASED)
21683 return default_cxx_get_cookie_size (type);
21685 size = build_int_cst (sizetype, 8);
21690 /* The EABI says that array cookies should also contain the element size. */
21693 arm_cookie_has_size (void)
21695 return TARGET_AAPCS_BASED;
21699 /* The EABI says constructors and destructors should return a pointer to
21700 the object constructed/destroyed. */
21703 arm_cxx_cdtor_returns_this (void)
21705 return TARGET_AAPCS_BASED;
21708 /* The EABI says that an inline function may never be the key method.  */
21712 arm_cxx_key_method_may_be_inline (void)
21714 return !TARGET_AAPCS_BASED;
21718 arm_cxx_determine_class_data_visibility (tree decl)
21720 if (!TARGET_AAPCS_BASED
21721 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21724 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21725 is exported. However, on systems without dynamic vague linkage,
21726 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21727 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21728 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21730 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21731 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21735 arm_cxx_class_data_always_comdat (void)
21737 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21738 vague linkage if the class has no key function. */
21739 return !TARGET_AAPCS_BASED;
21743 /* The EABI says __aeabi_atexit should be used to register static destructors.  */
21747 arm_cxx_use_aeabi_atexit (void)
21749 return TARGET_AAPCS_BASED;
21754 arm_set_return_address (rtx source, rtx scratch)
21756 arm_stack_offsets *offsets;
21757 HOST_WIDE_INT delta;
21759 unsigned long saved_regs;
21761 offsets = arm_get_frame_offsets ();
21762 saved_regs = offsets->saved_regs_mask;
21764 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21765 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21768 if (frame_pointer_needed)
    addr = plus_constant (hard_frame_pointer_rtx, -4);
21772 /* LR will be the first saved register. */
21773 delta = offsets->outgoing_args - (offsets->frame + 4);
21778 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
21779 GEN_INT (delta & ~4095)));
21784 addr = stack_pointer_rtx;
21786 addr = plus_constant (addr, delta);
21788 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21794 thumb_set_return_address (rtx source, rtx scratch)
21796 arm_stack_offsets *offsets;
21797 HOST_WIDE_INT delta;
21798 HOST_WIDE_INT limit;
21801 unsigned long mask;
21805 offsets = arm_get_frame_offsets ();
21806 mask = offsets->saved_regs_mask;
21807 if (mask & (1 << LR_REGNUM))
21810 /* Find the saved regs. */
21811 if (frame_pointer_needed)
21813 delta = offsets->soft_frame - offsets->saved_args;
21814 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
21820 delta = offsets->outgoing_args - offsets->saved_args;
21823 /* Allow for the stack frame. */
21824 if (TARGET_THUMB1 && TARGET_BACKTRACE)
21826 /* The link register is always the first saved register. */
21829 /* Construct the address. */
21830 addr = gen_rtx_REG (SImode, reg);
21833 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
21834 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
21838 addr = plus_constant (addr, delta);
21840 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21843 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  return false;
}
/* Implements target hook small_register_classes_for_mode_p.  */
static bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
21871 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
21872 ARM insns and therefore guarantee that the shift count is modulo 256.
21873 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
21874 guarantee no particular behavior for out-of-range counts. */
static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
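/* A standalone sketch (not built as part of GCC; the helper name is
   made up) of what a truncation mask of 255 buys the optimizers: an
   explicit "count & 255" before an SImode shift is redundant because
   ARM register-controlled shifts already use only the bottom byte of
   the count register.  */
#if 0
static unsigned int
arm_lsl_model (unsigned int x, unsigned int count)
{
  count &= 255;				/* implicit in the hardware */
  return count < 32 ? x << count : 0;	/* LSL by 32..255 yields zero */
}
#endif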
21883 /* Map internal gcc register numbers to DWARF2 register numbers. */
21886 arm_dbx_register_number (unsigned int regno)
21891 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
21892 compatibility. The EABI defines them as registers 96-103. */
21893 if (IS_FPA_REGNUM (regno))
21894 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
21896 if (IS_VFP_REGNUM (regno))
21898 /* See comment in arm_dwarf_register_span. */
21899 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21900 return 64 + regno - FIRST_VFP_REGNUM;
21902 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
21905 if (IS_IWMMXT_GR_REGNUM (regno))
21906 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
21908 if (IS_IWMMXT_REGNUM (regno))
21909 return 112 + regno - FIRST_IWMMXT_REGNUM;
21911 gcc_unreachable ();
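/* An illustrative model (not GCC code; the name is hypothetical) of the
   VFP part of the numbering above, with FIRST_VFP_REGNUM folded away:
   single-precision s<n> uses the legacy range starting at 64, while
   double-precision d<n> uses the range starting at 256, so s1 -> 65
   and d16 -> 272.  */
#if 0
static unsigned int
vfp_dwarf_number_model (unsigned int n, int is_single)
{
  return is_single ? 64 + n : 256 + n;
}
#endif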
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
21925 regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;
  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    return NULL_RTX;
21939 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
21940 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
21941 regno = (regno - FIRST_VFP_REGNUM) / 2;
  for (i = 0; i < nregs; i++)
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
  return p;
}
21948 #ifdef TARGET_UNWIND_INFO
21949 /* Emit unwind directives for a store-multiple instruction or stack pointer
21950 push during alignment.
21951 These should only ever be generated by the function prologue code, so
21952 expect them to have a particular form. */
21955 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21958 HOST_WIDE_INT offset;
21959 HOST_WIDE_INT nregs;
21965 e = XVECEXP (p, 0, 0);
21966 if (GET_CODE (e) != SET)
21969 /* First insn will adjust the stack pointer. */
21970 if (GET_CODE (e) != SET
21971 || GET_CODE (XEXP (e, 0)) != REG
21972 || REGNO (XEXP (e, 0)) != SP_REGNUM
21973 || GET_CODE (XEXP (e, 1)) != PLUS)
21976 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21977 nregs = XVECLEN (p, 0) - 1;
21979 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  /* The function prologue may also push pc, but does not annotate it, as it
     is never restored.  We turn this into a stack pointer adjustment.  */
21984 if (nregs * 4 == offset - 4)
21986 fprintf (asm_out_file, "\t.pad #4\n");
21990 fprintf (asm_out_file, "\t.save {");
21992 else if (IS_VFP_REGNUM (reg))
21995 fprintf (asm_out_file, "\t.vsave {");
21997 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21999 /* FPA registers are done differently. */
22000 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22004 /* Unknown register type. */
22007 /* If the stack increment doesn't match the size of the saved registers,
22008 something has gone horribly wrong. */
22009 if (offset != nregs * reg_size)
22014 /* The remaining insns will describe the stores. */
22015 for (i = 1; i <= nregs; i++)
22017 /* Expect (set (mem <addr>) (reg)).
22018 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22019 e = XVECEXP (p, 0, i);
22020 if (GET_CODE (e) != SET
22021 || GET_CODE (XEXP (e, 0)) != MEM
22022 || GET_CODE (XEXP (e, 1)) != REG)
22025 reg = REGNO (XEXP (e, 1));
22030 fprintf (asm_out_file, ", ");
22031 /* We can't use %r for vfp because we need to use the
22032 double precision register names. */
22033 if (IS_VFP_REGNUM (reg))
22034 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22036 asm_fprintf (asm_out_file, "%r", reg);
22038 #ifdef ENABLE_CHECKING
22039 /* Check that the addresses are consecutive. */
22040 e = XEXP (XEXP (e, 0), 0);
22041 if (GET_CODE (e) == PLUS)
22043 offset += reg_size;
22044 if (GET_CODE (XEXP (e, 0)) != REG
22045 || REGNO (XEXP (e, 0)) != SP_REGNUM
22046 || GET_CODE (XEXP (e, 1)) != CONST_INT
22047 || offset != INTVAL (XEXP (e, 1)))
22051 || GET_CODE (e) != REG
22052 || REGNO (e) != SP_REGNUM)
22056 fprintf (asm_out_file, "}\n");
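/* Illustrative output (register choices hypothetical): a prologue
   "push {r4, r5, lr}" produces

	.save {r4, r5, lr}

   while "push {r4, lr, pc}", whose pc slot is never restored, hits the
   pc-as-padding special case above and produces

	.pad #4
	.save {r4, lr}  */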
22059 /* Emit unwind directives for a SET. */
22062 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22070 switch (GET_CODE (e0))
22073 /* Pushing a single register. */
22074 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22075 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22076 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22079 asm_fprintf (asm_out_file, "\t.save ");
22080 if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf (asm_out_file, "{d%d}\n",
		     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
22088 if (REGNO (e0) == SP_REGNUM)
22090 /* A stack increment. */
22091 if (GET_CODE (e1) != PLUS
22092 || GET_CODE (XEXP (e1, 0)) != REG
22093 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22094 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22097 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22098 -INTVAL (XEXP (e1, 1)));
22100 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22102 HOST_WIDE_INT offset;
22104 if (GET_CODE (e1) == PLUS)
22106 if (GET_CODE (XEXP (e1, 0)) != REG
22107 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22109 reg = REGNO (XEXP (e1, 0));
22110 offset = INTVAL (XEXP (e1, 1));
22111 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22112 HARD_FRAME_POINTER_REGNUM, reg,
22115 else if (GET_CODE (e1) == REG)
22118 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22119 HARD_FRAME_POINTER_REGNUM, reg);
22124 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22126 /* Move from sp to reg. */
22127 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22129 else if (GET_CODE (e1) == PLUS
22130 && GET_CODE (XEXP (e1, 0)) == REG
22131 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22132 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22134 /* Set reg to offset from sp. */
22135 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		   REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
22138 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22140 /* Stack pointer save before alignment. */
22142 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22155 /* Emit unwind directives for the given insn. */
22158 arm_unwind_emit (FILE * asm_out_file, rtx insn)
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;
  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);
22179 switch (GET_CODE (pat))
22182 arm_unwind_emit_set (asm_out_file, pat);
22186 /* Store multiple. */
22187 arm_unwind_emit_sequence (asm_out_file, pat);
22196 /* Output a reference from a function exception table to the type_info
22197 object X. The EABI specifies that the symbol should be relocated by
22198 an R_ARM_TARGET2 relocation. */
22201 arm_output_ttype (rtx x)
22203 fputs ("\t.word\t", asm_out_file);
22204 output_addr_const (asm_out_file, x);
22205 /* Use special relocations for symbol references. */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
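/* For example (symbol name hypothetical), an entry for the type_info
   object _ZTI1A comes out as

	.word	_ZTI1A(TARGET2)

   so the linker applies an R_ARM_TARGET2 relocation, while a plain
   integer entry is emitted with no decoration.  */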
22212 #endif /* TARGET_UNWIND_INFO */
22215 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22216 stack alignment. */
22219 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22221 rtx unspec = SET_SRC (pattern);
22222 gcc_assert (GET_CODE (unspec) == UNSPEC);
22226 case UNSPEC_STACK_ALIGN:
22227 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22228 put anything on the stack, so hopefully it won't matter.
22229 CFA = SP will be correct after alignment. */
22230 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22231 SET_DEST (pattern));
22234 gcc_unreachable ();
22239 /* Output unwind directives for the start/end of a function. */
22242 arm_output_fn_unwind (FILE * f, bool prologue)
22244 if (!ARM_EABI_UNWIND_TABLES)
22248 fputs ("\t.fnstart\n", f);
22251 /* If this function will never be unwound, then mark it as such.
     The same condition is used in arm_unwind_emit to suppress
22253 the frame annotations. */
22254 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22255 && (TREE_NOTHROW (current_function_decl)
22256 || crtl->all_throwers_are_sibcalls))
    fputs ("\t.cantunwind\n", f);
22259 fputs ("\t.fnend\n", f);
22264 arm_emit_tls_decoration (FILE *fp, rtx x)
22266 enum tls_reloc reloc;
22269 val = XVECEXP (x, 0, 0);
22270 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22272 output_addr_const (fp, val);
22277 fputs ("(tlsgd)", fp);
22280 fputs ("(tlsldm)", fp);
22283 fputs ("(tlsldo)", fp);
22286 fputs ("(gottpoff)", fp);
22289 fputs ("(tpoff)", fp);
22292 gcc_unreachable ();
22300 fputs (" + (. - ", fp);
22301 output_addr_const (fp, XVECEXP (x, 0, 2));
22303 output_addr_const (fp, XVECEXP (x, 0, 3));
22313 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22316 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22318 gcc_assert (size == 4);
22319 fputs ("\t.word\t", file);
22320 output_addr_const (file, x);
22321 fputs ("(tlsldo)", file);
22325 arm_output_addr_const_extra (FILE *fp, rtx x)
22327 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22328 return arm_emit_tls_decoration (fp, x);
22329 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22332 int labelno = INTVAL (XVECEXP (x, 0, 0));
22334 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22335 assemble_name_raw (fp, label);
22339 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22341 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22345 output_addr_const (fp, XVECEXP (x, 0, 0));
22349 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22351 output_addr_const (fp, XVECEXP (x, 0, 0));
22355 output_addr_const (fp, XVECEXP (x, 0, 1));
22359 else if (GET_CODE (x) == CONST_VECTOR)
22360 return arm_emit_vector_const (fp, x);
22365 /* Output assembly for a shift instruction.
22366 SET_FLAGS determines how the instruction modifies the condition codes.
22367 0 - Do not set condition codes.
22368 1 - Set condition codes.
22369 2 - Use smallest instruction. */
const char *
arm_output_shift (rtx *operands, int set_flags)
22374 static const char flag_chars[3] = {'?', '.', '!'};
22379 c = flag_chars[set_flags];
22380 if (TARGET_UNIFIED_ASM)
      shift = shift_op (operands[3], &val);
      operands[2] = GEN_INT (val);
22387 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22390 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22393 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22394 output_asm_insn (pattern, operands);
22398 /* Output a Thumb-1 casesi dispatch sequence. */
const char *
thumb1_output_casesi (rtx *operands)
22402 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22404 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
  switch (GET_MODE (diff_vec))
22409 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22410 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22412 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22413 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22415 return "bl\t%___gnu_thumb1_case_si";
22417 gcc_unreachable ();
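/* Illustrative expansion: for an unsigned QImode difference vector this
   returns "bl\t%___gnu_thumb1_case_uqi", i.e. a call to the
   lib1funcs.asm helper, which indexes the byte table that follows the
   call with the case value and adjusts the return address to perform
   the dispatch.  */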
22421 /* Output a Thumb-2 casesi instruction. */
const char *
thumb2_output_casesi (rtx *operands)
22425 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22427 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22429 output_asm_insn ("cmp\t%0, %1", operands);
22430 output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
22434 return "tbb\t[%|pc, %0]";
22436 return "tbh\t[%|pc, %0, lsl #1]";
22440 output_asm_insn ("adr\t%4, %l2", operands);
22441 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22442 output_asm_insn ("add\t%4, %4, %5", operands);
22447 output_asm_insn ("adr\t%4, %l2", operands);
22448 return "ldr\t%|pc, [%4, %0, lsl #2]";
22451 gcc_unreachable ();
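/* Illustrative Thumb-2 dispatch for a QImode table, following the
   operand positions above (registers hypothetical):

	cmp	r0, r1			@ bounds check
	bhi	.L3			@ out of range: default label
	tbb	[pc, r0]		@ branch via byte offset table  */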
22455 /* Most ARM cores are single issue, but some newer ones can dual issue.
22456 The scheduler descriptions rely on this being correct. */
static int
arm_issue_rate (void)
22474 /* A table and a function to perform ARM-specific name mangling for
22475 NEON vector types in order to conform to the AAPCS (see "Procedure
22476 Call Standard for the ARM Architecture", Appendix A). To qualify
22477 for emission with the mangled names defined in that document, a
22478 vector type must not only be of the correct mode but also be
22479 composed of NEON vector element types (e.g. __builtin_neon_qi). */
typedef struct
{
  enum machine_mode mode;
22483 const char *element_type_name;
22484 const char *aapcs_name;
22485 } arm_mangle_map_entry;
22487 static arm_mangle_map_entry arm_mangle_map[] = {
22488 /* 64-bit containerized types. */
22489 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22490 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22491 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22492 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22493 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22494 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22495 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22496 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22497 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22498 /* 128-bit containerized types. */
22499 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22500 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22501 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22502 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22503 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22504 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22505 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22506 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22507 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
22514 arm_mangle_map_entry *pos = arm_mangle_map;
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
22518 if (TARGET_AAPCS_BASED
22519 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22521 static bool warned;
22522 if (!warned && warn_psabi && !in_system_header)
22525 inform (input_location,
22526 "the mangling of %<va_list%> has changed in GCC 4.4");
22528 return "St9__va_list";
22531 /* Half-precision float. */
22532 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22535 if (TREE_CODE (type) != VECTOR_TYPE)
22538 /* Check the mode of the vector type, and the name of the vector
22539 element type, against the table. */
22540 while (pos->mode != VOIDmode)
22542 tree elt_type = TREE_TYPE (type);
22544 if (pos->mode == TYPE_MODE (type)
22545 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22546 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22547 pos->element_type_name))
22548 return pos->aapcs_name;
  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
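/* For instance (assuming arm_neon.h's int8x8_t, which is built on
   __builtin_neon_qi in V8QImode), the table above maps the type to
   "15__simd64_int8_t", so "void f (int8x8_t);" mangles as
   _Z1f15__simd64_int8_t rather than with the generic vector
   mangling.  */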
22558 /* Order of allocation of core registers for Thumb: this allocation is
22559 written over the corresponding initial entries of the array
22560 initialized with REG_ALLOC_ORDER. We allocate all low registers
22561 first. Saving and restoring a low register is usually cheaper than
22562 using a call-clobbered high register. */
22564 static const int thumb_core_reg_alloc_order[] =
22566 3, 2, 1, 0, 4, 5, 6, 7,
  14, 12, 8, 9, 10, 11, 13, 15
};
22570 /* Adjust register allocation order when compiling for Thumb. */
22573 arm_order_regs_for_local_alloc (void)
22575 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
22582 /* Set default optimization options. */
22584 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
22586 /* Enable section anchors by default at -O1 or higher.
22587 Use 2 to distinguish from an explicit -fsection-anchors
22588 given on the command line. */
22590 flag_section_anchors = 2;
22593 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22596 arm_frame_pointer_required (void)
22598 return (cfun->has_nonlocal_label
22599 || SUBTARGET_FRAME_POINTER_REQUIRED
22600 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
22611 /* Legitimize a memory reference for sync primitive implemented using
22612 ldrex / strex. We currently force the form of the reference to be
22613 indirect without offset. We do not yet support the indirect offset
   addressing supported by some ARM targets for these instructions.  */
22617 arm_legitimize_sync_memory (rtx memory)
22619 rtx addr = force_reg (Pmode, XEXP (memory, 0));
22620 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
22622 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
22623 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
22624 return legitimate_memory;
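/* For example, an incoming operand such as (mem (plus (reg r4)
   (const_int 8))) is rewritten as (mem (reg tmp)), with a separate
   "add tmp, r4, #8" emitted by force_reg, since the ldrex/strex loops
   below only accept a bare [Rn] address (register names
   illustrative).  */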
22627 /* An instruction emitter. */
22628 typedef void (* emit_f) (int label, const char *, rtx *);
22630 /* An instruction emitter that emits via the conventional
22631 output_asm_insn. */
22633 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
22635 output_asm_insn (pattern, operands);
22638 /* Count the number of emitted synchronization instructions. */
22639 static unsigned arm_insn_count;
/* An emitter that counts emitted instructions but does not actually
   emit instructions into the instruction stream.  */
22644 arm_count (int label,
22645 const char *pattern ATTRIBUTE_UNUSED,
22646 rtx *operands ATTRIBUTE_UNUSED)
22652 /* Construct a pattern using conventional output formatting and feed
22653 it to output_asm_insn. Provides a mechanism to construct the
22654 output pattern on the fly. Note the hard limit on the pattern
22657 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
22658 const char *pattern, ...)
22663 va_start (ap, pattern);
  vsprintf (buffer, pattern, ap);
  va_end (ap);
  emit (label, buffer, operands);
22669 /* Emit the memory barrier instruction, if any, provided by this
22670 target to a specified emitter. */
22672 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
22674 if (TARGET_HAVE_DMB)
      /* Note we issue a system level barrier.  We should consider issuing
	 an inner shareability zone barrier here instead, i.e. "DMB ISH".  */
      emit (0, "dmb\tsy", operands);
      return;
    }
22683 if (TARGET_HAVE_DMB_MCR)
22685 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
22689 gcc_unreachable ();
22692 /* Emit the memory barrier instruction, if any, provided by this
22695 arm_output_memory_barrier (rtx *operands)
22697 arm_process_output_memory_barrier (arm_emit, operands);
22701 /* Helper to figure out the instruction suffix required on ldrex/strex
22702 for operations on an object of the specified mode. */
22703 static const char *
arm_ldrex_suffix (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: return "b";
    case HImode: return "h";
    case SImode: return "";
    case DImode: return "d";
    default: gcc_unreachable ();
    }
  return "";
}
22718 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
22721 arm_output_ldrex (emit_f emit,
22722 enum machine_mode mode,
22726 const char *suffix = arm_ldrex_suffix (mode);
22729 operands[0] = target;
22730 operands[1] = memory;
22731 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
22734 /* Emit a strex{b,h,d, } instruction appropriate for the specified
22737 arm_output_strex (emit_f emit,
22738 enum machine_mode mode,
22744 const char *suffix = arm_ldrex_suffix (mode);
22747 operands[0] = result;
22748 operands[1] = value;
22749 operands[2] = memory;
22750 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
22754 /* Helper to emit a two operand instruction. */
22756 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
22762 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
22765 /* Helper to emit a three operand instruction. */
22767 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
22774 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
22777 /* Emit a load store exclusive synchronization loop.
22781 if old_value != required_value
22783 t1 = sync_op (old_value, new_value)
22784 [mem] = t1, t2 = [0|1]
22788 t1 == t2 is not permitted
22789 t1 == old_value is permitted
   RTX register or const_int representing the required old_value for
   the modify to continue, if NULL no comparison is performed.  */
22796 arm_output_sync_loop (emit_f emit,
22797 enum machine_mode mode,
22800 rtx required_value,
22804 enum attr_sync_op sync_op,
22805 int early_barrier_required)
22809 gcc_assert (t1 != t2);
22811 if (early_barrier_required)
22812 arm_process_output_memory_barrier (emit, NULL);
22814 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
22816 arm_output_ldrex (emit, mode, old_value, memory);
22818 if (required_value)
22822 operands[0] = old_value;
22823 operands[1] = required_value;
22824 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
22825 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
22831 arm_output_op3 (emit, "add", t1, old_value, new_value);
22835 arm_output_op3 (emit, "sub", t1, old_value, new_value);
22839 arm_output_op3 (emit, "orr", t1, old_value, new_value);
22843 arm_output_op3 (emit, "eor", t1, old_value, new_value);
22847 arm_output_op3 (emit,"and", t1, old_value, new_value);
22851 arm_output_op3 (emit, "and", t1, old_value, new_value);
22852 arm_output_op2 (emit, "mvn", t1, t1);
22860 arm_output_strex (emit, mode, "", t2, t1, memory);
22862 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
22863 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
22865 arm_process_output_memory_barrier (emit, NULL);
22866 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
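/* Illustrative output for an SImode fetch-and-add with no required
   value and barriers on both sides (register assignment hypothetical):

	dmb	sy
   1:	ldrex	r0, [r2]		@ old_value
	add	r3, r0, r1		@ t1 = old_value + new_value
	strex	r4, r3, [r2]		@ t2: 0 on success, 1 on failure
	teq	r4, #0
	bne	1b
	dmb	sy
   2:					@ LSYB target, reached by the
					@ "bne" of a failed comparison  */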
static rtx
arm_get_sync_operand (rtx *operands, int index, rtx default_value)
{
  if (index > 0)
    default_value = operands[index - 1];
  return default_value;
}
22878 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
22879 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
static void
arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
{
  rtx result, memory, required_value, new_value, t1, t2;
  int early_barrier;
22888 enum machine_mode mode;
22889 enum attr_sync_op sync_op;
22891 result = FETCH_SYNC_OPERAND(result, 0);
22892 memory = FETCH_SYNC_OPERAND(memory, 0);
22893 required_value = FETCH_SYNC_OPERAND(required_value, 0);
22894 new_value = FETCH_SYNC_OPERAND(new_value, 0);
22895 t1 = FETCH_SYNC_OPERAND(t1, 0);
22896 t2 = FETCH_SYNC_OPERAND(t2, 0);
  early_barrier
    = get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
22899 sync_op = get_attr_sync_op (insn);
22900 mode = GET_MODE (memory);
22902 arm_output_sync_loop (emit, mode, result, memory, required_value,
22903 new_value, t1, t2, sync_op, early_barrier);
/* Emit a synchronization instruction loop.  */
void
arm_output_sync_insn (rtx insn, rtx *operands)
{
  arm_process_output_sync_insn (arm_emit, insn, operands);
}
/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions; it just counts instructions, being careful not
   to count labels.  */
unsigned int
arm_sync_loop_insns (rtx insn, rtx *operands)
{
  arm_insn_count = 0;
  arm_process_output_sync_insn (arm_count, insn, operands);
  return arm_insn_count;
}
22926 /* Helper to call a target sync instruction generator, dealing with
22927 the variation in operands required by the different generators. */
22929 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
22930 rtx memory, rtx required_value, rtx new_value)
22932 switch (generator->op)
22934 case arm_sync_generator_omn:
22935 gcc_assert (! required_value);
22936 return generator->u.omn (old_value, memory, new_value);
22938 case arm_sync_generator_omrn:
22939 gcc_assert (required_value);
22940 return generator->u.omrn (old_value, memory, required_value, new_value);
/* Expand a synchronization loop.  The synchronization loop is expanded
   as an opaque block of instructions in order to ensure that we do
   not subsequently get extraneous memory accesses inserted within the
   critical region.  The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
22952 arm_expand_sync (enum machine_mode mode,
22953 struct arm_sync_generator *generator,
22954 rtx target, rtx memory, rtx required_value, rtx new_value)
22956 if (target == NULL)
22957 target = gen_reg_rtx (mode);
22959 memory = arm_legitimize_sync_memory (memory);
22960 if (mode != SImode)
22962 rtx load_temp = gen_reg_rtx (SImode);
22964 if (required_value)
22965 required_value = convert_modes (SImode, mode, required_value, true);
22967 new_value = convert_modes (SImode, mode, new_value, true);
22968 emit_insn (arm_call_generator (generator, load_temp, memory,
22969 required_value, new_value));
      emit_move_insn (target, gen_lowpart (mode, load_temp));
    }
  else
    emit_insn (arm_call_generator (generator, target, memory, required_value,
				   new_value));
}
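/* As an example of the narrowing path above: a QImode operation first
   zero-extends the required and new values to SImode (ldrexb loads a
   zero-extended byte into a 32-bit register), runs the SImode loop on
   a fresh temporary, and copies the low byte back to TARGET with
   gen_lowpart.  */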
22979 #include "gt-arm.h"