1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
49 #include "c-family/c-pragma.h" /* ??? */
50 #include "integrate.h"
53 #include "target-def.h"
55 #include "langhooks.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static void arm_print_operand (FILE *, rtx, int);
88 static void arm_print_operand_address (FILE *, rtx);
89 static bool arm_print_operand_punct_valid_p (unsigned char code);
90 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
91 static arm_cc get_arm_condition_code (rtx);
92 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
93 static rtx is_jump_table (rtx);
94 static const char *output_multi_immediate (rtx *, const char *, const char *,
96 static const char *shift_op (rtx, HOST_WIDE_INT *);
97 static struct machine_function *arm_init_machine_status (void);
98 static void thumb_exit (FILE *, int);
99 static rtx is_jump_table (rtx);
100 static HOST_WIDE_INT get_jump_table_size (rtx);
101 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
102 static Mnode *add_minipool_forward_ref (Mfix *);
103 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_backward_ref (Mfix *);
105 static void assign_minipool_offsets (Mfix *);
106 static void arm_print_value (FILE *, rtx);
107 static void dump_minipool (rtx);
108 static int arm_barrier_cost (rtx);
109 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
110 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
111 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
113 static void arm_reorg (void);
114 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
115 static unsigned long arm_compute_save_reg0_reg12_mask (void);
116 static unsigned long arm_compute_save_reg_mask (void);
117 static unsigned long arm_isr_value (tree);
118 static unsigned long arm_compute_func_type (void);
119 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
120 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
121 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
122 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
123 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
125 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
126 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
127 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
128 static int arm_comp_type_attributes (const_tree, const_tree);
129 static void arm_set_default_type_attributes (tree);
130 static int arm_adjust_cost (rtx, rtx, rtx, int);
131 static int count_insns_for_constant (HOST_WIDE_INT, int);
132 static int arm_get_strip_length (int);
133 static bool arm_function_ok_for_sibcall (tree, tree);
134 static enum machine_mode arm_promote_function_mode (const_tree,
135 enum machine_mode, int *,
137 static bool arm_return_in_memory (const_tree, const_tree);
138 static rtx arm_function_value (const_tree, const_tree, bool);
139 static rtx arm_libcall_value (enum machine_mode, const_rtx);
141 static void arm_internal_label (FILE *, const char *, unsigned long);
142 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
144 static bool arm_have_conditional_execution (void);
145 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
146 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
147 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
148 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
149 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_rtx_costs (rtx, int, int, int *, bool);
152 static int arm_address_cost (rtx, bool);
153 static bool arm_memory_load_p (rtx);
154 static bool arm_cirrus_insn_p (rtx);
155 static void cirrus_reorg (rtx);
156 static void arm_init_builtins (void);
157 static void arm_init_iwmmxt_builtins (void);
158 static rtx safe_vector_operand (rtx, enum machine_mode);
159 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
160 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
161 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
162 static void emit_constant_insn (rtx cond, rtx pattern);
163 static rtx emit_set_insn (rtx, rtx);
164 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
166 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
168 static int aapcs_select_return_coproc (const_tree, const_tree);
170 #ifdef OBJECT_FORMAT_ELF
171 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
172 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
175 static void arm_encode_section_info (tree, rtx, int);
178 static void arm_file_end (void);
179 static void arm_file_start (void);
181 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
183 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
184 enum machine_mode, const_tree, bool);
185 static bool arm_promote_prototypes (const_tree);
186 static bool arm_default_short_enums (void);
187 static bool arm_align_anon_bitfield (void);
188 static bool arm_return_in_msb (const_tree);
189 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
190 static bool arm_return_in_memory (const_tree, const_tree);
191 #ifdef TARGET_UNWIND_INFO
192 static void arm_unwind_emit (FILE *, rtx);
193 static bool arm_output_ttype (rtx);
195 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
196 static rtx arm_dwarf_register_span (rtx);
198 static tree arm_cxx_guard_type (void);
199 static bool arm_cxx_guard_mask_bit (void);
200 static tree arm_get_cookie_size (tree);
201 static bool arm_cookie_has_size (void);
202 static bool arm_cxx_cdtor_returns_this (void);
203 static bool arm_cxx_key_method_may_be_inline (void);
204 static void arm_cxx_determine_class_data_visibility (tree);
205 static bool arm_cxx_class_data_always_comdat (void);
206 static bool arm_cxx_use_aeabi_atexit (void);
207 static void arm_init_libfuncs (void);
208 static tree arm_build_builtin_va_list (void);
209 static void arm_expand_builtin_va_start (tree, rtx);
210 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
211 static bool arm_handle_option (size_t, const char *, int);
212 static void arm_target_help (void);
213 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
214 static bool arm_cannot_copy_insn_p (rtx);
215 static bool arm_tls_symbol_p (rtx x);
216 static int arm_issue_rate (void);
217 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
218 static bool arm_allocate_stack_slots_for_args (void);
219 static const char *arm_invalid_parameter_type (const_tree t);
220 static const char *arm_invalid_return_type (const_tree t);
221 static tree arm_promoted_type (const_tree t);
222 static tree arm_convert_to_type (tree type, tree expr);
223 static bool arm_scalar_mode_supported_p (enum machine_mode);
224 static bool arm_frame_pointer_required (void);
225 static bool arm_can_eliminate (const int, const int);
226 static void arm_asm_trampoline_template (FILE *);
227 static void arm_trampoline_init (rtx, tree, rtx);
228 static rtx arm_trampoline_adjust_address (rtx);
229 static rtx arm_pic_static_addr (rtx orig, rtx reg);
232 /* Table of machine attributes. */
233 static const struct attribute_spec arm_attribute_table[] =
235 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
236 /* Function calls made to this symbol must be done indirectly, because
237 it may lie outside of the 26 bit addressing range of a normal function
239 { "long_call", 0, 0, false, true, true, NULL },
240 /* Whereas these functions are always known to reside within the 26 bit
242 { "short_call", 0, 0, false, true, true, NULL },
243 /* Specify the procedure call conventions for a function. */
244 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
245 /* Interrupt Service Routines have special prologue and epilogue requirements. */
246 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
247 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
248 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
250 /* ARM/PE has three new attributes:
252 dllexport - for exporting a function/variable that will live in a dll
253 dllimport - for importing a function/variable from a dll
255 Microsoft allows multiple declspecs in one __declspec, separating
256 them with spaces. We do NOT support this. Instead, use __declspec
259 { "dllimport", 0, 0, true, false, false, NULL },
260 { "dllexport", 0, 0, true, false, false, NULL },
261 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
262 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
263 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
264 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
265 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
267 { NULL, 0, 0, false, false, false, NULL }
270 /* Initialize the GCC target structure. */
271 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
272 #undef TARGET_MERGE_DECL_ATTRIBUTES
273 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
276 #undef TARGET_LEGITIMIZE_ADDRESS
277 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
279 #undef TARGET_ATTRIBUTE_TABLE
280 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
282 #undef TARGET_ASM_FILE_START
283 #define TARGET_ASM_FILE_START arm_file_start
284 #undef TARGET_ASM_FILE_END
285 #define TARGET_ASM_FILE_END arm_file_end
287 #undef TARGET_ASM_ALIGNED_SI_OP
288 #define TARGET_ASM_ALIGNED_SI_OP NULL
289 #undef TARGET_ASM_INTEGER
290 #define TARGET_ASM_INTEGER arm_assemble_integer
292 #undef TARGET_PRINT_OPERAND
293 #define TARGET_PRINT_OPERAND arm_print_operand
294 #undef TARGET_PRINT_OPERAND_ADDRESS
295 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
296 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
297 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
299 #undef TARGET_ASM_FUNCTION_PROLOGUE
300 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
302 #undef TARGET_ASM_FUNCTION_EPILOGUE
303 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
305 #undef TARGET_DEFAULT_TARGET_FLAGS
306 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
307 #undef TARGET_HANDLE_OPTION
308 #define TARGET_HANDLE_OPTION arm_handle_option
310 #define TARGET_HELP arm_target_help
312 #undef TARGET_COMP_TYPE_ATTRIBUTES
313 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
315 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
316 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
318 #undef TARGET_SCHED_ADJUST_COST
319 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
321 #undef TARGET_ENCODE_SECTION_INFO
323 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
325 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
328 #undef TARGET_STRIP_NAME_ENCODING
329 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
331 #undef TARGET_ASM_INTERNAL_LABEL
332 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
334 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
335 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
337 #undef TARGET_FUNCTION_VALUE
338 #define TARGET_FUNCTION_VALUE arm_function_value
340 #undef TARGET_LIBCALL_VALUE
341 #define TARGET_LIBCALL_VALUE arm_libcall_value
343 #undef TARGET_ASM_OUTPUT_MI_THUNK
344 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
345 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
346 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
348 #undef TARGET_RTX_COSTS
349 #define TARGET_RTX_COSTS arm_rtx_costs
350 #undef TARGET_ADDRESS_COST
351 #define TARGET_ADDRESS_COST arm_address_cost
353 #undef TARGET_SHIFT_TRUNCATION_MASK
354 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
355 #undef TARGET_VECTOR_MODE_SUPPORTED_P
356 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
358 #undef TARGET_MACHINE_DEPENDENT_REORG
359 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
361 #undef TARGET_INIT_BUILTINS
362 #define TARGET_INIT_BUILTINS arm_init_builtins
363 #undef TARGET_EXPAND_BUILTIN
364 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
366 #undef TARGET_INIT_LIBFUNCS
367 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
369 #undef TARGET_PROMOTE_FUNCTION_MODE
370 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
371 #undef TARGET_PROMOTE_PROTOTYPES
372 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
373 #undef TARGET_PASS_BY_REFERENCE
374 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
375 #undef TARGET_ARG_PARTIAL_BYTES
376 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
378 #undef TARGET_SETUP_INCOMING_VARARGS
379 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
381 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
382 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
384 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
385 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
386 #undef TARGET_TRAMPOLINE_INIT
387 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
388 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
389 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
391 #undef TARGET_DEFAULT_SHORT_ENUMS
392 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
394 #undef TARGET_ALIGN_ANON_BITFIELD
395 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
397 #undef TARGET_NARROW_VOLATILE_BITFIELD
398 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
400 #undef TARGET_CXX_GUARD_TYPE
401 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
403 #undef TARGET_CXX_GUARD_MASK_BIT
404 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
406 #undef TARGET_CXX_GET_COOKIE_SIZE
407 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
409 #undef TARGET_CXX_COOKIE_HAS_SIZE
410 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
412 #undef TARGET_CXX_CDTOR_RETURNS_THIS
413 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
415 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
416 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
418 #undef TARGET_CXX_USE_AEABI_ATEXIT
419 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
421 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
422 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
423 arm_cxx_determine_class_data_visibility
425 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
426 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
428 #undef TARGET_RETURN_IN_MSB
429 #define TARGET_RETURN_IN_MSB arm_return_in_msb
431 #undef TARGET_RETURN_IN_MEMORY
432 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
434 #undef TARGET_MUST_PASS_IN_STACK
435 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
437 #ifdef TARGET_UNWIND_INFO
438 #undef TARGET_ASM_UNWIND_EMIT
439 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
441 /* EABI unwinding tables use a different format for the typeinfo tables. */
442 #undef TARGET_ASM_TTYPE
443 #define TARGET_ASM_TTYPE arm_output_ttype
445 #undef TARGET_ARM_EABI_UNWINDER
446 #define TARGET_ARM_EABI_UNWINDER true
447 #endif /* TARGET_UNWIND_INFO */
449 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
450 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
452 #undef TARGET_DWARF_REGISTER_SPAN
453 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
455 #undef TARGET_CANNOT_COPY_INSN_P
456 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
459 #undef TARGET_HAVE_TLS
460 #define TARGET_HAVE_TLS true
463 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
464 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
466 #undef TARGET_CANNOT_FORCE_CONST_MEM
467 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
469 #undef TARGET_MAX_ANCHOR_OFFSET
470 #define TARGET_MAX_ANCHOR_OFFSET 4095
472 /* The minimum is set such that the total size of the block
473 for a particular anchor is -4088 + 1 + 4095 bytes, which is
474 divisible by eight, ensuring natural spacing of anchors. */
475 #undef TARGET_MIN_ANCHOR_OFFSET
476 #define TARGET_MIN_ANCHOR_OFFSET -4088
478 #undef TARGET_SCHED_ISSUE_RATE
479 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
481 #undef TARGET_MANGLE_TYPE
482 #define TARGET_MANGLE_TYPE arm_mangle_type
484 #undef TARGET_BUILD_BUILTIN_VA_LIST
485 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
486 #undef TARGET_EXPAND_BUILTIN_VA_START
487 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
488 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
489 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
492 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
493 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
496 #undef TARGET_LEGITIMATE_ADDRESS_P
497 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
499 #undef TARGET_INVALID_PARAMETER_TYPE
500 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
502 #undef TARGET_INVALID_RETURN_TYPE
503 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
505 #undef TARGET_PROMOTED_TYPE
506 #define TARGET_PROMOTED_TYPE arm_promoted_type
508 #undef TARGET_CONVERT_TO_TYPE
509 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
511 #undef TARGET_SCALAR_MODE_SUPPORTED_P
512 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
514 #undef TARGET_FRAME_POINTER_REQUIRED
515 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
517 #undef TARGET_CAN_ELIMINATE
518 #define TARGET_CAN_ELIMINATE arm_can_eliminate
520 struct gcc_target targetm = TARGET_INITIALIZER;
522 /* Obstack for minipool constant handling. */
523 static struct obstack minipool_obstack;
524 static char * minipool_startobj;
526 /* The maximum number of insns skipped which
527 will be conditionalised if possible. */
528 static int max_insns_skipped = 5;
530 extern FILE * asm_out_file;
532 /* True if we are currently building a constant table. */
533 int making_const_table;
535 /* The processor for which instructions should be scheduled. */
536 enum processor_type arm_tune = arm_none;
538 /* The current tuning set. */
539 const struct tune_params *current_tune;
541 /* Which floating point hardware to schedule for. */
544 /* Which floating point hardware to use. */
545 const struct arm_fpu_desc *arm_fpu_desc;
547 /* Whether to use floating point hardware. */
548 enum float_abi_type arm_float_abi;
550 /* Which __fp16 format to use. */
551 enum arm_fp16_format_type arm_fp16_format;
553 /* Which ABI to use. */
554 enum arm_abi_type arm_abi;
556 /* Which thread pointer model to use. */
557 enum arm_tp_type target_thread_pointer = TP_AUTO;
559 /* Used to parse -mstructure_size_boundary command line option. */
560 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
562 /* Used for Thumb call_via trampolines. */
563 rtx thumb_call_via_label[14];
564 static int thumb_call_reg_needed;
566 /* Bit values used to identify processor capabilities. */
567 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
568 #define FL_ARCH3M (1 << 1) /* Extended multiply */
569 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
570 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
571 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
572 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
573 #define FL_THUMB (1 << 6) /* Thumb aware */
574 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
575 #define FL_STRONG (1 << 8) /* StrongARM */
576 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
577 #define FL_XSCALE (1 << 10) /* XScale */
578 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
579 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
580 media instructions. */
581 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
582 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
583 Note: ARM6 & 7 derivatives only. */
584 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
585 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
586 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
588 #define FL_DIV (1 << 18) /* Hardware divide. */
589 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
590 #define FL_NEON (1 << 20) /* Neon instructions. */
591 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
594 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
596 /* Flags that only affect tuning, not available instructions. */
597 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
600 #define FL_FOR_ARCH2 FL_NOTM
601 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
602 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
603 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
604 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
605 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
606 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
607 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
608 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
609 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
610 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
611 #define FL_FOR_ARCH6J FL_FOR_ARCH6
612 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
613 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
614 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
615 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
616 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
617 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
618 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
619 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
620 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
621 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
623 /* The bits in this mask specify which
624 instructions we are allowed to generate. */
625 static unsigned long insn_flags = 0;
627 /* The bits in this mask specify which instruction scheduling options should
629 static unsigned long tune_flags = 0;
631 /* The following are used in the arm.md file as equivalents to bits
632 in the above two flag variables. */
634 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
637 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
640 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
643 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
646 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
649 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
652 /* Nonzero if this chip supports the ARM 6K extensions. */
655 /* Nonzero if instructions not present in the 'M' profile can be used. */
656 int arm_arch_notm = 0;
658 /* Nonzero if instructions present in ARMv7E-M can be used. */
661 /* Nonzero if this chip can benefit from load scheduling. */
662 int arm_ld_sched = 0;
664 /* Nonzero if this chip is a StrongARM. */
665 int arm_tune_strongarm = 0;
667 /* Nonzero if this chip is a Cirrus variant. */
668 int arm_arch_cirrus = 0;
670 /* Nonzero if this chip supports Intel Wireless MMX technology. */
671 int arm_arch_iwmmxt = 0;
673 /* Nonzero if this chip is an XScale. */
674 int arm_arch_xscale = 0;
676 /* Nonzero if tuning for XScale. */
677 int arm_tune_xscale = 0;
679 /* Nonzero if we want to tune for stores that access the write-buffer.
680 This typically means an ARM6 or ARM7 with MMU or MPU. */
681 int arm_tune_wbuf = 0;
683 /* Nonzero if tuning for Cortex-A9. */
684 int arm_tune_cortex_a9 = 0;
686 /* Nonzero if generating Thumb instructions. */
689 /* Nonzero if we should define __THUMB_INTERWORK__ in the
691 XXX This is a bit of a hack, it's intended to help work around
692 problems in GLD which doesn't understand that armv5t code is
693 interworking clean. */
694 int arm_cpp_interwork = 0;
696 /* Nonzero if chip supports Thumb 2. */
699 /* Nonzero if chip supports integer division instruction. */
702 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
703 we must report the mode of the memory reference from
704 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
705 enum machine_mode output_memory_reference_mode;
707 /* The register number to be used for the PIC offset register. */
708 unsigned arm_pic_register = INVALID_REGNUM;
710 /* Set to 1 after arm_reorg has started. Reset to start at the start of
711 the next function. */
712 static int after_arm_reorg = 0;
714 static enum arm_pcs arm_pcs_default;
716 /* For an explanation of these variables, see final_prescan_insn below. */
718 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
719 enum arm_cond_code arm_current_cc;
721 int arm_target_label;
722 /* The number of conditionally executed insns, including the current insn. */
723 int arm_condexec_count = 0;
724 /* A bitmask specifying the patterns for the IT block.
725 Zero means do not output an IT block before this insn. */
726 int arm_condexec_mask = 0;
727 /* The number of bits used in arm_condexec_mask. */
728 int arm_condexec_masklen = 0;
730 /* The condition codes of the ARM, and the inverse function. */
731 static const char * const arm_condition_codes[] =
733 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
734 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
737 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
738 #define streq(string1, string2) (strcmp (string1, string2) == 0)
740 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
741 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
742 | (1 << PIC_OFFSET_TABLE_REGNUM)))
744 /* Initialization code. */
748 const char *const name;
749 enum processor_type core;
751 const unsigned long flags;
752 const struct tune_params *const tune;
755 const struct tune_params arm_slowmul_tune =
757 arm_slowmul_rtx_costs,
761 const struct tune_params arm_fastmul_tune =
763 arm_fastmul_rtx_costs,
767 const struct tune_params arm_xscale_tune =
769 arm_xscale_rtx_costs,
773 const struct tune_params arm_9e_tune =
779 /* Not all of these give usefully different compilation alternatives,
780 but there is no simple way of generalizing them. */
781 static const struct processors all_cores[] =
784 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
785 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
786 #include "arm-cores.def"
788 {NULL, arm_none, NULL, 0, NULL}
791 static const struct processors all_architectures[] =
793 /* ARM Architectures */
794 /* We don't specify tuning costs here as it will be figured out
797 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
798 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
799 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
800 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
801 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
802 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
803 implementations that support it, so we will leave it out for now. */
804 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
805 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
806 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
807 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
808 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
809 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
810 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
811 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
812 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
813 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
814 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
815 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
816 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
817 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
818 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
819 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
820 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
821 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
822 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
823 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
824 {NULL, arm_none, NULL, 0 , NULL}
828 /* These are populated as commandline arguments are processed, or NULL
830 static const struct processors *arm_selected_arch;
831 static const struct processors *arm_selected_cpu;
832 static const struct processors *arm_selected_tune;
834 /* The name of the preprocessor macro to define for this architecture. */
836 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
838 /* Available values for -mfpu=. */
/* Row layout: option name, FP model (FPA / Maverick / VFP), architecture
   revision, VFP register-bank layout, NEON present, half-precision (fp16)
   extension present.  NOTE(review): the brace lines opening and closing
   this initializer fall on lines elided from this copy.  */
840 static const struct arm_fpu_desc all_fpus[] =
842 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
843 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
844 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
845 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
846 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
847 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
848 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
849 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
850 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
851 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
852 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
853 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
854 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
855 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
856 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
857 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
858 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
859 /* Compatibility aliases. */
/* "vfp3" duplicates the "vfpv3" row above under its historical name.  */
860 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
867 enum float_abi_type abi_type;
/* NOTE(review): the struct definitions (name field, braces) wrapping the
   three name->enum option tables below fall on lines elided from this
   copy.  Each table maps a command-line string to its enum value.  */
871 /* Available values for -mfloat-abi=. */
873 static const struct float_abi all_float_abis[] =
875 {"soft", ARM_FLOAT_ABI_SOFT},
876 {"softfp", ARM_FLOAT_ABI_SOFTFP},
877 {"hard", ARM_FLOAT_ABI_HARD}
884 enum arm_fp16_format_type fp16_format_type;
888 /* Available values for -mfp16-format=. */
890 static const struct fp16_format all_fp16_formats[] =
892 {"none", ARM_FP16_FORMAT_NONE},
893 {"ieee", ARM_FP16_FORMAT_IEEE},
894 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
901 enum arm_abi_type abi_type;
905 /* Available values for -mabi=. */
907 static const struct abi_name arm_all_abis[] =
909 {"apcs-gnu", ARM_ABI_APCS},
910 {"atpcs", ARM_ABI_ATPCS},
911 {"aapcs", ARM_ABI_AAPCS},
912 {"iwmmxt", ARM_ABI_IWMMXT},
913 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
916 /* Supported TLS relocations. */
/* NOTE(review): the tls_reloc enum body itself sits on elided lines.  */
926 /* The maximum number of insns to be used when loading a constant. */
/* When optimizing for size allow only a single insn; otherwise defer to
   the per-core value from the current tuning table.  */
928 arm_constant_limit (bool size_p)
930 return size_p ? 1 : current_tune->constant_limit;
933 /* Emit an insn that's a simple single-set. Both the operands must be known
   to be valid.  */
936 emit_set_insn (rtx x, rtx y)
/* Wrap X := Y in a SET rtx and emit it, returning the emitted insn.  */
938 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
941 /* Return the number of bits set in VALUE. */
943 bit_count (unsigned long value)
945 unsigned long count = 0;
/* Kernighan-style popcount: each step clears one set bit, so the loop
   (its header falls on an elided line) iterates once per set bit.  */
950 value &= value - 1; /* Clear the least-significant set bit. */
956 /* Set up library functions unique to ARM. */
/* Registers the EABI (RTABI) __aeabi_* runtime routine names with the
   optab machinery so libcalls use the ABI-mandated symbols, and clears
   entries the ABI does not provide so generic fallbacks are used.  */
959 arm_init_libfuncs (void)
961 /* There are no special library functions unless we are using the
   ARM BPABI.  */
966 /* The functions below are described in Section 4 of the "Run-Time
967 ABI for the ARM architecture", Version 1.0. */
968
969 /* Double-precision floating-point arithmetic. Table 2. */
970 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
971 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
972 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
973 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
974 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
976 /* Double-precision comparisons. Table 3. */
977 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
/* NULL: NE has no dedicated routine; it is derived from EQ.  */
978 set_optab_libfunc (ne_optab, DFmode, NULL);
979 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
980 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
981 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
982 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
983 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
985 /* Single-precision floating-point arithmetic. Table 4. */
986 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
987 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
988 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
989 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
990 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
992 /* Single-precision comparisons. Table 5. */
993 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
994 set_optab_libfunc (ne_optab, SFmode, NULL);
995 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
996 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
997 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
998 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
999 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1001 /* Floating-point to integer conversions. Table 6. */
1002 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1003 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1004 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1005 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1006 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1007 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1008 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1009 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1011 /* Conversions between floating types. Table 7. */
1012 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1013 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1015 /* Integer to floating-point conversions. Table 8. */
1016 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1017 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1018 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1019 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1020 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1021 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1022 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1023 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1025 /* Long long. Table 9. */
1026 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1027 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1028 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1029 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1030 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1031 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1032 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1033 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1035 /* Integer (32/32->32) division. \S 4.3.1. */
1036 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1037 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1039 /* The divmod functions are designed so that they can be used for
1040 plain division, even though they return both the quotient and the
1041 remainder. The quotient is returned in the usual location (i.e.,
1042 r0 for SImode, {r0, r1} for DImode), just as would be expected
1043 for an ordinary division routine. Because the AAPCS calling
1044 conventions specify that all of { r0, r1, r2, r3 } are
1045 call-clobbered registers, there is no need to tell the compiler
1046 explicitly that those registers are clobbered by these
   routines.  */
1048 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1049 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1051 /* For SImode division the ABI provides div-without-mod routines,
1052 which are faster. */
1053 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1054 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1056 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1057 divmod libcalls instead. */
1058 set_optab_libfunc (smod_optab, DImode, NULL);
1059 set_optab_libfunc (umod_optab, DImode, NULL);
1060 set_optab_libfunc (smod_optab, SImode, NULL);
1061 set_optab_libfunc (umod_optab, SImode, NULL);
1063 /* Half-precision float operations. The compiler handles all operations
1064 with NULL libfuncs by converting the SFmode. */
1065 switch (arm_fp16_format)
1067 case ARM_FP16_FORMAT_IEEE:
1068 case ARM_FP16_FORMAT_ALTERNATIVE:
/* Register the matching __gnu_* HF<->SF conversion pair for the
   selected half-precision format.  */
1071 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1072 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1074 : "__gnu_f2h_alternative"));
1075 set_conv_libfunc (sext_optab, SFmode, HFmode,
1076 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1078 : "__gnu_h2f_alternative"));
/* Arithmetic: expand via SFmode (NULL libfuncs).  */
1081 set_optab_libfunc (add_optab, HFmode, NULL);
1082 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1083 set_optab_libfunc (smul_optab, HFmode, NULL);
1084 set_optab_libfunc (neg_optab, HFmode, NULL);
1085 set_optab_libfunc (sub_optab, HFmode, NULL);
/* Comparisons: likewise performed after widening to SFmode.  */
1088 set_optab_libfunc (eq_optab, HFmode, NULL);
1089 set_optab_libfunc (ne_optab, HFmode, NULL);
1090 set_optab_libfunc (lt_optab, HFmode, NULL);
1091 set_optab_libfunc (le_optab, HFmode, NULL);
1092 set_optab_libfunc (ge_optab, HFmode, NULL);
1093 set_optab_libfunc (gt_optab, HFmode, NULL);
1094 set_optab_libfunc (unord_optab, HFmode, NULL);
/* AAPCS targets provide the __sync_synchronize barrier routine.  */
1101 if (TARGET_AAPCS_BASED)
1102 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1105 /* On AAPCS systems, this is the "struct __va_list". */
/* Built lazily by arm_build_builtin_va_list below; GTY(()) roots it so
   the garbage collector keeps it alive between passes.  */
1106 static GTY(()) tree va_list_type;
1108 /* Return the type to use as __builtin_va_list. */
1110 arm_build_builtin_va_list (void)
/* Local tree decls (va_list_name, ap_field) sit on elided lines.  */
/* Non-AAPCS targets use the generic (plain pointer) va_list.  */
1115 if (!TARGET_AAPCS_BASED)
1116 return std_build_builtin_va_list ();
1118 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1126 The C Library ABI further reinforces this definition in \S
1129 We must follow this definition exactly. The structure tag
1130 name is visible in C++ mangled names, and thus forms a part
1131 of the ABI. The field name may be used by people who
1132 #include <stdarg.h>. */
1133 /* Create the type. */
1134 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1135 /* Give it the required name. */
1136 va_list_name = build_decl (BUILTINS_LOCATION,
1138 get_identifier ("__va_list"),
/* DECL_ARTIFICIAL: compiler-generated, not user-written.  */
1140 DECL_ARTIFICIAL (va_list_name) = 1;
1141 TYPE_NAME (va_list_type) = va_list_name;
1142 /* Create the __ap field. */
1143 ap_field = build_decl (BUILTINS_LOCATION,
1145 get_identifier ("__ap"),
1147 DECL_ARTIFICIAL (ap_field) = 1;
1148 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1149 TYPE_FIELDS (va_list_type) = ap_field;
1150 /* Compute its layout. */
1151 layout_type (va_list_type);
1153 return va_list_type;
1156 /* Return an expression of type "void *" pointing to the next
1157 available argument in a variable-argument list. VALIST is the
1158 user-level va_list object, of type __builtin_va_list. */
1160 arm_extract_valist_ptr (tree valist)
1162 if (TREE_TYPE (valist) == error_mark_node)
1163 return error_mark_node;
1165 /* On an AAPCS target, the pointer is stored within "struct
   __va_list", so dig out its single __ap field.  */
1167 if (TARGET_AAPCS_BASED)
1169 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1170 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1171 valist, ap_field, NULL_TREE);
/* NOTE(review): the closing "return valist;" falls on an elided line.  */
1177 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1179 arm_expand_builtin_va_start (tree valist, rtx nextarg)
/* Unwrap the AAPCS struct (if any), then defer to the generic code.  */
1181 valist = arm_extract_valist_ptr (valist);
1182 std_expand_builtin_va_start (valist, nextarg);
1185 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1187 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
/* Same pattern as va_start: unwrap, then use the generic gimplifier.  */
1190 valist = arm_extract_valist_ptr (valist);
1191 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1194 /* Lookup NAME in SEL. */
/* Linear search of a processors table for NAME; DESC names the
   originating switch for the error message.  NOTE(review): the return
   statements (for the empty-name case and for a match) fall on lines
   elided from this copy.  */
1196 static const struct processors *
1197 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1199 if (!(name && *name))
1202 for (; sel->name != NULL; sel++)
1204 if (streq (name, sel->name))
1208 error ("bad value (%s) for %s switch", name, desc);
1212 /* Implement TARGET_HANDLE_OPTION. */
/* Per-option dispatch (the switch framing and remaining case labels sit
   on elided lines): records the -march/-mcpu/-mtune selections and the
   float ABI chosen by -mhard-float / -msoft-float.  */
1215 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1220 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1224 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1227 case OPT_mhard_float:
1228 target_float_abi_name = "hard";
1231 case OPT_msoft_float:
1232 target_float_abi_name = "soft";
1236 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1245 arm_target_help (void)
/* Implements the target help hook: prints the known -mcpu/-mtune core
   names and the -march architecture names, wrapped to the terminal
   width (cached in COLUMNS across calls).  */
1248 static int columns = 0;
1251 /* If we have not done so already, obtain the desired maximum width of
1252 the output. Note - this is a duplication of the code at the start of
1253 gcc/opts.c:print_specific_help() - the two copies should probably be
1254 replaced by a single function. */
1259 GET_ENVIRONMENT (p, "COLUMNS");
1262 int value = atoi (p);
1269 /* Use a reasonable default. */
1273 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1275 /* The - 2 is because we know that the last entry in the array is NULL. */
1276 i = ARRAY_SIZE (all_cores) - 2;
/* Print the first name, then append the rest separated by ", ",
   starting a fresh indented line whenever the current one is full.  */
1278 printf (" %s", all_cores[i].name);
1279 remaining = columns - (strlen (all_cores[i].name) + 4);
1280 gcc_assert (remaining >= 0);
1284 int len = strlen (all_cores[i].name);
1286 if (remaining > len + 2)
1288 printf (", %s", all_cores[i].name);
1289 remaining -= len + 2;
1295 printf ("\n %s", all_cores[i].name);
1296 remaining = columns - (len + 4);
1300 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
/* Same wrapping logic, this time over the architectures table.  */
1302 i = ARRAY_SIZE (all_architectures) - 2;
1305 printf (" %s", all_architectures[i].name);
1306 remaining = columns - (strlen (all_architectures[i].name) + 4);
1307 gcc_assert (remaining >= 0);
1311 int len = strlen (all_architectures[i].name);
1313 if (remaining > len + 2)
1315 printf (", %s", all_architectures[i].name);
1316 remaining -= len + 2;
1322 printf ("\n %s", all_architectures[i].name);
1323 remaining = columns - (len + 4);
1330 /* Fix up any incompatible options that the user has specified.
1331 This has now turned into a maze. */
1333 arm_override_options (void)
1337 if (arm_selected_arch)
1339 if (arm_selected_cpu)
1341 /* Check for conflict between mcpu and march. */
1342 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1344 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1345 arm_selected_cpu->name, arm_selected_arch->name);
1346 /* -march wins for code generation.
1347 -mcpu wins for default tuning. */
1348 if (!arm_selected_tune)
1349 arm_selected_tune = arm_selected_cpu;
1351 arm_selected_cpu = arm_selected_arch;
1355 arm_selected_arch = NULL;
1358 /* Pick a CPU based on the architecture. */
1359 arm_selected_cpu = arm_selected_arch;
1362 /* If the user did not specify a processor, choose one for them. */
1363 if (!arm_selected_cpu)
1365 const struct processors * sel;
1366 unsigned int sought;
1368 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1369 if (!arm_selected_cpu->name)
1371 #ifdef SUBTARGET_CPU_DEFAULT
1372 /* Use the subtarget default CPU if none was specified by
1374 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1376 /* Default to ARM6. */
1377 if (arm_selected_cpu->name)
1378 arm_selected_cpu = &all_cores[arm6];
1381 sel = arm_selected_cpu;
1382 insn_flags = sel->flags;
1384 /* Now check to see if the user has specified some command line
1385 switch that require certain abilities from the cpu. */
1388 if (TARGET_INTERWORK || TARGET_THUMB)
1390 sought |= (FL_THUMB | FL_MODE32);
1392 /* There are no ARM processors that support both APCS-26 and
1393 interworking. Therefore we force FL_MODE26 to be removed
1394 from insn_flags here (if it was set), so that the search
1395 below will always be able to find a compatible processor. */
1396 insn_flags &= ~FL_MODE26;
1399 if (sought != 0 && ((sought & insn_flags) != sought))
1401 /* Try to locate a CPU type that supports all of the abilities
1402 of the default CPU, plus the extra abilities requested by
1404 for (sel = all_cores; sel->name != NULL; sel++)
1405 if ((sel->flags & sought) == (sought | insn_flags))
1408 if (sel->name == NULL)
1410 unsigned current_bit_count = 0;
1411 const struct processors * best_fit = NULL;
1413 /* Ideally we would like to issue an error message here
1414 saying that it was not possible to find a CPU compatible
1415 with the default CPU, but which also supports the command
1416 line options specified by the programmer, and so they
1417 ought to use the -mcpu=<name> command line option to
1418 override the default CPU type.
1420 If we cannot find a cpu that has both the
1421 characteristics of the default cpu and the given
1422 command line options we scan the array again looking
1423 for a best match. */
1424 for (sel = all_cores; sel->name != NULL; sel++)
1425 if ((sel->flags & sought) == sought)
1429 count = bit_count (sel->flags & insn_flags);
1431 if (count >= current_bit_count)
1434 current_bit_count = count;
1438 gcc_assert (best_fit);
1442 arm_selected_cpu = sel;
1446 gcc_assert (arm_selected_cpu);
1447 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1448 if (!arm_selected_tune)
1449 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1451 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1452 insn_flags = arm_selected_cpu->flags;
1454 arm_tune = arm_selected_tune->core;
1455 tune_flags = arm_selected_tune->flags;
1456 current_tune = arm_selected_tune->tune;
1458 if (target_fp16_format_name)
1460 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1462 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1464 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1468 if (i == ARRAY_SIZE (all_fp16_formats))
1469 error ("invalid __fp16 format option: -mfp16-format=%s",
1470 target_fp16_format_name);
1473 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1475 if (target_abi_name)
1477 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1479 if (streq (arm_all_abis[i].name, target_abi_name))
1481 arm_abi = arm_all_abis[i].abi_type;
1485 if (i == ARRAY_SIZE (arm_all_abis))
1486 error ("invalid ABI option: -mabi=%s", target_abi_name);
1489 arm_abi = ARM_DEFAULT_ABI;
1491 /* Make sure that the processor choice does not conflict with any of the
1492 other command line choices. */
1493 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1494 error ("target CPU does not support ARM mode");
1496 /* BPABI targets use linker tricks to allow interworking on cores
1497 without thumb support. */
1498 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1500 warning (0, "target CPU does not support interworking" );
1501 target_flags &= ~MASK_INTERWORK;
1504 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1506 warning (0, "target CPU does not support THUMB instructions");
1507 target_flags &= ~MASK_THUMB;
1510 if (TARGET_APCS_FRAME && TARGET_THUMB)
1512 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1513 target_flags &= ~MASK_APCS_FRAME;
1516 /* Callee super interworking implies thumb interworking. Adding
1517 this to the flags here simplifies the logic elsewhere. */
1518 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1519 target_flags |= MASK_INTERWORK;
1521 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1522 from here where no function is being compiled currently. */
1523 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1524 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1526 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1527 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1529 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1530 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1532 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1534 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1535 target_flags |= MASK_APCS_FRAME;
1538 if (TARGET_POKE_FUNCTION_NAME)
1539 target_flags |= MASK_APCS_FRAME;
1541 if (TARGET_APCS_REENT && flag_pic)
1542 error ("-fpic and -mapcs-reent are incompatible");
1544 if (TARGET_APCS_REENT)
1545 warning (0, "APCS reentrant code not supported. Ignored");
1547 /* If this target is normally configured to use APCS frames, warn if they
1548 are turned off and debugging is turned on. */
1550 && write_symbols != NO_DEBUG
1551 && !TARGET_APCS_FRAME
1552 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1553 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1555 if (TARGET_APCS_FLOAT)
1556 warning (0, "passing floating point arguments in fp regs not yet supported");
1558 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1559 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1560 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1561 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1562 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1563 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1564 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1565 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1566 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1567 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1568 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1569 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1570 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1572 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1573 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1574 thumb_code = (TARGET_ARM == 0);
1575 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1576 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1577 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1578 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1579 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1581 /* If we are not using the default (ARM mode) section anchor offset
1582 ranges, then set the correct ranges now. */
1585 /* Thumb-1 LDR instructions cannot have negative offsets.
1586 Permissible positive offset ranges are 5-bit (for byte loads),
1587 6-bit (for halfword loads), or 7-bit (for word loads).
1588 Empirical results suggest a 7-bit anchor range gives the best
1589 overall code size. */
1590 targetm.min_anchor_offset = 0;
1591 targetm.max_anchor_offset = 127;
1593 else if (TARGET_THUMB2)
1595 /* The minimum is set such that the total size of the block
1596 for a particular anchor is 248 + 1 + 4095 bytes, which is
1597 divisible by eight, ensuring natural spacing of anchors. */
1598 targetm.min_anchor_offset = -248;
1599 targetm.max_anchor_offset = 4095;
1602 /* V5 code we generate is completely interworking capable, so we turn off
1603 TARGET_INTERWORK here to avoid many tests later on. */
1605 /* XXX However, we must pass the right pre-processor defines to CPP
1606 or GLD can get confused. This is a hack. */
1607 if (TARGET_INTERWORK)
1608 arm_cpp_interwork = 1;
1611 target_flags &= ~MASK_INTERWORK;
1613 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1614 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1616 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1617 error ("iwmmxt abi requires an iwmmxt capable cpu");
1619 if (target_fpu_name == NULL && target_fpe_name != NULL)
1621 if (streq (target_fpe_name, "2"))
1622 target_fpu_name = "fpe2";
1623 else if (streq (target_fpe_name, "3"))
1624 target_fpu_name = "fpe3";
1626 error ("invalid floating point emulation option: -mfpe=%s",
1630 if (target_fpu_name == NULL)
1632 #ifdef FPUTYPE_DEFAULT
1633 target_fpu_name = FPUTYPE_DEFAULT;
1635 if (arm_arch_cirrus)
1636 target_fpu_name = "maverick";
1638 target_fpu_name = "fpe2";
1642 arm_fpu_desc = NULL;
1643 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1645 if (streq (all_fpus[i].name, target_fpu_name))
1647 arm_fpu_desc = &all_fpus[i];
1654 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1658 switch (arm_fpu_desc->model)
1660 case ARM_FP_MODEL_FPA:
1661 if (arm_fpu_desc->rev == 2)
1662 arm_fpu_attr = FPU_FPE2;
1663 else if (arm_fpu_desc->rev == 3)
1664 arm_fpu_attr = FPU_FPE3;
1666 arm_fpu_attr = FPU_FPA;
1669 case ARM_FP_MODEL_MAVERICK:
1670 arm_fpu_attr = FPU_MAVERICK;
1673 case ARM_FP_MODEL_VFP:
1674 arm_fpu_attr = FPU_VFP;
1681 if (target_float_abi_name != NULL)
1683 /* The user specified a FP ABI. */
1684 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1686 if (streq (all_float_abis[i].name, target_float_abi_name))
1688 arm_float_abi = all_float_abis[i].abi_type;
1692 if (i == ARRAY_SIZE (all_float_abis))
1693 error ("invalid floating point abi: -mfloat-abi=%s",
1694 target_float_abi_name);
1697 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1699 if (TARGET_AAPCS_BASED
1700 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1701 error ("FPA is unsupported in the AAPCS");
1703 if (TARGET_AAPCS_BASED)
1705 if (TARGET_CALLER_INTERWORKING)
1706 error ("AAPCS does not support -mcaller-super-interworking");
1708 if (TARGET_CALLEE_INTERWORKING)
1709 error ("AAPCS does not support -mcallee-super-interworking");
1712 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1713 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1714 will ever exist. GCC makes no attempt to support this combination. */
1715 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1716 sorry ("iWMMXt and hardware floating point");
1718 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1719 if (TARGET_THUMB2 && TARGET_IWMMXT)
1720 sorry ("Thumb-2 iWMMXt");
1722 /* __fp16 support currently assumes the core has ldrh. */
1723 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1724 sorry ("__fp16 and no ldrh");
1726 /* If soft-float is specified then don't use FPU. */
1727 if (TARGET_SOFT_FLOAT)
1728 arm_fpu_attr = FPU_NONE;
1730 if (TARGET_AAPCS_BASED)
1732 if (arm_abi == ARM_ABI_IWMMXT)
1733 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1734 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1735 && TARGET_HARD_FLOAT
1737 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1739 arm_pcs_default = ARM_PCS_AAPCS;
1743 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1744 sorry ("-mfloat-abi=hard and VFP");
1746 if (arm_abi == ARM_ABI_APCS)
1747 arm_pcs_default = ARM_PCS_APCS;
1749 arm_pcs_default = ARM_PCS_ATPCS;
1752 /* For arm2/3 there is no need to do any scheduling if there is only
1753 a floating point emulator, or we are doing software floating-point. */
1754 if ((TARGET_SOFT_FLOAT
1755 || (TARGET_FPA && arm_fpu_desc->rev))
1756 && (tune_flags & FL_MODE32) == 0)
1757 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1759 if (target_thread_switch)
1761 if (strcmp (target_thread_switch, "soft") == 0)
1762 target_thread_pointer = TP_SOFT;
1763 else if (strcmp (target_thread_switch, "auto") == 0)
1764 target_thread_pointer = TP_AUTO;
1765 else if (strcmp (target_thread_switch, "cp15") == 0)
1766 target_thread_pointer = TP_CP15;
1768 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1771 /* Use the cp15 method if it is available. */
1772 if (target_thread_pointer == TP_AUTO)
1774 if (arm_arch6k && !TARGET_THUMB1)
1775 target_thread_pointer = TP_CP15;
1777 target_thread_pointer = TP_SOFT;
1780 if (TARGET_HARD_TP && TARGET_THUMB1)
1781 error ("can not use -mtp=cp15 with 16-bit Thumb");
1783 /* Override the default structure alignment for AAPCS ABI. */
1784 if (TARGET_AAPCS_BASED)
1785 arm_structure_size_boundary = 8;
1787 if (structure_size_string != NULL)
1789 int size = strtol (structure_size_string, NULL, 0);
1791 if (size == 8 || size == 32
1792 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1793 arm_structure_size_boundary = size;
1795 warning (0, "structure size boundary can only be set to %s",
1796 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1799 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1801 error ("RTP PIC is incompatible with Thumb");
1805 /* If stack checking is disabled, we can use r10 as the PIC register,
1806 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1807 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1809 if (TARGET_VXWORKS_RTP)
1810 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1811 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1814 if (flag_pic && TARGET_VXWORKS_RTP)
1815 arm_pic_register = 9;
1817 if (arm_pic_register_string != NULL)
1819 int pic_register = decode_reg_name (arm_pic_register_string);
1822 warning (0, "-mpic-register= is useless without -fpic");
1824 /* Prevent the user from choosing an obviously stupid PIC register. */
1825 else if (pic_register < 0 || call_used_regs[pic_register]
1826 || pic_register == HARD_FRAME_POINTER_REGNUM
1827 || pic_register == STACK_POINTER_REGNUM
1828 || pic_register >= PC_REGNUM
1829 || (TARGET_VXWORKS_RTP
1830 && (unsigned int) pic_register != arm_pic_register))
1831 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1833 arm_pic_register = pic_register;
1836 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1837 if (fix_cm3_ldrd == 2)
1839 if (arm_selected_cpu->core == cortexm3)
1845 if (TARGET_THUMB1 && flag_schedule_insns)
1847 /* Don't warn since it's on by default in -O2. */
1848 flag_schedule_insns = 0;
1853 /* If optimizing for size, bump the number of instructions that we
1854 are prepared to conditionally execute (even on a StrongARM). */
1855 max_insns_skipped = 6;
1859 /* StrongARM has early execution of branches, so a sequence
1860 that is worth skipping is shorter. */
1861 if (arm_tune_strongarm)
1862 max_insns_skipped = 3;
1865 /* Hot/Cold partitioning is not currently supported, since we can't
1866 handle literal pool placement in that case. */
1867 if (flag_reorder_blocks_and_partition)
1869 inform (input_location,
1870 "-freorder-blocks-and-partition not supported on this architecture");
1871 flag_reorder_blocks_and_partition = 0;
1872 flag_reorder_blocks = 1;
1875 /* Register global variables with the garbage collector. */
1876 arm_add_gc_roots ();
1880 arm_add_gc_roots (void)
/* One-time setup called from arm_override_options: initialize the
   obstack used by the minipool (constant pool) machinery and record its
   starting object so it can be reset between functions.  */
1882 gcc_obstack_init(&minipool_obstack);
1883 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1886 /* A table of known ARM exception types.
1887 For use with the interrupt function attribute. */
1891 const char *const arg;
1892 const unsigned long return_value;
/* Maps the string argument of __attribute__((isr)) / ((interrupt)) to
   the ARM_FT_* function-type flags used by arm_isr_value.  Each name
   appears in upper and lower case; the NULL row terminates the table.
   FIX: removed the second, duplicate { "ABORT" } / { "abort" } pair --
   arm_isr_value stops at the first streq match, so the repeated rows
   were dead entries.  */
1896 static const isr_attribute_arg isr_attribute_args [] =
1898 { "IRQ", ARM_FT_ISR },
1899 { "irq", ARM_FT_ISR },
1900 { "FIQ", ARM_FT_FIQ },
1901 { "fiq", ARM_FT_FIQ },
1902 { "ABORT", ARM_FT_ISR },
1903 { "abort", ARM_FT_ISR },
1906 { "UNDEF", ARM_FT_EXCEPTION },
1907 { "undef", ARM_FT_EXCEPTION },
1908 { "SWI", ARM_FT_EXCEPTION },
1909 { "swi", ARM_FT_EXCEPTION },
1910 { NULL, ARM_FT_NORMAL }
1913 /* Returns the (interrupt) function type of the current
1914 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1916 static unsigned long
1917 arm_isr_value (tree argument)
1919 const isr_attribute_arg * ptr;
/* NOTE(review): the guard condition for this early return sits on an
   elided line; only the return itself is visible here.  */
1923 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1925 /* No argument - default to IRQ. */
1926 if (argument == NULL_TREE)
1929 /* Get the value of the argument. */
1930 if (TREE_VALUE (argument) == NULL_TREE
1931 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1932 return ARM_FT_UNKNOWN;
1934 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1936 /* Check it against the list of known arguments. */
/* First streq match wins; the table's NULL sentinel ends the scan.  */
1937 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1938 if (streq (arg, ptr->arg))
1939 return ptr->return_value;
1941 /* An unrecognized interrupt type. */
1942 return ARM_FT_UNKNOWN;
1945 /* Computes the type of the current function. */
/* Builds the ARM_FT_* flag word for current_function_decl from its
   volatility, static chain, and "naked"/"isr"/"interrupt" attributes.
   Cached by arm_current_func_type; do not call directly elsewhere.  */
1947 static unsigned long
1948 arm_compute_func_type (void)
1950 unsigned long type = ARM_FT_UNKNOWN;
1954 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1956 /* Decide if the current function is volatile. Such functions
1957 never return, and many memory cycles can be saved by not storing
1958 register values that will never be needed again. This optimization
1959 was added to speed up context switching in a kernel application. */
1961 && (TREE_NOTHROW (current_function_decl)
1962 || !(flag_unwind_tables
1963 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1964 && TREE_THIS_VOLATILE (current_function_decl))
1965 type |= ARM_FT_VOLATILE;
1967 if (cfun->static_chain_decl != NULL)
1968 type |= ARM_FT_NESTED;
1970 attr = DECL_ATTRIBUTES (current_function_decl);
1972 a = lookup_attribute ("naked", attr);
1974 type |= ARM_FT_NAKED;
/* "isr" and "interrupt" are synonyms; try both spellings.  */
1976 a = lookup_attribute ("isr", attr);
1978 a = lookup_attribute ("interrupt", attr);
1981 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1983 type |= arm_isr_value (TREE_VALUE (a));
1988 /* Returns the type of the current function. */
/* Memoizes arm_compute_func_type in cfun->machine->func_type so the
   flags are computed at most once per function.  */
1991 arm_current_func_type (void)
1993 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1994 cfun->machine->func_type = arm_compute_func_type ();
1996 return cfun->machine->func_type;
/* Target hook: return false for "naked" functions, which have no
   prologue and therefore must not have stack slots allocated for
   incoming arguments.  */
2000 arm_allocate_stack_slots_for_args (void)
2002 /* Naked functions should not allocate stack slots for arguments. */
2003 return !IS_NAKED (arm_current_func_type ());
/* Emit the fixed part of the trampoline as assembly text, with
   zero-filled slots for the static chain value and target address that
   arm_trampoline_init patches at run time.  Three variants: ARM,
   Thumb-2, and 16-bit Thumb (which must bounce through r0 because it
   cannot load pc-relative into arbitrary registers directly).
   NOTE(review): the TARGET_ARM/TARGET_THUMB condition lines and some
   braces are missing from this extract.  */
2007 /* Output assembler code for a block containing the constant parts
2008 of a trampoline, leaving space for the variable parts.
2010 On the ARM, (if r8 is the static chain regnum, and remembering that
2011 referencing pc adds an offset of 8) the trampoline looks like:
2014 .word static chain value
2015 .word function's address
2016 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2019 arm_asm_trampoline_template (FILE *f)
/* ARM mode: pc-relative loads pick up the two literal words below
   (pc reads as instruction address + 8 in ARM state).  */
2023 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2024 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2026 else if (TARGET_THUMB2)
2028 /* The Thumb-2 trampoline is similar to the arm implementation.
2029 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2030 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2031 STATIC_CHAIN_REGNUM, PC_REGNUM);
2032 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
/* 16-bit Thumb: save r0/r1, load the chain and target via r0, stash
   the target over the saved r1 slot, then pop r0 and pc together.  */
2036 ASM_OUTPUT_ALIGN (f, 2);
2037 fprintf (f, "\t.code\t16\n");
2038 fprintf (f, ".Ltrampoline_start:\n");
2039 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2040 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2041 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2042 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2043 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2044 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
/* The two variable words, zero-filled here; patched by
   arm_trampoline_init.  */
2046 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2047 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
/* TARGET_TRAMPOLINE_INIT hook: copy the fixed template into M_TRAMP,
   store CHAIN_VALUE and FNDECL's address into the variable slots
   (offsets differ between 32-bit and 16-bit Thumb templates), then
   call __clear_cache so the I-cache sees the freshly written code.  */
2050 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2053 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2055 rtx fnaddr, mem, a_tramp;
2057 emit_block_move (m_tramp, assemble_trampoline_template (),
2058 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
/* Slot offsets match the literal-pool words laid out by
   arm_asm_trampoline_template.  */
2060 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2061 emit_move_insn (mem, chain_value);
2063 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2064 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2065 emit_move_insn (mem, fnaddr);
/* Flush [a_tramp, a_tramp + TRAMPOLINE_SIZE) from the caches.  */
2067 a_tramp = XEXP (m_tramp, 0);
2068 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2069 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2070 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
/* TARGET_TRAMPOLINE_ADJUST_ADDRESS hook: OR bit 0 into the trampoline
   address so an interworking branch enters it in Thumb state.
   NOTE(review): the TARGET_THUMB guard and return statement are
   missing from this extract.  */
2073 /* Thumb trampolines should be entered in thumb mode, so set
2074 the bottom bit of the address. */
2077 arm_trampoline_adjust_address (rtx addr)
2080 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2081 NULL, 0, OPTAB_LIB_WIDEN);
/* Decide whether the epilogue can be a single return instruction.
   Returns 0 whenever any condition forces a multi-insn epilogue:
   special function types, variadic args, eh_return/alloca, stack
   adjustments, the SA-110 ldmib erratum, interworking, StrongARM
   conditional-return cost, a missing saved LR, or saved FPA/VFP/iWMMXt
   registers.  ISCOND is nonzero for a conditional return; SIBLING, if
   non-null, is the sibcall insn whose register usage we must respect.
   NOTE(review): several early-return lines and loop bodies are missing
   from this gappy extract.  */
2085 /* Return 1 if it is possible to return using a single instruction.
2086 If SIBLING is non-null, this is a test for a return before a sibling
2087 call. SIBLING is the call insn, so we can examine its register usage. */
2090 use_return_insn (int iscond, rtx sibling)
2093 unsigned int func_type;
2094 unsigned long saved_int_regs;
2095 unsigned HOST_WIDE_INT stack_adjust;
2096 arm_stack_offsets *offsets;
2098 /* Never use a return instruction before reload has run. */
2099 if (!reload_completed)
2102 func_type = arm_current_func_type ();
2104 /* Naked, volatile and stack alignment functions need special
2106 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2109 /* So do interrupt functions that use the frame pointer and Thumb
2110 interrupt functions. */
2111 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2114 offsets = arm_get_frame_offsets ();
2115 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2117 /* As do variadic functions. */
2118 if (crtl->args.pretend_args_size
2119 || cfun->machine->uses_anonymous_args
2120 /* Or if the function calls __builtin_eh_return () */
2121 || crtl->calls_eh_return
2122 /* Or if the function calls alloca */
2123 || cfun->calls_alloca
2124 /* Or if there is a stack adjustment. However, if the stack pointer
2125 is saved on the stack, we can use a pre-incrementing stack load. */
2126 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2127 && stack_adjust == 4)))
2130 saved_int_regs = offsets->saved_regs_mask;
2132 /* Unfortunately, the insn
2134 ldmib sp, {..., sp, ...}
2136 triggers a bug on most SA-110 based devices, such that the stack
2137 pointer won't be correctly restored if the instruction takes a
2138 page fault. We work around this problem by popping r3 along with
2139 the other registers, since that is never slower than executing
2140 another instruction.
2142 We test for !arm_arch5 here, because code for any architecture
2143 less than this could potentially be run on one of the buggy
2145 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2147 /* Validate that r3 is a call-clobbered register (always true in
2148 the default abi) ... */
2149 if (!call_used_regs[3])
2152 /* ... that it isn't being used for a return value ... */
2153 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2156 /* ... or for a tail-call argument ... */
2159 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2161 if (find_regno_fusage (sibling, USE, 3))
2165 /* ... and that there are no call-saved registers in r0-r2
2166 (always true in the default ABI). */
2167 if (saved_int_regs & 0x7)
2171 /* Can't be done if interworking with Thumb, and any registers have been
2173 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2176 /* On StrongARM, conditional returns are expensive if they aren't
2177 taken and multiple registers have been stacked. */
2178 if (iscond && arm_tune_strongarm)
2180 /* Conditional return when just the LR is stored is a simple
2181 conditional-load instruction, that's not expensive. */
2182 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
/* NOTE(review): the flag_pic condition line is missing here.  */
2186 && arm_pic_register != INVALID_REGNUM
2187 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2191 /* If there are saved registers but the LR isn't saved, then we need
2192 two instructions for the return. */
2193 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2196 /* Can't be done if any of the FPA regs are pushed,
2197 since this also requires an insn. */
2198 if (TARGET_HARD_FLOAT && TARGET_FPA)
2199 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2200 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2203 /* Likewise VFP regs. */
2204 if (TARGET_HARD_FLOAT && TARGET_VFP)
2205 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2206 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2209 if (TARGET_REALLY_IWMMXT)
2210 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2211 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
/* Return TRUE if I can be encoded as an immediate operand: for ARM
   mode, an 8-bit value rotated right by an even amount; for Thumb-2,
   also shifted 8-bit values, a few rotated forms, and the replicated
   byte/halfword patterns.
   NOTE(review): the TARGET_ARM/TARGET_THUMB2 guards and the
   computation of `v' for the replicated-pattern check are missing
   from this extract.  */
2217 /* Return TRUE if int I is a valid immediate ARM constant. */
2220 const_ok_for_arm (HOST_WIDE_INT i)
2224 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2225 be all zero, or all one. */
2226 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2227 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2228 != ((~(unsigned HOST_WIDE_INT) 0)
2229 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2232 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2234 /* Fast return for 0 and small values. We must do this for zero, since
2235 the code below can't handle that one case. */
2236 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2239 /* Get the number of trailing zeros. */
2240 lowbit = ffs((int) i) - 1;
2242 /* Only even shifts are allowed in ARM mode so round down to the
2243 nearest even number. */
/* An 8-bit field starting at `lowbit' covers the whole value.  */
2247 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2252 /* Allow rotated constants in ARM mode. */
2254 && ((i & ~0xc000003f) == 0
2255 || (i & ~0xf000000f) == 0
2256 || (i & ~0xfc000003) == 0))
2263 /* Allow repeated pattern. */
/* Thumb-2 replicated constants: 0x00XY00XY and 0xXYXYXYXY forms.  */
2266 if (i == v || i == (v | (v << 8)))
/* Return true if I is directly usable with operation CODE, either
   as-is or after the operation-specific transform (negation for
   PLUS/MINUS, inversion for AND/IOR-style codes).
   NOTE(review): the switch statement and most case labels are missing
   from this extract; only the return expressions survive.  */
2273 /* Return true if I is a valid constant for the operation CODE. */
2275 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2277 if (const_ok_for_arm (i))
2301 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2303 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2309 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2313 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
/* Front end for arm_gen_constant: if the synthesized sequence would
   exceed the cost limit (and we may still push constants to the
   literal pool, i.e. before arm_reorg), emit a constant load (movw/
   movt pair when TARGET_USE_MOVT) plus the operation; otherwise defer
   to arm_gen_constant.  Returns the number of insns emitted.
   NOTE(review): interior lines (locals, some braces/else arms) are
   missing from this extract.  */
2320 /* Emit a sequence of insns to handle a large constant.
2321 CODE is the code of the operation required, it can be any of SET, PLUS,
2322 IOR, AND, XOR, MINUS;
2323 MODE is the mode in which the operation is being performed;
2324 VAL is the integer to operate on;
2325 SOURCE is the other operand (a register, or a null-pointer for SET);
2326 SUBTARGETS means it is safe to create scratch registers if that will
2327 either produce a simpler sequence, or we will want to cse the values.
2328 Return value is the number of insns emitted. */
2330 /* ??? Tweak this for thumb2. */
2332 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2333 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
/* Propagate any conditional-execution predicate from INSN.  */
2337 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2338 cond = COND_EXEC_TEST (PATTERN (insn));
2342 if (subtargets || code == SET
2343 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2344 && REGNO (target) != REGNO (source)))
2346 /* After arm_reorg has been called, we can't fix up expensive
2347 constants by pushing them into memory so we must synthesize
2348 them in-line, regardless of the cost. This is only likely to
2349 be more costly on chips that have load delay slots and we are
2350 compiling without running the scheduler (so no splitting
2351 occurred before the final instruction emission).
2353 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
/* Dry-run arm_gen_constant (NULL_RTX insn => no emission) to cost
   the in-line synthesis against the limit.  */
2355 if (!after_arm_reorg
2357 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2359 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2364 /* Currently SET is the only monadic value for CODE, all
2365 the rest are diadic. */
2366 if (TARGET_USE_MOVT)
2367 arm_emit_movpair (target, GEN_INT (val))
2369 emit_set_insn (target, GEN_INT (val));
/* Diadic case: load VAL into a temp, then apply CODE.  */
2375 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2377 if (TARGET_USE_MOVT)
2378 arm_emit_movpair (temp, GEN_INT (val));
2380 emit_set_insn (temp, GEN_INT (val));
2382 /* For MINUS, the value is subtracted from, since we never
2383 have subtraction of a constant. */
2385 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2387 emit_set_insn (target,
2388 gen_rtx_fmt_ee (code, mode, source, temp));
2394 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
/* Cost helper: count how many insns arm_gen_constant would need to
   materialize REMAINDER when emission starts at bit-position I,
   consuming one 8-bit (possibly wrapping) chunk per insn.  STEP_SIZE
   is 2 for ARM (even rotations only) and 1 for Thumb-2.
   NOTE(review): the loop header, `end' computation, insn counter and
   return are missing from this extract.  */
2401 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2403 HOST_WIDE_INT temp1;
2404 int step_size = TARGET_ARM ? 2 : 1;
2407 gcc_assert (TARGET_ARM || i == 0);
/* Skip down past zero bits to the next populated chunk.  */
2415 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
/* Extract an 8-bit field ending at `end', wrapping around bit 0.  */
2420 temp1 = remainder & ((0x0ff << end)
2421 | ((i < end) ? (0xff >> (32 - end)) : 0));
2422 remainder &= ~temp1;
2427 } while (remainder);
/* Choose the best bit position at which to start emitting an ARM
   constant: find the largest 2-bit-aligned run of zeros in REMAINDER
   and start just below it, unless starting at bit 0 is no worse --
   small low-order constants combine better with later pre-indexed
   addressing.  Always returns 0 for non-ARM (Thumb-2 shifts rather
   than rotates).
   NOTE(review): the return type, TARGET_ARM early-return, and final
   return of best_start are missing from this extract.  */
2432 find_best_start (unsigned HOST_WIDE_INT remainder)
2434 int best_consecutive_zeros = 0;
2438 /* If we aren't targetting ARM, the best place to start is always at
2443 for (i = 0; i < 32; i += 2)
2445 int consecutive_zeros = 0;
2447 if (!(remainder & (3 << i)))
2449 while ((i < 32) && !(remainder & (3 << i)))
2451 consecutive_zeros += 2;
2454 if (consecutive_zeros > best_consecutive_zeros)
2456 best_consecutive_zeros = consecutive_zeros;
2457 best_start = i - consecutive_zeros;
2463 /* So long as it won't require any more insns to do so, it's
2464 desirable to emit a small constant (in bits 0...9) in the last
2465 insn. This way there is more chance that it can be combined with
2466 a later addressing insn to form a pre-indexed load or store
2467 operation. Consider:
2469 *((volatile int *)0xe0000100) = 1;
2470 *((volatile int *)0xe0000110) = 2;
2472 We want this to wind up as:
2476 str rB, [rA, #0x100]
2478 str rB, [rA, #0x110]
2480 rather than having to synthesize both large constants from scratch.
2482 Therefore, we calculate how many insns would be required to emit
2483 the constant starting from `best_start', and also starting from
2484 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2485 yield a shorter sequence, we may as well use zero. */
2487 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2488 && (count_insns_for_constant (remainder, 0) <=
2489 count_insns_for_constant (remainder, best_start)))
/* Emit PATTERN, wrapped in a COND_EXEC on a copy of COND when COND is
   non-null, so constant-synthesis sequences inherit the predicate of
   the insn being split.  */
2495 /* Emit an instruction with the indicated PATTERN. If COND is
2496 non-NULL, conditionalize the execution of the instruction on COND
2500 emit_constant_insn (rtx cond, rtx pattern)
2503 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2504 emit_insn (pattern);
/* Core constant-synthesis engine.  Like arm_split_constant but with an
   extra GENERATE flag: when clear, only count the insns that would be
   emitted (used for costing).  Tries, in rough order: degenerate cases
   per CODE, single-insn immediates (possibly negated/inverted), movw,
   sign-extension tricks, difference-of-two-immediates, halfword
   shift/merge, two-insn IOR/XOR via a temp, shift-in/shift-out
   patterns, double-shift masking, and finally the generic 8-bit-chunk
   loop starting at find_best_start, with an optional trailing mvn when
   final_invert is set.  Returns the insn count.
   NOTE(review): this is a gappy listing -- the switch on CODE, many
   `if (generate)' guards, `return insns;' lines, and assorted braces
   are missing.  Do not restructure from this extract; use the full
   arm.c.  */
2507 /* As above, but extra parameter GENERATE which, if clear, suppresses
2509 /* ??? This needs more work for thumb2. */
2512 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2513 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2518 int final_invert = 0;
2519 int can_negate_initial = 0;
2521 int num_bits_set = 0;
2522 int set_sign_bit_copies = 0;
2523 int clear_sign_bit_copies = 0;
2524 int clear_zero_bit_copies = 0;
2525 int set_zero_bit_copies = 0;
2527 unsigned HOST_WIDE_INT temp1, temp2;
2528 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
/* ARM immediates rotate by even amounts; Thumb-2 shifts by one.  */
2529 int step_size = TARGET_ARM ? 2 : 1;
2531 /* Find out which operations are safe for a given CODE. Also do a quick
2532 check for degenerate cases; these can occur when DImode operations
2543 can_negate_initial = 1;
/* Degenerate AND/IOR/XOR cases: all-ones or all-zeros masks reduce to
   a plain move, a NOT, or nothing at all.  */
2547 if (remainder == 0xffffffff)
2550 emit_constant_insn (cond,
2551 gen_rtx_SET (VOIDmode, target,
2552 GEN_INT (ARM_SIGN_EXTEND (val))));
2558 if (reload_completed && rtx_equal_p (target, source))
2562 emit_constant_insn (cond,
2563 gen_rtx_SET (VOIDmode, target, source));
2575 emit_constant_insn (cond,
2576 gen_rtx_SET (VOIDmode, target, const0_rtx));
2579 if (remainder == 0xffffffff)
2581 if (reload_completed && rtx_equal_p (target, source))
2584 emit_constant_insn (cond,
2585 gen_rtx_SET (VOIDmode, target, source));
2594 if (reload_completed && rtx_equal_p (target, source))
2597 emit_constant_insn (cond,
2598 gen_rtx_SET (VOIDmode, target, source));
2602 if (remainder == 0xffffffff)
2605 emit_constant_insn (cond,
2606 gen_rtx_SET (VOIDmode, target,
2607 gen_rtx_NOT (mode, source)));
2613 /* We treat MINUS as (val - source), since (source - val) is always
2614 passed as (source + (-val)). */
2618 emit_constant_insn (cond,
2619 gen_rtx_SET (VOIDmode, target,
2620 gen_rtx_NEG (mode, source)));
2623 if (const_ok_for_arm (val))
2626 emit_constant_insn (cond,
2627 gen_rtx_SET (VOIDmode, target,
2628 gen_rtx_MINUS (mode, GEN_INT (val),
2640 /* If we can do it in one insn get out quickly. */
2641 if (const_ok_for_arm (val)
2642 || (can_negate_initial && const_ok_for_arm (-val))
2643 || (can_invert && const_ok_for_arm (~val)))
2646 emit_constant_insn (cond,
2647 gen_rtx_SET (VOIDmode, target,
2649 ? gen_rtx_fmt_ee (code, mode, source,
2655 /* Calculate a few attributes that may be useful for specific
2657 /* Count number of leading zeros. */
2658 for (i = 31; i >= 0; i--)
2660 if ((remainder & (1 << i)) == 0)
2661 clear_sign_bit_copies++;
2666 /* Count number of leading 1's. */
2667 for (i = 31; i >= 0; i--)
2669 if ((remainder & (1 << i)) != 0)
2670 set_sign_bit_copies++;
2675 /* Count number of trailing zero's. */
2676 for (i = 0; i <= 31; i++)
2678 if ((remainder & (1 << i)) == 0)
2679 clear_zero_bit_copies++;
2684 /* Count number of trailing 1's. */
2685 for (i = 0; i <= 31; i++)
2687 if ((remainder & (1 << i)) != 0)
2688 set_zero_bit_copies++;
2696 /* See if we can use movw. */
2697 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2700 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2705 /* See if we can do this by sign_extending a constant that is known
2706 to be negative. This is a good, way of doing it, since the shift
2707 may well merge into a subsequent insn. */
2708 if (set_sign_bit_copies > 1)
2710 if (const_ok_for_arm
2711 (temp1 = ARM_SIGN_EXTEND (remainder
2712 << (set_sign_bit_copies - 1))))
2716 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2717 emit_constant_insn (cond,
2718 gen_rtx_SET (VOIDmode, new_src,
2720 emit_constant_insn (cond,
2721 gen_ashrsi3 (target, new_src,
2722 GEN_INT (set_sign_bit_copies - 1)));
2726 /* For an inverted constant, we will need to set the low bits,
2727 these will be shifted out of harm's way. */
2728 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2729 if (const_ok_for_arm (~temp1))
2733 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2734 emit_constant_insn (cond,
2735 gen_rtx_SET (VOIDmode, new_src,
2737 emit_constant_insn (cond,
2738 gen_ashrsi3 (target, new_src,
2739 GEN_INT (set_sign_bit_copies - 1)));
2745 /* See if we can calculate the value as the difference between two
2746 valid immediates. */
2747 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2749 int topshift = clear_sign_bit_copies & ~1;
/* Round the top byte of REMAINDER up to an encodable immediate.  */
2751 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2752 & (0xff000000 >> topshift));
2754 /* If temp1 is zero, then that means the 9 most significant
2755 bits of remainder were 1 and we've caused it to overflow.
2756 When topshift is 0 we don't need to do anything since we
2757 can borrow from 'bit 32'. */
2758 if (temp1 == 0 && topshift != 0)
2759 temp1 = 0x80000000 >> (topshift - 1);
2761 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2763 if (const_ok_for_arm (temp2))
2767 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2768 emit_constant_insn (cond,
2769 gen_rtx_SET (VOIDmode, new_src,
2771 emit_constant_insn (cond,
2772 gen_addsi3 (target, new_src,
2780 /* See if we can generate this by setting the bottom (or the top)
2781 16 bits, and then shifting these into the other half of the
2782 word. We only look for the simplest cases, to do more would cost
2783 too much. Be careful, however, not to generate this when the
2784 alternative would take fewer insns. */
2785 if (val & 0xffff0000)
2787 temp1 = remainder & 0xffff0000;
2788 temp2 = remainder & 0x0000ffff;
2790 /* Overlaps outside this range are best done using other methods. */
2791 for (i = 9; i < 24; i++)
2793 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2794 && !const_ok_for_arm (temp2))
2796 rtx new_src = (subtargets
2797 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2799 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2800 source, subtargets, generate);
2808 gen_rtx_ASHIFT (mode, source,
2815 /* Don't duplicate cases already considered. */
2816 for (i = 17; i < 24; i++)
2818 if (((temp1 | (temp1 >> i)) == remainder)
2819 && !const_ok_for_arm (temp1))
2821 rtx new_src = (subtargets
2822 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2824 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2825 source, subtargets, generate);
2830 gen_rtx_SET (VOIDmode, target,
2833 gen_rtx_LSHIFTRT (mode, source,
2844 /* If we have IOR or XOR, and the constant can be loaded in a
2845 single instruction, and we can find a temporary to put it in,
2846 then this can be done in two instructions instead of 3-4. */
2848 /* TARGET can't be NULL if SUBTARGETS is 0 */
2849 || (reload_completed && !reg_mentioned_p (target, source)))
2851 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2855 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2857 emit_constant_insn (cond,
2858 gen_rtx_SET (VOIDmode, sub,
2860 emit_constant_insn (cond,
2861 gen_rtx_SET (VOIDmode, target,
2862 gen_rtx_fmt_ee (code, mode,
2873 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2874 and the remainder 0s for e.g. 0xfff00000)
2875 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2877 This can be done in 2 instructions by using shifts with mov or mvn.
2882 mvn r0, r0, lsr #12 */
2883 if (set_sign_bit_copies > 8
2884 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2888 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2889 rtx shift = GEN_INT (set_sign_bit_copies);
2893 gen_rtx_SET (VOIDmode, sub,
2895 gen_rtx_ASHIFT (mode,
2900 gen_rtx_SET (VOIDmode, target,
2902 gen_rtx_LSHIFTRT (mode, sub,
2909 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2911 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2913 For eg. r0 = r0 | 0xfff
2918 if (set_zero_bit_copies > 8
2919 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2923 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2924 rtx shift = GEN_INT (set_zero_bit_copies);
2928 gen_rtx_SET (VOIDmode, sub,
2930 gen_rtx_LSHIFTRT (mode,
2935 gen_rtx_SET (VOIDmode, target,
2937 gen_rtx_ASHIFT (mode, sub,
2943 /* This will never be reached for Thumb2 because orn is a valid
2944 instruction. This is for Thumb1 and the ARM 32 bit cases.
2946 x = y | constant (such that ~constant is a valid constant)
2948 x = ~(~y & ~constant).
2950 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2954 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2955 emit_constant_insn (cond,
2956 gen_rtx_SET (VOIDmode, sub,
2957 gen_rtx_NOT (mode, source)));
2960 sub = gen_reg_rtx (mode);
2961 emit_constant_insn (cond,
2962 gen_rtx_SET (VOIDmode, sub,
2963 gen_rtx_AND (mode, source,
2965 emit_constant_insn (cond,
2966 gen_rtx_SET (VOIDmode, target,
2967 gen_rtx_NOT (mode, sub)));
2974 /* See if two shifts will do 2 or more insn's worth of work. */
2975 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2977 HOST_WIDE_INT shift_mask = ((0xffffffff
2978 << (32 - clear_sign_bit_copies))
2981 if ((remainder | shift_mask) != 0xffffffff)
2985 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2986 insns = arm_gen_constant (AND, mode, cond,
2987 remainder | shift_mask,
2988 new_src, source, subtargets, 1);
2993 rtx targ = subtargets ? NULL_RTX : target;
2994 insns = arm_gen_constant (AND, mode, cond,
2995 remainder | shift_mask,
2996 targ, source, subtargets, 0);
3002 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3003 rtx shift = GEN_INT (clear_sign_bit_copies);
3005 emit_insn (gen_ashlsi3 (new_src, source, shift));
3006 emit_insn (gen_lshrsi3 (target, new_src, shift));
3012 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3014 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3016 if ((remainder | shift_mask) != 0xffffffff)
3020 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3022 insns = arm_gen_constant (AND, mode, cond,
3023 remainder | shift_mask,
3024 new_src, source, subtargets, 1);
3029 rtx targ = subtargets ? NULL_RTX : target;
3031 insns = arm_gen_constant (AND, mode, cond,
3032 remainder | shift_mask,
3033 targ, source, subtargets, 0);
3039 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3040 rtx shift = GEN_INT (clear_zero_bit_copies);
3042 emit_insn (gen_lshrsi3 (new_src, source, shift));
3043 emit_insn (gen_ashlsi3 (target, new_src, shift));
/* Generic path: decide whether synthesizing the inverted or negated
   constant (with a final fix-up) would use fewer insns.  */
3055 for (i = 0; i < 32; i++)
3056 if (remainder & (1 << i))
3060 || (code != IOR && can_invert && num_bits_set > 16))
3061 remainder ^= 0xffffffff;
3062 else if (code == PLUS && num_bits_set > 16)
3063 remainder = (-remainder) & 0xffffffff;
3065 /* For XOR, if more than half the bits are set and there's a sequence
3066 of more than 8 consecutive ones in the pattern then we can XOR by the
3067 inverted constant and then invert the final result; this may save an
3068 instruction and might also lead to the final mvn being merged with
3069 some other operation. */
3070 else if (code == XOR && num_bits_set > 16
3071 && (count_insns_for_constant (remainder ^ 0xffffffff,
3073 (remainder ^ 0xffffffff))
3074 < count_insns_for_constant (remainder,
3075 find_best_start (remainder))))
3077 remainder ^= 0xffffffff;
3086 /* Now try and find a way of doing the job in either two or three
3088 We start by looking for the largest block of zeros that are aligned on
3089 a 2-bit boundary, we then fill up the temps, wrapping around to the
3090 top of the word when we drop off the bottom.
3091 In the worst case this code should produce no more than four insns.
3092 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3093 best place to start. */
3095 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3098 /* Now start emitting the insns. */
3099 i = find_best_start (remainder);
3106 if (remainder & (3 << (i - 2)))
3111 temp1 = remainder & ((0x0ff << end)
3112 | ((i < end) ? (0xff >> (32 - end)) : 0));
3113 remainder &= ~temp1;
3117 rtx new_src, temp1_rtx;
3119 if (code == SET || code == MINUS)
3121 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3122 if (can_invert && code != MINUS)
3127 if ((final_invert || remainder) && subtargets)
3128 new_src = gen_reg_rtx (mode);
3133 else if (can_negate)
3137 temp1 = trunc_int_for_mode (temp1, mode);
3138 temp1_rtx = GEN_INT (temp1);
3142 else if (code == MINUS)
3143 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3145 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3147 emit_constant_insn (cond,
3148 gen_rtx_SET (VOIDmode, new_src,
3158 else if (code == MINUS)
3164 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
/* Final mvn when we synthesized the inverted constant above.  */
3174 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3175 gen_rtx_NOT (mode, source)));
/* Canonicalize (CODE *op0 *op1) so that the immediate is easier to
   load: for DImode, map GT/LE (and GTU/LEU in Thumb) onto the
   supported GE/LT/GEU/LTU by bumping the constant or swapping
   operands; for narrower modes, nudge out-of-range constants by one
   so that I or -I becomes a valid ARM immediate.  Returns the
   (possibly new) comparison code.
   NOTE(review): the switch on CODE, the INTVAL extraction of `i', and
   several overflow guards are missing from this gappy extract.  */
3182 /* Canonicalize a comparison so that we are more likely to recognize it.
3183 This can be done for a few constant compares, where we can make the
3184 immediate value easier to load. */
3187 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3189 enum machine_mode mode;
3190 unsigned HOST_WIDE_INT i, maxval;
3192 mode = GET_MODE (*op0);
3193 if (mode == VOIDmode)
3194 mode = GET_MODE (*op1);
/* Largest positive value representable in MODE (signed).  */
3196 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3198 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3199 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3200 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3201 for GTU/LEU in Thumb mode. */
3206 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3208 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3211 if (code == GT || code == LE
3212 || (!TARGET_ARM && (code == GTU || code == LEU)))
3214 /* Missing comparison. First try to use an available
3216 if (GET_CODE (*op1) == CONST_INT)
/* GT x  ==>  GE (x+1), provided x+1 doesn't overflow and is
   loadable as a DImode immediate pair.  */
3224 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3226 *op1 = GEN_INT (i + 1);
3227 return code == GT ? GE : LT;
3232 if (i != ~((unsigned HOST_WIDE_INT) 0)
3233 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3235 *op1 = GEN_INT (i + 1);
3236 return code == GTU ? GEU : LTU;
3244 /* If that did not work, reverse the condition. */
3248 return swap_condition (code);
3254 /* Comparisons smaller than DImode. Only adjust comparisons against
3255 an out-of-range constant. */
3256 if (GET_CODE (*op1) != CONST_INT
3257 || const_ok_for_arm (INTVAL (*op1))
3258 || const_ok_for_arm (- INTVAL (*op1)))
/* GT/LE  ==>  GE/LT against i+1.  */
3272 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3274 *op1 = GEN_INT (i + 1);
3275 return code == GT ? GE : LT;
/* GE/LT  ==>  GT/LE against i-1.  */
3282 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3284 *op1 = GEN_INT (i - 1);
3285 return code == GE ? GT : LE;
/* GTU/LEU  ==>  GEU/LTU against i+1 (guard against wraparound).  */
3291 if (i != ~((unsigned HOST_WIDE_INT) 0)
3292 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3294 *op1 = GEN_INT (i + 1);
3295 return code == GTU ? GEU : LTU;
/* GEU/LTU  ==>  GTU/LEU against i-1.  */
3302 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3304 *op1 = GEN_INT (i - 1);
3305 return code == GEU ? GTU : LEU;
/* TARGET_FUNCTION_VALUE hook: pick the register(s) that hold a value
   of TYPE returned by FUNC.  AAPCS delegates to the coprocessor-aware
   allocator; otherwise integers are promoted and big-endian AAPCS
   small structs are widened to a whole word before LIBCALL_VALUE picks
   the register.  */
3317 /* Define how to find the value returned by a function. */
3320 arm_function_value(const_tree type, const_tree func,
3321 bool outgoing ATTRIBUTE_UNUSED)
3323 enum machine_mode mode;
3324 int unsignedp ATTRIBUTE_UNUSED;
3325 rtx r ATTRIBUTE_UNUSED;
3327 mode = TYPE_MODE (type);
3329 if (TARGET_AAPCS_BASED)
3330 return aapcs_allocate_return_reg (mode, type, func);
3332 /* Promote integer types. */
3333 if (INTEGRAL_TYPE_P (type))
3334 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3336 /* Promotes small structs returned in a register to full-word size
3337 for big-endian AAPCS. */
3338 if (arm_return_in_msb (type))
3340 HOST_WIDE_INT size = int_size_in_bytes (type);
3341 if (size % UNITS_PER_WORD != 0)
3343 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3344 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3348 return LIBCALL_VALUE (mode);
/* htab equality callback: two libcall rtxes are equal iff
   rtx_equal_p says so.  */
3352 libcall_eq (const void *p1, const void *p2)
3354 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
/* htab hash callback: hash a libcall rtx with hash_rtx.  */
3358 libcall_hash (const void *p1)
3360 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
/* Insert LIBCALL into HTAB (helper for building the table in
   arm_libcall_uses_aapcs_base).  */
3364 add_libcall (htab_t htab, rtx libcall)
3366 *htab_find_slot (htab, libcall, INSERT) = libcall;
/* Return true if LIBCALL is one of the float<->int conversion
   libcalls that always use the AAPCS base (integer-register) calling
   convention, regardless of the float ABI.  The lookup table is built
   lazily on first use and kept in a static hash table.
   NOTE(review): the `if (!init_done)' guard and `init_done = true'
   lines are missing from this extract.  */
3370 arm_libcall_uses_aapcs_base (const_rtx libcall)
3372 static bool init_done = false;
3373 static htab_t libcall_htab;
3379 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
/* int -> float conversions (signed).  */
3381 add_libcall (libcall_htab,
3382 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3383 add_libcall (libcall_htab,
3384 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3385 add_libcall (libcall_htab,
3386 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3387 add_libcall (libcall_htab,
3388 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
/* int -> float conversions (unsigned).  */
3390 add_libcall (libcall_htab,
3391 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3392 add_libcall (libcall_htab,
3393 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3394 add_libcall (libcall_htab,
3395 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3396 add_libcall (libcall_htab,
3397 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
/* half-precision conversions and float -> 64-bit int.  */
3399 add_libcall (libcall_htab,
3400 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3401 add_libcall (libcall_htab,
3402 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3403 add_libcall (libcall_htab,
3404 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3405 add_libcall (libcall_htab,
3406 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3407 add_libcall (libcall_htab,
3408 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3409 add_libcall (libcall_htab,
3410 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3413 return libcall && htab_find (libcall_htab, libcall) != NULL;
/* Pick the return register for a libcall of MODE.  Under a non-AAPCS
   default PCS, the base-PCS conversion libcalls still return float
   results in integer registers; everything else uses LIBCALL_VALUE.  */
3417 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3419 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3420 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3422 /* The following libcalls return their result in integer registers,
3423 even though they return a floating point value. */
3424 if (arm_libcall_uses_aapcs_base (libcall))
3425 return gen_rtx_REG (mode, ARG_REGISTER(1));
3429 return LIBCALL_VALUE (mode);
/* TARGET_APPLY_RESULT_SIZE hook: bytes needed to save every possible
   return register for __builtin_apply, growing with each enabled
   coprocessor ABI (hard-float FPA/Maverick/VFP, iWMMXt).
   NOTE(review): the base `size' initialization, the per-ABI size
   additions, and the return are all missing from this extract.  */
3432 /* Determine the amount of memory needed to store the possible return
3433 registers of an untyped call. */
3435 arm_apply_result_size (void)
3441 if (TARGET_HARD_FLOAT_ABI)
3447 if (TARGET_MAVERICK)
3450 if (TARGET_IWMMXT_ABI)
3457 /* Decide whether TYPE should be returned in memory (true)
3458 or in a register (false). FNTYPE is the type of the function making
   the call; it may be NULL_TREE (e.g. when recursing on a field type). */
3461 arm_return_in_memory (const_tree type, const_tree fntype)
3465 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3467 if (TARGET_AAPCS_BASED)
3469 /* Simple, non-aggregate types (ie not including vectors and
3470 complex) are always returned in a register (or registers).
3471 We don't care about which register here, so we can short-cut
3472 some of the detail. */
3473 if (!AGGREGATE_TYPE_P (type)
3474 && TREE_CODE (type) != VECTOR_TYPE
3475 && TREE_CODE (type) != COMPLEX_TYPE)
3478 /* Any return value that is no larger than one word can be
   returned in r0 (comparison is unsigned, so negative sizes fail it). */
3480 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3483 /* Check any available co-processors to see if they accept the
3484 type as a register candidate (VFP, for example, can return
3485 some aggregates in consecutive registers). These aren't
3486 available if the call is variadic. */
3487 if (aapcs_select_return_coproc (type, fntype) >= 0)
3490 /* Vector values should be returned using ARM registers, not
3491 memory (unless they're over 16 bytes, which will break since
3492 we only have four call-clobbered registers to play with). */
3493 if (TREE_CODE (type) == VECTOR_TYPE)
3494 return (size < 0 || size > (4 * UNITS_PER_WORD));
3496 /* The rest go in memory. */
3500 if (TREE_CODE (type) == VECTOR_TYPE)
3501 return (size < 0 || size > (4 * UNITS_PER_WORD));
3503 if (!AGGREGATE_TYPE_P (type) &&
3504 (TREE_CODE (type) != VECTOR_TYPE))
3505 /* All simple types are returned in registers. */
3508 if (arm_abi != ARM_ABI_APCS)
3510 /* ATPCS and later return aggregate types in memory only if they are
3511 larger than a word (or are variable size). */
3512 return (size < 0 || size > UNITS_PER_WORD);
3515 /* For the arm-wince targets we choose to be compatible with Microsoft's
3516 ARM and Thumb compilers, which always return aggregates in memory. */
3518 /* All structures/unions bigger than one word are returned in memory.
3519 Also catch the case where int_size_in_bytes returns -1. In this case
3520 the aggregate is either huge or of variable size, and in either case
3521 we will want to return it via memory and not in a register. */
3522 if (size < 0 || size > UNITS_PER_WORD)
3525 if (TREE_CODE (type) == RECORD_TYPE)
3529 /* For a struct the APCS says that we only return in a register
3530 if the type is 'integer like' and every addressable element
3531 has an offset of zero. For practical purposes this means
3532 that the structure can have at most one non bit-field element
3533 and that this element must be the first one in the structure. */
3535 /* Find the first field, ignoring non FIELD_DECL things which will
3536 have been created by C++. */
3537 for (field = TYPE_FIELDS (type);
3538 field && TREE_CODE (field) != FIELD_DECL;
3539 field = DECL_CHAIN (field))
3543 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3545 /* Check that the first field is valid for returning in a register. */
3547 /* ... Floats are not allowed */
3548 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3551 /* ... Aggregates that are not themselves valid for returning in
3552 a register are not allowed.  (Recursive call with NULL fntype.) */
3553 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3556 /* Now check the remaining fields, if any. Only bitfields are allowed,
3557 since they are not addressable. */
3558 for (field = DECL_CHAIN (field);
3560 field = DECL_CHAIN (field))
3562 if (TREE_CODE (field) != FIELD_DECL)
3565 if (!DECL_BIT_FIELD_TYPE (field))
3572 if (TREE_CODE (type) == UNION_TYPE)
3576 /* Unions can be returned in registers if every element is
3577 integral, or can be returned in an integer register. */
3578 for (field = TYPE_FIELDS (type);
3580 field = DECL_CHAIN (field))
3582 if (TREE_CODE (field) != FIELD_DECL)
3585 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3588 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3594 #endif /* not ARM_WINCE */
3596 /* Return all other types in memory. */
3600 /* Indicate whether or not words of a double are in big-endian order.
   Returns 1 for big-endian word order, 0 otherwise. */
3603 arm_float_words_big_endian (void)
3605 if (TARGET_MAVERICK)
3608 /* For FPA, float words are always big-endian. For VFP, float words
3609 follow the memory system mode. */
3617 return (TARGET_BIG_END ? 1 : 0);
/* Table mapping "pcs" attribute argument strings onto ARM PCS variants.
   Terminated by a NULL entry; scanned linearly by arm_pcs_from_attribute. */
3622 const struct pcs_attribute_arg
3626 } pcs_attribute_args[] =
3628 {"aapcs", ARM_PCS_AAPCS},
3629 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3631 /* We could recognize these, but changes would be needed elsewhere
3632 * to implement them. */
3633 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3634 {"atpcs", ARM_PCS_ATPCS},
3635 {"apcs", ARM_PCS_APCS},
3637 {NULL, ARM_PCS_UNKNOWN}
/* Decode a "pcs" attribute argument list ATTR into an arm_pcs value.
   Returns ARM_PCS_UNKNOWN if the argument is absent, not a string
   constant, or does not match any known PCS name. */
3641 arm_pcs_from_attribute (tree attr)
3643 const struct pcs_attribute_arg *ptr;
3646 /* Get the value of the argument. */
3647 if (TREE_VALUE (attr) == NULL_TREE
3648 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3649 return ARM_PCS_UNKNOWN;
3651 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3653 /* Check it against the list of known arguments. */
3654 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3655 if (streq (arg, ptr->arg))
3658 /* An unrecognized PCS variant name. */
3659 return ARM_PCS_UNKNOWN;
3662 /* Get the PCS variant to use for this call. TYPE is the function's type
3663 specification, DECL is the specific declaration. DECL may be null if
3664 the call could be indirect or if this is a library call. */
3666 arm_get_pcs_model (const_tree type, const_tree decl)
3668 bool user_convention = false;
3669 enum arm_pcs user_pcs = arm_pcs_default;
3674 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3677 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3678 user_convention = true;
3681 if (TARGET_AAPCS_BASED)
3683 /* Detect varargs functions. These always use the base rules
3684 (no argument is ever a candidate for a co-processor
   register).  NOTE(review): the condition reads true for functions
   with a fixed final argument; the elided context presumably inverts
   or completes it -- confirm against the full source. */
3686 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3687 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3688 != void_type_node));
3690 if (user_convention)
3692 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3693 sorry ("Non-AAPCS derived PCS variant");
3694 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3695 error ("Variadic functions must use the base AAPCS variant");
3699 return ARM_PCS_AAPCS;
3700 else if (user_convention)
3702 else if (decl && flag_unit_at_a_time)
3704 /* Local functions never leak outside this compilation unit,
3705 so we are free to use whatever conventions are
   appropriate for them. */
3707 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3708 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3710 return ARM_PCS_AAPCS_LOCAL;
3713 else if (user_convention && user_pcs != arm_pcs_default)
3714 sorry ("PCS variant");
3716 /* For everything else we use the target's default. */
3717 return arm_pcs_default;
/* Initialize the VFP co-processor state in PCUM at the start of laying
   out a call: all VFP argument registers free, none allocated. */
3722 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3723 const_tree fntype ATTRIBUTE_UNUSED,
3724 rtx libcall ATTRIBUTE_UNUSED,
3725 const_tree fndecl ATTRIBUTE_UNUSED)
3727 /* Record the unallocated VFP registers. */
3728 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3729 pcum->aapcs_vfp_reg_alloc = 0;
3732 /* Walk down the type tree of TYPE counting consecutive base elements.
3733 If *MODEP is VOIDmode, then set it to the first valid floating point
3734 type. If a non-floating point type is found, or if a floating point
3735 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3736 otherwise return the count in the sub-tree.  Used to recognize
   homogeneous floating-point/vector aggregates for the VFP ABI. */
3738 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3740 enum machine_mode mode;
3743 switch (TREE_CODE (type))
3746 mode = TYPE_MODE (type);
3747 if (mode != DFmode && mode != SFmode)
3750 if (*modep == VOIDmode)
3759 mode = TYPE_MODE (TREE_TYPE (type));
3760 if (mode != DFmode && mode != SFmode)
3763 if (*modep == VOIDmode)
3772 /* Use V2SImode and V4SImode as representatives of all 64-bit
3773 and 128-bit vector types, whether or not those modes are
3774 supported with the present options. */
3775 size = int_size_in_bytes (type);
3788 if (*modep == VOIDmode)
3791 /* Vector modes are considered to be opaque: two vectors are
3792 equivalent for the purposes of being homogeneous aggregates
3793 if they are the same size. */
3802 tree index = TYPE_DOMAIN (type);
3804 /* Can't handle incomplete types. */
3805 if (!COMPLETE_TYPE_P(type))
3808 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3811 || !TYPE_MAX_VALUE (index)
3812 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3813 || !TYPE_MIN_VALUE (index)
3814 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3818 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3819 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3821 /* There must be no padding. */
3822 if (!host_integerp (TYPE_SIZE (type), 1)
3823 || (tree_low_cst (TYPE_SIZE (type), 1)
3824 != count * GET_MODE_BITSIZE (*modep)))
3836 /* Can't handle incomplete types. */
3837 if (!COMPLETE_TYPE_P(type))
3840 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3842 if (TREE_CODE (field) != FIELD_DECL)
3845 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3851 /* There must be no padding. */
3852 if (!host_integerp (TYPE_SIZE (type), 1)
3853 || (tree_low_cst (TYPE_SIZE (type), 1)
3854 != count * GET_MODE_BITSIZE (*modep)))
3861 case QUAL_UNION_TYPE:
3863 /* These aren't very interesting except in a degenerate case. */
3868 /* Can't handle incomplete types. */
3869 if (!COMPLETE_TYPE_P(type))
3872 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3874 if (TREE_CODE (field) != FIELD_DECL)
3877 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3880 count = count > sub_count ? count : sub_count;
3883 /* There must be no padding. */
3884 if (!host_integerp (TYPE_SIZE (type), 1)
3885 || (tree_low_cst (TYPE_SIZE (type), 1)
3886 != count * GET_MODE_BITSIZE (*modep)))
3899 /* Return true if PCS_VARIANT should use VFP registers.  IS_DOUBLE is
   true when a double-precision register would be needed. */
3901 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3903 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3905 static bool seen_thumb1_vfp = false;
3907 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3909 sorry ("Thumb-1 hard-float VFP ABI");
3910 /* sorry() is not immediately fatal, so only display this once. */
3911 seen_thumb1_vfp = true;
3917 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
   /* Only AAPCS_VFP and AAPCS_LOCAL may use VFP registers at all. */
3920 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3921 (TARGET_VFP_DOUBLE || !is_double));
/* Determine whether MODE/TYPE is a candidate for passing or returning
   in VFP registers under PCS_VARIANT.  On success store the element
   mode in *BASE_MODE (and, in the elided lines, the element count in
   *COUNT). */
3925 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3926 enum machine_mode mode, const_tree type,
3927 enum machine_mode *base_mode, int *count)
3929 enum machine_mode new_mode = VOIDmode;
3931 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3932 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3933 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3938 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
   /* A complex value is two elements of its component mode. */
3941 new_mode = (mode == DCmode ? DFmode : SFmode);
3943 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3945 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3947 if (ag_count > 0 && ag_count <= 4)
3956 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3959 *base_mode = new_mode;
/* Wrapper: is MODE/TYPE a candidate for being returned in VFP
   registers under PCS_VARIANT?  Discards the mode/count results. */
3964 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3965 enum machine_mode mode, const_tree type)
3967 int count ATTRIBUTE_UNUSED;
3968 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3970 if (!use_vfp_abi (pcs_variant, false))
3972 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
/* Wrapper: is MODE/TYPE a candidate for being passed in VFP registers?
   Records the element mode and count in PCUM for later allocation. */
3977 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3980 if (!use_vfp_abi (pcum->pcs_variant, false))
3983 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3984 &pcum->aapcs_vfp_rmode,
3985 &pcum->aapcs_vfp_rcount);
/* Try to allocate contiguous VFP registers for the argument described
   by PCUM.  On success set pcum->aapcs_reg (a REG, or a PARALLEL of
   per-element REG/offset pairs for aggregates) and return through the
   (elided) success path; the shift is the register footprint of one
   element measured in single-precision register slots. */
3989 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3990 const_tree type ATTRIBUTE_UNUSED)
3992 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3993 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3996 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3997 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3999 pcum->aapcs_vfp_reg_alloc = mask << regno;
4000 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4003 int rcount = pcum->aapcs_vfp_rcount;
4005 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4009 /* Avoid using unsupported vector modes. */
4010 if (rmode == V2SImode)
4012 else if (rmode == V4SImode)
4019 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4020 for (i = 0; i < rcount; i++)
4022 rtx tmp = gen_rtx_REG (rmode,
4023 FIRST_VFP_REGNUM + regno + i * rshift);
4024 tmp = gen_rtx_EXPR_LIST
4026 GEN_INT (i * GET_MODE_SIZE (rmode)));
4027 XVECEXP (par, 0, i) = tmp;
4030 pcum->aapcs_reg = par;
4033 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
/* Build the RTX describing where a value of MODE/TYPE is returned in
   VFP registers: either a single REG at FIRST_VFP_REGNUM or a PARALLEL
   of per-element registers for homogeneous aggregates. */
4040 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4041 enum machine_mode mode,
4042 const_tree type ATTRIBUTE_UNUSED)
4044 if (!use_vfp_abi (pcs_variant, false))
4047 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4050 enum machine_mode ag_mode;
4055 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
   /* Avoid using unsupported vector modes (mirrors aapcs_vfp_allocate). */
4060 if (ag_mode == V2SImode)
4062 else if (ag_mode == V4SImode)
4068 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4069 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4070 for (i = 0; i < count; i++)
4072 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4073 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4074 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4075 XVECEXP (par, 0, i) = tmp;
4081 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
/* Commit the registers provisionally allocated for the current
   argument: mark them used and clear the per-argument allocation. */
4085 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4086 enum machine_mode mode ATTRIBUTE_UNUSED,
4087 const_tree type ATTRIBUTE_UNUSED)
4089 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4090 pcum->aapcs_vfp_reg_alloc = 0;
/* Expand to the six hook functions for co-processor X, in the order
   declared in the layout struct below. */
4094 #define AAPCS_CP(X) \
4096 aapcs_ ## X ## _cum_init, \
4097 aapcs_ ## X ## _is_call_candidate, \
4098 aapcs_ ## X ## _allocate, \
4099 aapcs_ ## X ## _is_return_candidate, \
4100 aapcs_ ## X ## _allocate_return_reg, \
4101 aapcs_ ## X ## _advance \
4104 /* Table of co-processors that can be used to pass arguments in
4105 registers. Ideally no argument should be a candidate for more than
4106 one co-processor table entry, but the table is processed in order
4107 and stops after the first match. If that entry then fails to put
4108 the argument into a co-processor register, the argument will go on
   the stack. */
4112 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4113 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4115 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4116 BLKmode) is a candidate for this co-processor's registers; this
4117 function should ignore any position-dependent state in
4118 CUMULATIVE_ARGS and only use call-type dependent information. */
4119 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4121 /* Return true if the argument does get a co-processor register; it
4122 should set aapcs_reg to an RTX of the register allocated as is
4123 required for a return from FUNCTION_ARG. */
4124 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4126 /* Return true if a result of mode MODE (or type TYPE if MODE is
4127 BLKmode) can be returned in this co-processor's registers. */
4128 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4130 /* Allocate and return an RTX element to hold the return type of a
4131 call, this routine must not fail and will only be called if
4132 is_return_candidate returned true with the same parameters. */
4133 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4135 /* Finish processing this argument and prepare to start processing
   the next one. */
4137 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4138 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
/* Return the index of the first co-processor slot whose
   is_call_candidate hook accepts MODE/TYPE, scanning in table order
   (the not-found return value is elided in this excerpt). */
4146 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4151 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4152 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
/* Return the index of the co-processor slot (if any) that can return a
   value of TYPE for a function of type FNTYPE. */
4159 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4161 /* We aren't passed a decl, so we can't check that a call is local.
4162 However, it isn't clear that that would be a win anyway, since it
4163 might limit some tail-calling opportunities. */
4164 enum arm_pcs pcs_variant;
4168 const_tree fndecl = NULL_TREE;
4170 if (TREE_CODE (fntype) == FUNCTION_DECL)
   /* FNTYPE may actually be a decl; split it into decl and type. */
4173 fntype = TREE_TYPE (fntype);
4176 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4179 pcs_variant = arm_pcs_default;
4181 if (pcs_variant != ARM_PCS_AAPCS)
4185 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4186 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
/* Return the RTX for the location in which a value of MODE/TYPE is
   returned from a function of type FNTYPE, trying each co-processor's
   return hook first and falling back to the core register r0. */
4195 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4198 /* We aren't passed a decl, so we can't check that a call is local.
4199 However, it isn't clear that that would be a win anyway, since it
4200 might limit some tail-calling opportunities. */
4201 enum arm_pcs pcs_variant;
4202 int unsignedp ATTRIBUTE_UNUSED;
4206 const_tree fndecl = NULL_TREE;
4208 if (TREE_CODE (fntype) == FUNCTION_DECL)
4211 fntype = TREE_TYPE (fntype);
4214 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4217 pcs_variant = arm_pcs_default;
4219 /* Promote integer types. */
4220 if (type && INTEGRAL_TYPE_P (type))
4221 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4223 if (pcs_variant != ARM_PCS_AAPCS)
4227 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4228 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4230 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4234 /* Promotes small structs returned in a register to full-word size
4235 for big-endian AAPCS. */
4236 if (type && arm_return_in_msb (type))
4238 HOST_WIDE_INT size = int_size_in_bytes (type);
4239 if (size % UNITS_PER_WORD != 0)
4241 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4242 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4246 return gen_rtx_REG (mode, R0_REGNUM);
/* Return the AAPCS result location for a libcall returning MODE
   (no type or function-type information available). */
4250 aapcs_libcall_value (enum machine_mode mode)
4252 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4255 /* Lay out a function argument using the AAPCS rules. The rule
4256 numbers referred to here are those in the AAPCS. */
4258 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4259 tree type, int named)
4264 /* We only need to do this once per argument. */
4265 if (pcum->aapcs_arg_processed)
4268 pcum->aapcs_arg_processed = true;
4270 /* Special case: if named is false then we are handling an incoming
4271 anonymous argument which is on the stack. */
4275 /* Is this a potential co-processor register candidate? */
4276 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4278 int slot = aapcs_select_call_coproc (pcum, mode, type);
4279 pcum->aapcs_cprc_slot = slot;
4281 /* We don't have to apply any of the rules from part B of the
4282 preparation phase, these are handled elsewhere in the
   compiler. */
4287 /* A Co-processor register candidate goes either in its own
4288 class of registers or on the stack. */
4289 if (!pcum->aapcs_cprc_failed[slot])
4291 /* C1.cp - Try to allocate the argument to co-processor
   registers. */
4293 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4296 /* C2.cp - Put the argument on the stack and note that we
4297 can't assign any more candidates in this slot. We also
4298 need to note that we have allocated stack space, so that
4299 we won't later try to split a non-cprc candidate between
4300 core registers and the stack. */
4301 pcum->aapcs_cprc_failed[slot] = true;
4302 pcum->can_split = false;
4305 /* We didn't get a register, so this argument goes on the
   stack. */
4307 gcc_assert (pcum->can_split == false);
4312 /* C3 - For double-word aligned arguments, round the NCRN up to the
4313 next even number. */
4314 ncrn = pcum->aapcs_ncrn;
4315 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4318 nregs = ARM_NUM_REGS2(mode, type);
4320 /* Sigh, this test should really assert that nregs > 0, but a GCC
4321 extension allows empty structs and then gives them empty size; it
4322 then allows such a structure to be passed by value. For some of
4323 the code below we have to pretend that such an argument has
4324 non-zero size so that we 'locate' it correctly either in
4325 registers or on the stack. */
4326 gcc_assert (nregs >= 0);
4328 nregs2 = nregs ? nregs : 1;
4330 /* C4 - Argument fits entirely in core registers. */
4331 if (ncrn + nregs2 <= NUM_ARG_REGS)
4333 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4334 pcum->aapcs_next_ncrn = ncrn + nregs;
4338 /* C5 - Some core registers left and there are no arguments already
4339 on the stack: split this argument between the remaining core
4340 registers and the stack. */
4341 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4343 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4344 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4345 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4349 /* C6 - NCRN is set to 4. */
4350 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4352 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4356 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4357 for a call to a function whose data type is FNTYPE.
4358 For a library call, FNTYPE is NULL. */
4360 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4362 tree fndecl ATTRIBUTE_UNUSED)
4364 /* Long call handling. */
4366 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4368 pcum->pcs_variant = arm_pcs_default;
4370 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4372 if (arm_libcall_uses_aapcs_base (libname))
4373 pcum->pcs_variant = ARM_PCS_AAPCS;
   /* Reset all per-call AAPCS bookkeeping state. */
4375 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4376 pcum->aapcs_reg = NULL_RTX;
4377 pcum->aapcs_partial = 0;
4378 pcum->aapcs_arg_processed = false;
4379 pcum->aapcs_cprc_slot = -1;
4380 pcum->can_split = true;
4382 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4386 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4388 pcum->aapcs_cprc_failed[i] = false;
4389 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4397 /* On the ARM, the offset starts at 0. */
4399 pcum->iwmmxt_nregs = 0;
4400 pcum->can_split = true;
4402 /* Varargs vectors are treated the same as long long.
4403 named_count avoids having to change the way arm handles 'named' */
4404 pcum->named_count = 0;
4407 if (TARGET_REALLY_IWMMXT && fntype)
4411 for (fn_arg = TYPE_ARG_TYPES (fntype);
4413 fn_arg = TREE_CHAIN (fn_arg))
4414 pcum->named_count += 1;
4416 if (! pcum->named_count)
4417 pcum->named_count = INT_MAX;
4422 /* Return true if mode/type need doubleword alignment (i.e. alignment
   stricter than PARM_BOUNDARY). */
4424 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4426 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4427 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4431 /* Determine where to put an argument to a function.
4432 Value is zero to push the argument on the stack,
4433 or a hard register in which to store the argument.
4435 MODE is the argument's machine mode.
4436 TYPE is the data type of the argument (as a tree).
4437 This is null for libcalls where that information may
   not be available.
4439 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4440 the preceding args and about the function being called.
4441 NAMED is nonzero if this argument is a named parameter
4442 (otherwise it is an extra parameter matching an ellipsis). */
4445 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4446 tree type, int named)
4450 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4451 a call insn (op3 of a call_value insn). */
4452 if (mode == VOIDmode)
4455 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
   /* AAPCS-based variants delegate entirely to the AAPCS layout code. */
4457 aapcs_layout_arg (pcum, mode, type, named);
4458 return pcum->aapcs_reg;
4461 /* Varargs vectors are treated the same as long long.
4462 named_count avoids having to change the way arm handles 'named' */
4463 if (TARGET_IWMMXT_ABI
4464 && arm_vector_mode_supported_p (mode)
4465 && pcum->named_count > pcum->nargs + 1)
4467 if (pcum->iwmmxt_nregs <= 9)
4468 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4471 pcum->can_split = false;
4476 /* Put doubleword aligned quantities in even register pairs. */
4478 && ARM_DOUBLEWORD_ALIGN
4479 && arm_needs_doubleword_align (mode, type)
4482 if (mode == VOIDmode)
4483 /* Pick an arbitrary value for operand 2 of the call insn. */
4486 /* Only allow splitting an arg between regs and memory if all preceding
4487 args were allocated to regs. For args passed by reference we only count
4488 the reference pointer. */
4489 if (pcum->can_split)
4492 nregs = ARM_NUM_REGS2 (mode, type);
4494 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4497 return gen_rtx_REG (mode, pcum->nregs);
/* Return the number of bytes of the current argument that are passed
   in registers when the argument is split between registers and the
   stack; zero when it is not split. */
4501 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4502 tree type, bool named)
4504 int nregs = pcum->nregs;
4506 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4508 aapcs_layout_arg (pcum, mode, type, named);
4509 return pcum->aapcs_partial;
4512 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4515 if (NUM_ARG_REGS > nregs
4516 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4518 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
/* Advance the argument-layout state in PCUM past the current argument
   of MODE/TYPE, committing any co-processor register allocation. */
4524 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4525 tree type, bool named)
4527 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4529 aapcs_layout_arg (pcum, mode, type, named);
4531 if (pcum->aapcs_cprc_slot >= 0)
4533 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4535 pcum->aapcs_cprc_slot = -1;
4538 /* Generic stuff. */
4539 pcum->aapcs_arg_processed = false;
4540 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4541 pcum->aapcs_reg = NULL_RTX;
4542 pcum->aapcs_partial = 0;
   /* Legacy (non-AAPCS) bookkeeping. */
4547 if (arm_vector_mode_supported_p (mode)
4548 && pcum->named_count > pcum->nargs
4549 && TARGET_IWMMXT_ABI)
4550 pcum->iwmmxt_nregs += 1;
4552 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4556 /* Variable sized types are passed by reference. This is a GCC
4557 extension to the ARM ABI.  A type is variable-sized when its
   TYPE_SIZE is not an INTEGER_CST. */
4560 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4561 enum machine_mode mode ATTRIBUTE_UNUSED,
4562 const_tree type, bool named ATTRIBUTE_UNUSED)
4564 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4567 /* Encode the current state of the #pragma [no_]long_calls. */
4570 OFF, /* No #pragma [no_]long_calls is in effect. */
4571 LONG, /* #pragma long_calls is in effect. */
4572 SHORT /* #pragma no_long_calls is in effect. */
4575 static arm_pragma_enum arm_pragma_long_calls = OFF;
/* Pragma callbacks: each simply records the new long-calls state. */
4578 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4580 arm_pragma_long_calls = LONG;
4584 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4586 arm_pragma_long_calls = SHORT;
4590 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4592 arm_pragma_long_calls = OFF;
4595 /* Handle an attribute requiring a FUNCTION_DECL;
4596 arguments as in struct attribute_spec.handler.  Warns and drops the
   attribute when applied to anything other than a function. */
4598 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4599 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4601 if (TREE_CODE (*node) != FUNCTION_DECL)
4603 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4605 *no_add_attrs = true;
4611 /* Handle an "interrupt" or "isr" attribute;
4612 arguments as in struct attribute_spec.handler. */
4614 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
   /* Decl case: only FUNCTION_DECLs may carry the attribute. */
4619 if (TREE_CODE (*node) != FUNCTION_DECL)
4621 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4623 *no_add_attrs = true;
4625 /* FIXME: the argument if any is checked for type attributes;
4626 should it be checked for decl ones? */
   /* Type case: validate the ISR kind argument. */
4630 if (TREE_CODE (*node) == FUNCTION_TYPE
4631 || TREE_CODE (*node) == METHOD_TYPE)
4633 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4635 warning (OPT_Wattributes, "%qE attribute ignored",
4637 *no_add_attrs = true;
4640 else if (TREE_CODE (*node) == POINTER_TYPE
4641 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4642 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4643 && arm_isr_value (args) != ARM_FT_UNKNOWN)
   /* Pointer-to-function: push the attribute onto the pointed-to type. */
4645 *node = build_variant_type_copy (*node);
4646 TREE_TYPE (*node) = build_type_attribute_variant
4648 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4649 *no_add_attrs = true;
4653 /* Possibly pass this attribute on from the type to a decl. */
4654 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4655 | (int) ATTR_FLAG_FUNCTION_NEXT
4656 | (int) ATTR_FLAG_ARRAY_NEXT))
4658 *no_add_attrs = true;
4659 return tree_cons (name, args, NULL_TREE);
4663 warning (OPT_Wattributes, "%qE attribute ignored",
4672 /* Handle a "pcs" attribute; arguments as in struct
4673 attribute_spec.handler.  Rejects (with a warning) any argument that
   does not name a known PCS variant. */
4675 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4676 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4678 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4680 warning (OPT_Wattributes, "%qE attribute ignored", name);
4681 *no_add_attrs = true;
4686 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4687 /* Handle the "notshared" attribute. This attribute is another way of
4688 requesting hidden visibility. ARM's compiler supports
4689 "__declspec(notshared)"; we support the same thing via an
   attribute. */
4693 arm_handle_notshared_attribute (tree *node,
4694 tree name ATTRIBUTE_UNUSED,
4695 tree args ATTRIBUTE_UNUSED,
4696 int flags ATTRIBUTE_UNUSED,
4699 tree decl = TYPE_NAME (*node);
   /* Mark the type's name decl as explicitly hidden. */
4703 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4704 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4705 *no_add_attrs = false;
4711 /* Return 0 if the attributes for two types are incompatible, 1 if they
4712 are compatible, and 2 if they are nearly compatible (which causes a
4713 warning to be generated). */
4715 arm_comp_type_attributes (const_tree type1, const_tree type2)
4719 /* Check for mismatch of non-default calling convention. */
4720 if (TREE_CODE (type1) != FUNCTION_TYPE)
4723 /* Check for mismatched call attributes. */
4724 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4725 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4726 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4727 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4729 /* Only bother to check if an attribute is defined. */
4730 if (l1 | l2 | s1 | s2)
4732 /* If one type has an attribute, the other must have the same attribute. */
4733 if ((l1 != l2) || (s1 != s2))
4736 /* Disallow mixed attributes. */
4737 if ((l1 & s2) || (l2 & s1))
4741 /* Check for mismatched ISR attribute.  "interrupt" is accepted as a
   synonym for "isr" on either type. */
4742 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4744 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4745 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4747 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4754 /* Assigns default attributes to newly defined type. This is used to
4755 set short_call/long_call attributes for function types of
4756 functions defined inside corresponding #pragma scopes. */
4758 arm_set_default_type_attributes (tree type)
4760 /* Add __attribute__ ((long_call)) to all functions, when
4761 inside #pragma long_calls or __attribute__ ((short_call)),
4762 when inside #pragma no_long_calls. */
4763 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4765 tree type_attr_list, attr_name;
4766 type_attr_list = TYPE_ATTRIBUTES (type);
4768 if (arm_pragma_long_calls == LONG)
4769 attr_name = get_identifier ("long_call");
4770 else if (arm_pragma_long_calls == SHORT)
4771 attr_name = get_identifier ("short_call");
   /* Prepend the chosen attribute to the type's attribute list. */
4775 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4776 TYPE_ATTRIBUTES (type) = type_attr_list;
4780 /* Return true if DECL is known to be linked into section SECTION. */
4783 arm_function_in_section_p (tree decl, section *section)
4785 /* We can only be certain about functions defined in the same
4786 compilation unit. */
4787 if (!TREE_STATIC (decl))
4790 /* Make sure that SYMBOL always binds to the definition in this
4791 compilation unit. */
4792 if (!targetm.binds_local_p (decl))
4795 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4796 if (!DECL_SECTION_NAME (decl))
4798 /* Make sure that we will not create a unique section for DECL. */
4799 if (flag_function_sections || DECL_ONE_ONLY (decl))
4803 return function_section (decl) == section;
4806 /* Return nonzero if a 32-bit "long_call" should be generated for
4807 a call from the current function to DECL. We generate a long_call
   if the function:
4810 a. has an __attribute__((long call))
4811 or b. is within the scope of a #pragma long_calls
4812 or c. the -mlong-calls command line switch has been specified
4814 However we do not generate a long call if the function:
4816 d. has an __attribute__ ((short_call))
4817 or e. is inside the scope of a #pragma no_long_calls
4818 or f. is defined in the same section as the current function. */
4821 arm_is_long_call_p (tree decl)
4826 return TARGET_LONG_CALLS;
4828 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4829 if (lookup_attribute ("short_call", attrs))
4832 /* For "f", be conservative, and only cater for cases in which the
4833 whole of the current function is placed in the same section. */
4834 if (!flag_reorder_blocks_and_partition
4835 && TREE_CODE (decl) == FUNCTION_DECL
4836 && arm_function_in_section_p (decl, current_function_section ()))
4839 if (lookup_attribute ("long_call", attrs))
4842 return TARGET_LONG_CALLS;
4845 /* Return nonzero if it is ok to make a tail-call to DECL. */
4847 arm_function_ok_for_sibcall (tree decl, tree exp)
4849 unsigned long func_type;
/* Each of the following tests rejects the sibcall; the elided branch
   bodies presumably return false.  */
4851 if (cfun->machine->sibcall_blocked)
4854 /* Never tailcall something for which we have no decl, or if we
4855 are generating code for Thumb-1. */
4856 if (decl == NULL || TARGET_THUMB1)
4859 /* The PIC register is live on entry to VxWorks PLT entries, so we
4860 must make the call before restoring the PIC register. */
4861 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4864 /* Cannot tail-call to long calls, since these are out of range of
4865 a branch instruction. */
4866 if (arm_is_long_call_p (decl))
4869 /* If we are interworking and the function is not declared static
4870 then we can't tail-call it unless we know that it exists in this
4871 compilation unit (since it might be a Thumb routine). */
4872 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4875 func_type = arm_current_func_type ();
4876 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4877 if (IS_INTERRUPT (func_type))
4880 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4882 /* Check that the return value locations are the same. For
4883 example that we aren't returning a value from the sibling in
4884 a VFP register but then need to transfer it to a core
/* Compare where the callee's value and our own return value live.  */
4888 a = arm_function_value (TREE_TYPE (exp), decl, false);
4889 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4891 if (!rtx_equal_p (a, b))
4895 /* Never tailcall if function may be called with a misaligned SP. */
4896 if (IS_STACKALIGN (func_type))
4899 /* Everything else is ok. */
4904 /* Addressing mode support functions. */
4906 /* Return nonzero if X is a legitimate immediate operand when compiling
4907 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4909 legitimate_pic_operand_p (rtx x)
/* Bare symbols, or (const (plus (symbol_ref) ...)) expressions, need
   the GOT under PIC and so are not legitimate immediates here.  */
4911 if (GET_CODE (x) == SYMBOL_REF
4912 || (GET_CODE (x) == CONST
4913 && GET_CODE (XEXP (x, 0)) == PLUS
4914 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4920 /* Record that the current function needs a PIC register. Initialize
4921 cfun->machine->pic_reg if we have not already done so. */
4924 require_pic_register (void)
4926 /* A lot of the logic here is made obscure by the fact that this
4927 routine gets called as part of the rtx cost estimation process.
4928 We don't want those calls to affect any assumptions about the real
4929 function; and further, we can't call entry_of_function() until we
4930 start the real expansion process. */
4931 if (!crtl->uses_pic_offset_table)
4933 gcc_assert (can_create_pseudo_p ());
/* A fixed PIC register was requested on the command line: use that
   hard register rather than allocating a pseudo.  */
4934 if (arm_pic_register != INVALID_REGNUM)
4936 if (!cfun->machine->pic_reg)
4937 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4939 /* Play games to avoid marking the function as needing pic
4940 if we are being called as part of the cost-estimation
4942 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4943 crtl->uses_pic_offset_table = 1;
/* Otherwise allocate a fresh pseudo to hold the PIC base.  */
4949 if (!cfun->machine->pic_reg)
4950 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4952 /* Play games to avoid marking the function as needing pic
4953 if we are being called as part of the cost-estimation
4955 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4957 crtl->uses_pic_offset_table = 1;
/* Emit the PIC-register load; the generated sequence is queued on
   the entry edge rather than emitted inline (see below).  */
4960 arm_load_pic_register (0UL);
4964 /* We can be called during expansion of PHI nodes, where
4965 we can't yet emit instructions directly in the final
4966 insn stream. Queue the insns on the entry edge, they will
4967 be committed after everything else is expanded. */
4968 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR))
4975 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
/* Convert ORIG into a form that is a legitimate PIC address for MODE,
   using REG as a scratch/result register when provided (a fresh pseudo
   is allocated when REG is null and pseudos may be created).  */
4977 if (GET_CODE (orig) == SYMBOL_REF
4978 || GET_CODE (orig) == LABEL_REF)
4980 rtx pic_ref, address;
4985 gcc_assert (can_create_pseudo_p ());
4986 reg = gen_reg_rtx (Pmode);
4987 address = gen_reg_rtx (Pmode);
4992 /* VxWorks does not impose a fixed gap between segments; the run-time
4993 gap can be different from the object-file gap. We therefore can't
4994 use GOTOFF unless we are absolutely sure that the symbol is in the
4995 same segment as the GOT. Unfortunately, the flexibility of linker
4996 scripts means that we can't be sure of that in general, so assume
4997 that GOTOFF is never valid on VxWorks. */
4998 if ((GET_CODE (orig) == LABEL_REF
4999 || (GET_CODE (orig) == SYMBOL_REF &&
5000 SYMBOL_REF_LOCAL_P (orig)))
5002 && !TARGET_VXWORKS_RTP)
/* Local symbol: a PC-relative reference suffices, no GOT load.  */
5003 insn = arm_pic_static_addr (orig, reg);
5006 /* If this function doesn't have a pic register, create one now. */
5007 require_pic_register ();
5010 emit_insn (gen_pic_load_addr_32bit (address, orig));
5011 else /* TARGET_THUMB1 */
5012 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
/* Global symbol: load its address through the GOT slot at
   pic_reg + address.  */
5014 pic_ref = gen_const_mem (Pmode,
5015 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
5017 insn = emit_move_insn (reg, pic_ref);
5020 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5022 set_unique_reg_note (insn, REG_EQUAL, orig);
5026 else if (GET_CODE (orig) == CONST)
5030 if (GET_CODE (XEXP (orig, 0)) == PLUS
5031 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5034 /* Handle the case where we have: const (UNSPEC_TLS). */
5035 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5036 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5039 /* Handle the case where we have:
5040 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5042 if (GET_CODE (XEXP (orig, 0)) == PLUS
5043 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5044 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5046 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT)
5052 gcc_assert (can_create_pseudo_p ());
5053 reg = gen_reg_rtx (Pmode);
5056 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
/* Recursively legitimize both halves of (const (plus base offset)).
   Reuse REG for the offset only if the base did not consume it.  */
5058 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5059 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5060 base == reg ? 0 : reg);
5062 if (GET_CODE (offset) == CONST_INT)
5064 /* The base register doesn't really matter, we only want to
5065 test the index for the appropriate mode. */
5066 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5068 gcc_assert (can_create_pseudo_p ());
5069 offset = force_reg (Pmode, offset);
5072 if (GET_CODE (offset) == CONST_INT)
5073 return plus_constant (base, INTVAL (offset));
5076 if (GET_MODE_SIZE (mode) > 4
5077 && (GET_MODE_CLASS (mode) == MODE_INT
5078 || TARGET_SOFT_FLOAT))
/* Multi-word access: materialize the sum into REG explicitly.  */
5080 emit_insn (gen_addsi3 (reg, base, offset));
5084 return gen_rtx_PLUS (Pmode, base, offset);
5091 /* Find a spare register to use during the prolog of a function. */
5094 thumb_find_work_register (unsigned long pushed_regs_mask)
5098 /* Check the argument registers first as these are call-used. The
5099 register allocation order means that sometimes r3 might be used
5100 but earlier argument registers might not, so check them all. */
5101 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5102 if (!df_regs_ever_live_p (reg))
5105 /* Before going on to check the call-saved registers we can try a couple
5106 more ways of deducing that r3 is available. The first is when we are
5107 pushing anonymous arguments onto the stack and we have less than 4
5108 registers worth of fixed arguments(*). In this case r3 will be part of
5109 the variable argument list and so we can be sure that it will be
5110 pushed right at the start of the function. Hence it will be available
5111 for the rest of the prologue.
5112 (*): ie crtl->args.pretend_args_size is greater than 0. */
5113 if (cfun->machine->uses_anonymous_args
5114 && crtl->args.pretend_args_size > 0)
5115 return LAST_ARG_REGNUM;
5117 /* The other case is when we have fixed arguments but less than 4 registers
5118 worth. In this case r3 might be used in the body of the function, but
5119 it is not being used to convey an argument into the function. In theory
5120 we could just check crtl->args.size to see how many bytes are
5121 being passed in argument registers, but it seems that it is unreliable.
5122 Sometimes it will have the value 0 when in fact arguments are being
5123 passed. (See testcase execute/20021111-1.c for an example). So we also
5124 check the args_info.nregs field as well. The problem with this field is
5125 that it makes no allowances for arguments that are passed to the
5126 function but which are not used. Hence we could miss an opportunity
5127 when a function has an unused argument in r3. But it is better to be
5128 safe than to be sorry. */
5129 if (! cfun->machine->uses_anonymous_args
5130 && crtl->args.size >= 0
5131 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5132 && crtl->args.info.nregs < 4)
5133 return LAST_ARG_REGNUM;
5135 /* Otherwise look for a call-saved register that is going to be pushed. */
5136 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5137 if (pushed_regs_mask & (1 << reg))
5142 /* Thumb-2 can use high regs. */
5143 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5144 if (pushed_regs_mask & (1 << reg))
5147 /* Something went wrong - thumb_compute_save_reg_mask()
5148 should have arranged for a suitable register to be pushed. */
5152 static GTY(()) int pic_labelno;
5154 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5158 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5160 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
/* Nothing to do unless this function actually uses the PIC offset
   table, and -msingle-pic-base means the register is set up by the
   runtime, not by us.  */
5162 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5165 gcc_assert (flag_pic);
5167 pic_reg = cfun->machine->pic_reg;
5168 if (TARGET_VXWORKS_RTP)
/* VxWorks RTP: the GOT base is found indirectly through the
   GOTT_BASE/GOTT_INDEX symbols provided by the OS.  */
5170 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5171 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5172 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5174 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5176 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5177 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5181 /* We use an UNSPEC rather than a LABEL_REF because this label
5182 never appears in the code stream. */
5184 labelno = GEN_INT (pic_labelno++);
5185 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5186 l1 = gen_rtx_CONST (VOIDmode, l1);
5188 /* On the ARM the PC register contains 'dot + 8' at the time of the
5189 addition, on the Thumb it is 'dot + 4'. */
5190 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5191 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5193 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5197 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
/* Add the PC (dot+8 on ARM, dot+4 on Thumb) to complete the base.  */
5199 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5201 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5203 else /* TARGET_THUMB1 */
5205 if (arm_pic_register != INVALID_REGNUM
5206 && REGNO (pic_reg) > LAST_LO_REGNUM)
5208 /* We will have pushed the pic register, so we should always be
5209 able to find a work register. */
5210 pic_tmp = gen_rtx_REG (SImode,
5211 thumb_find_work_register (saved_regs));
5212 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5213 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5216 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5217 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5221 /* Need to emit this whether or not we obey regdecls,
5222 since setjmp/longjmp can cause life info to screw up. */
5226 /* Generate code to load the address of a static var when flag_pic is set. */
5228 arm_pic_static_addr (rtx orig, rtx reg)
5230 rtx l1, labelno, offset_rtx, insn;
5232 gcc_assert (flag_pic);
5234 /* We use an UNSPEC rather than a LABEL_REF because this label
5235 never appears in the code stream. */
5236 labelno = GEN_INT (pic_labelno++);
5237 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5238 l1 = gen_rtx_CONST (VOIDmode, l1);
5240 /* On the ARM the PC register contains 'dot + 8' at the time of the
5241 addition, on the Thumb it is 'dot + 4'. */
5242 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
/* Encode "ORIG - (label + pc-bias)" so that adding the PC later
   yields ORIG's run-time address without a GOT entry.  */
5243 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5244 UNSPEC_SYMBOL_OFFSET);
5245 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5249 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5251 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5253 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5255 else /* TARGET_THUMB1 */
5257 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5258 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5264 /* Return nonzero if X is valid as an ARM state addressing register. */
5266 arm_address_register_rtx_p (rtx x, int strict_p)
5270 if (GET_CODE (x) != REG)
/* Strict checking: the hard register must be usable as a base.  */
5276 return ARM_REGNO_OK_FOR_BASE_P (regno);
/* Non-strict: accept any core register, any pseudo, and the
   (not yet eliminated) frame and arg pointers.  */
5278 return (regno <= LAST_ARM_REGNUM
5279 || regno >= FIRST_PSEUDO_REGISTER
5280 || regno == FRAME_POINTER_REGNUM
5281 || regno == ARG_POINTER_REGNUM);
5284 /* Return TRUE if this rtx is the difference of a symbol and a label,
5285 and will reduce to a PC-relative relocation in the object file.
5286 Expressions like this can be left alone when generating PIC, rather
5287 than forced through the GOT. */
5289 pcrel_constant_p (rtx x)
5291 if (GET_CODE (x) == MINUS)
5292 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5297 /* Return nonzero if X is a valid ARM state address operand. */
5299 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5303 enum rtx_code code = GET_CODE (x);
/* A bare base register is always a valid address.  */
5305 if (arm_address_register_rtx_p (x, strict_p))
5308 use_ldrd = (TARGET_LDRD
5310 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
/* Auto-increment/decrement addressing: (post_inc reg) etc.  The
   pre_inc/post_dec variants are only valid for small modes or when
   ldrd is available.  */
5312 if (code == POST_INC || code == PRE_DEC
5313 || ((code == PRE_INC || code == POST_DEC)
5314 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5315 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5317 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5318 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5319 && GET_CODE (XEXP (x, 1)) == PLUS
5320 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5322 rtx addend = XEXP (XEXP (x, 1), 1);
5324 /* Don't allow ldrd post increment by register because it's hard
5325 to fixup invalid register choices. */
5327 && GET_CODE (x) == POST_MODIFY
5328 && GET_CODE (addend) == REG)
5331 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5332 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5335 /* After reload constants split into minipools will have addresses
5336 from a LABEL_REF. */
5337 else if (reload_completed
5338 && (code == LABEL_REF
5340 && GET_CODE (XEXP (x, 0)) == PLUS
5341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5342 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
/* TImode and NEON structure modes only allow register addressing
   (handled by the first test above).  */
5345 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5348 else if (code == PLUS)
5350 rtx xop0 = XEXP (x, 0);
5351 rtx xop1 = XEXP (x, 1);
/* base + const-index, or index + base with a register/shifted index.  */
5353 return ((arm_address_register_rtx_p (xop0, strict_p)
5354 && GET_CODE(xop1) == CONST_INT
5355 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5356 || (arm_address_register_rtx_p (xop1, strict_p)
5357 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5361 /* Reload currently can't handle MINUS, so disable this for now */
5362 else if (GET_CODE (x) == MINUS)
5364 rtx xop0 = XEXP (x, 0);
5365 rtx xop1 = XEXP (x, 1);
5367 return (arm_address_register_rtx_p (xop0, strict_p)
5368 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
/* Constant-pool references that don't need PIC fixups.  */
5372 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5373 && code == SYMBOL_REF
5374 && CONSTANT_POOL_ADDRESS_P (x)
5376 && symbol_mentioned_p (get_pool_constant (x))
5377 && ! pcrel_constant_p (get_pool_constant (x))))
5383 /* Return nonzero if X is a valid Thumb-2 address operand. */
5385 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5388 enum rtx_code code = GET_CODE (x);
/* Structure parallels arm_legitimate_address_outer_p above, with the
   Thumb-2 index rules substituted.  */
5390 if (arm_address_register_rtx_p (x, strict_p))
5393 use_ldrd = (TARGET_LDRD
5395 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5397 if (code == POST_INC || code == PRE_DEC
5398 || ((code == PRE_INC || code == POST_DEC)
5399 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5400 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5402 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5403 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5404 && GET_CODE (XEXP (x, 1)) == PLUS
5405 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5407 /* Thumb-2 only has autoincrement by constant. */
5408 rtx addend = XEXP (XEXP (x, 1), 1);
5409 HOST_WIDE_INT offset;
5411 if (GET_CODE (addend) != CONST_INT)
5414 offset = INTVAL(addend);
5415 if (GET_MODE_SIZE (mode) <= 4)
5416 return (offset > -256 && offset < 256);
/* Doubleword: only ldrd can do it, with a word-aligned offset.  */
5418 return (use_ldrd && offset > -1024 && offset < 1024
5419 && (offset & 3) == 0);
5422 /* After reload constants split into minipools will have addresses
5423 from a LABEL_REF. */
5424 else if (reload_completed
5425 && (code == LABEL_REF
5427 && GET_CODE (XEXP (x, 0)) == PLUS
5428 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5429 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5432 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5435 else if (code == PLUS)
5437 rtx xop0 = XEXP (x, 0);
5438 rtx xop1 = XEXP (x, 1);
5440 return ((arm_address_register_rtx_p (xop0, strict_p)
5441 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5442 || (arm_address_register_rtx_p (xop1, strict_p)
5443 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5446 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5447 && code == SYMBOL_REF
5448 && CONSTANT_POOL_ADDRESS_P (x)
5450 && symbol_mentioned_p (get_pool_constant (x))
5451 && ! pcrel_constant_p (get_pool_constant (x))))
5457 /* Return nonzero if INDEX is valid for an address index operand in
5460 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5463 HOST_WIDE_INT range;
5464 enum rtx_code code = GET_CODE (index);
5466 /* Standard coprocessor addressing modes. */
5467 if (TARGET_HARD_FLOAT
5468 && (TARGET_FPA || TARGET_MAVERICK)
5469 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5470 || (TARGET_MAVERICK && mode == DImode)))
/* FPA/Maverick loads: word-aligned 10-bit signed offset.  */
5471 return (code == CONST_INT && INTVAL (index) < 1024
5472 && INTVAL (index) > -1024
5473 && (INTVAL (index) & 3) == 0);
5476 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
/* NEON vld/vst offset; upper bound 1016 leaves room for the access.  */
5477 return (code == CONST_INT
5478 && INTVAL (index) < 1016
5479 && INTVAL (index) > -1024
5480 && (INTVAL (index) & 3) == 0);
5482 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5483 return (code == CONST_INT
5484 && INTVAL (index) < 1024
5485 && INTVAL (index) > -1024
5486 && (INTVAL (index) & 3) == 0);
/* Register index, for word-or-smaller accesses.  */
5488 if (arm_address_register_rtx_p (index, strict_p)
5489 && (GET_MODE_SIZE (mode) <= 4))
5492 if (mode == DImode || mode == DFmode)
5494 if (code == CONST_INT)
5496 HOST_WIDE_INT val = INTVAL (index);
5499 return val > -256 && val < 256;
5501 return val > -4096 && val < 4092;
5504 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
/* Scaled-register index forms (reg, reg LSL #n, etc.).  */
5507 if (GET_MODE_SIZE (mode) <= 4
5511 || (mode == QImode && outer == SIGN_EXTEND))))
5515 rtx xiop0 = XEXP (index, 0);
5516 rtx xiop1 = XEXP (index, 1);
5518 return ((arm_address_register_rtx_p (xiop0, strict_p)
5519 && power_of_two_operand (xiop1, SImode))
5520 || (arm_address_register_rtx_p (xiop1, strict_p)
5521 && power_of_two_operand (xiop0, SImode)));
5523 else if (code == LSHIFTRT || code == ASHIFTRT
5524 || code == ASHIFT || code == ROTATERT)
5526 rtx op = XEXP (index, 1);
5528 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5529 && GET_CODE (op) == CONST_INT
5531 && INTVAL (op) <= 31);
5535 /* For ARM v4 we may be doing a sign-extend operation during the
5541 || (outer == SIGN_EXTEND && mode == QImode))
/* Fall-through case: plain immediate offset within RANGE.  */
5547 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5549 return (code == CONST_INT
5550 && INTVAL (index) < range
5551 && INTVAL (index) > -range);
5554 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5555 index operand. i.e. 1, 2, 4 or 8. */
5557 thumb2_index_mul_operand (rtx op)
/* Only constant multipliers are allowed; non-constants fail.  */
5561 if (GET_CODE(op) != CONST_INT)
5565 return (val == 1 || val == 2 || val == 4 || val == 8);
5568 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5570 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5572 enum rtx_code code = GET_CODE (index);
5574 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5575 /* Standard coprocessor addressing modes. */
5576 if (TARGET_HARD_FLOAT
5577 && (TARGET_FPA || TARGET_MAVERICK)
5578 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5579 || (TARGET_MAVERICK && mode == DImode)))
5580 return (code == CONST_INT && INTVAL (index) < 1024
5581 && INTVAL (index) > -1024
5582 && (INTVAL (index) & 3) == 0);
5584 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5586 /* For DImode assume values will usually live in core regs
5587 and only allow LDRD addressing modes. */
5588 if (!TARGET_LDRD || mode != DImode)
5589 return (code == CONST_INT
5590 && INTVAL (index) < 1024
5591 && INTVAL (index) > -1024
5592 && (INTVAL (index) & 3) == 0);
5596 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
/* NEON: same 1016/-1024 word-aligned window as the ARM variant.  */
5597 return (code == CONST_INT
5598 && INTVAL (index) < 1016
5599 && INTVAL (index) > -1024
5600 && (INTVAL (index) & 3) == 0);
5602 if (arm_address_register_rtx_p (index, strict_p)
5603 && (GET_MODE_SIZE (mode) <= 4))
5606 if (mode == DImode || mode == DFmode)
5608 if (code == CONST_INT)
5610 HOST_WIDE_INT val = INTVAL (index);
5611 /* ??? Can we assume ldrd for thumb2? */
5612 /* Thumb-2 ldrd only has reg+const addressing modes. */
5613 /* ldrd supports offsets of +-1020.
5614 However the ldr fallback does not. */
5615 return val > -256 && val < 256 && (val & 3) == 0;
/* Scaled-register index: base + reg * {1,2,4,8}.  */
5623 rtx xiop0 = XEXP (index, 0);
5624 rtx xiop1 = XEXP (index, 1);
5626 return ((arm_address_register_rtx_p (xiop0, strict_p)
5627 && thumb2_index_mul_operand (xiop1))
5628 || (arm_address_register_rtx_p (xiop1, strict_p)
5629 && thumb2_index_mul_operand (xiop0)));
5631 else if (code == ASHIFT)
5633 rtx op = XEXP (index, 1);
/* Shifted index: base + (reg << n), shift amount 0..3 only.  */
5635 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5636 && GET_CODE (op) == CONST_INT
5638 && INTVAL (op) <= 3);
/* Plain immediate: 12-bit positive or 8-bit negative offset.  */
5641 return (code == CONST_INT
5642 && INTVAL (index) < 4096
5643 && INTVAL (index) > -256);
5646 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5648 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5652 if (GET_CODE (x) != REG)
/* Strict: defer to the Thumb-1 per-mode base-register predicate.  */
5658 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
/* Non-strict: low regs always; SP and pseudos only for word-or-larger
   modes (Thumb-1 SP-relative addressing requires SImode or bigger).  */
5660 return (regno <= LAST_LO_REGNUM
5661 || regno > LAST_VIRTUAL_REGISTER
5662 || regno == FRAME_POINTER_REGNUM
5663 || (GET_MODE_SIZE (mode) >= 4
5664 && (regno == STACK_POINTER_REGNUM
5665 || regno >= FIRST_PSEUDO_REGISTER
5666 || x == hard_frame_pointer_rtx
5667 || x == arg_pointer_rtx)));
5670 /* Return nonzero if x is a legitimate index register. This is the case
5671 for any base register that can access a QImode object. */
5673 thumb1_index_register_rtx_p (rtx x, int strict_p)
5675 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5678 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5680 The AP may be eliminated to either the SP or the FP, so we use the
5681 least common denominator, e.g. SImode, and offsets from 0 to 64.
5683 ??? Verify whether the above is the right approach.
5685 ??? Also, the FP may be eliminated to the SP, so perhaps that
5686 needs special handling also.
5688 ??? Look at how the mips16 port solves this problem. It probably uses
5689 better ways to solve some of these problems.
5691 Although it is not incorrect, we don't accept QImode and HImode
5692 addresses based on the frame pointer or arg pointer until the
5693 reload pass starts. This is so that eliminating such addresses
5694 into stack based ones won't produce impossible code. */
5696 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5698 /* ??? Not clear if this is right. Experiment. */
5699 if (GET_MODE_SIZE (mode) < 4
5700 && !(reload_in_progress || reload_completed)
5701 && (reg_mentioned_p (frame_pointer_rtx, x)
5702 || reg_mentioned_p (arg_pointer_rtx, x)
5703 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5704 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5705 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5706 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5709 /* Accept any base register. SP only in SImode or larger. */
5710 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5713 /* This is PC relative data before arm_reorg runs. */
5714 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5715 && GET_CODE (x) == SYMBOL_REF
5716 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5719 /* This is PC relative data after arm_reorg runs. */
5720 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5722 && (GET_CODE (x) == LABEL_REF
5723 || (GET_CODE (x) == CONST
5724 && GET_CODE (XEXP (x, 0)) == PLUS
5725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5726 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5729 /* Post-inc indexing only supported for SImode and larger. */
5730 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5731 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5734 else if (GET_CODE (x) == PLUS)
5736 /* REG+REG address can be any two index registers. */
5737 /* We disallow FRAME+REG addressing since we know that FRAME
5738 will be replaced with STACK, and SP relative addressing only
5739 permits SP+OFFSET. */
5740 if (GET_MODE_SIZE (mode) <= 4
5741 && XEXP (x, 0) != frame_pointer_rtx
5742 && XEXP (x, 1) != frame_pointer_rtx
5743 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5744 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5747 /* REG+const has 5-7 bit offset for non-SP registers. */
5748 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5749 || XEXP (x, 0) == arg_pointer_rtx)
5750 && GET_CODE (XEXP (x, 1)) == CONST_INT
5751 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5754 /* REG+const has 10-bit offset for SP, but only SImode and
5755 larger is supported. */
5756 /* ??? Should probably check for DI/DFmode overflow here
5757 just like GO_IF_LEGITIMATE_OFFSET does. */
5758 else if (GET_CODE (XEXP (x, 0)) == REG
5759 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5760 && GET_MODE_SIZE (mode) >= 4
5761 && GET_CODE (XEXP (x, 1)) == CONST_INT
5762 && INTVAL (XEXP (x, 1)) >= 0
5763 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5764 && (INTVAL (XEXP (x, 1)) & 3) == 0)
/* FP/AP/virtual + word-aligned const: legal before elimination.  */
5767 else if (GET_CODE (XEXP (x, 0)) == REG
5768 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5769 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5770 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5771 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5772 && GET_MODE_SIZE (mode) >= 4
5773 && GET_CODE (XEXP (x, 1)) == CONST_INT
5774 && (INTVAL (XEXP (x, 1)) & 3) == 0)
/* Non-PIC constant-pool references, as in the ARM/Thumb-2 cases.  */
5778 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5779 && GET_MODE_SIZE (mode) == 4
5780 && GET_CODE (x) == SYMBOL_REF
5781 && CONSTANT_POOL_ADDRESS_P (x)
5783 && symbol_mentioned_p (get_pool_constant (x))
5784 && ! pcrel_constant_p (get_pool_constant (x))))
5790 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5791 instruction of mode MODE. */
5793 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
/* Offset ranges scale with the access size: byte 0..31, halfword
   0..62 (even), word-or-larger bounded so the whole access stays
   within 128 bytes of the base.  */
5795 switch (GET_MODE_SIZE (mode))
5798 return val >= 0 && val < 32;
5801 return val >= 0 && val < 64 && (val & 1) == 0;
5805 && (val + GET_MODE_SIZE (mode)) <= 128
5811 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
/* Implements TARGET_LEGITIMATE_ADDRESS_P: dispatch to the checker for
   the instruction set currently being compiled for.  */
5814 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5815 else if (TARGET_THUMB2)
5816 return thumb2_legitimate_address_p (mode, x, strict_p);
5817 else /* if (TARGET_THUMB1) */
5818 return thumb1_legitimate_address_p (mode, x, strict_p);
5821 /* Build the SYMBOL_REF for __tls_get_addr. */
5823 static GTY(()) rtx tls_get_addr_libfunc;
5826 get_tls_get_addr (void)
/* Lazily create and cache the libfunc rtx; GTY keeps it across GC.  */
5828 if (!tls_get_addr_libfunc)
5829 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5830 return tls_get_addr_libfunc;
5834 arm_load_tp (rtx target)
/* Emit code to load the thread pointer into TARGET (a fresh pseudo is
   allocated when TARGET is null) and return the register holding it.  */
5837 target = gen_reg_rtx (SImode);
5841 /* Can return in any reg. */
5842 emit_insn (gen_load_tp_hard (target));
5846 /* Always returned in r0. Immediately copy the result into a pseudo,
5847 otherwise other uses of r0 (e.g. setting up function arguments) may
5848 clobber the value. */
5852 emit_insn (gen_load_tp_soft ());
5854 tmp = gen_rtx_REG (SImode, 0);
5855 emit_move_insn (target, tmp);
5861 load_tls_operand (rtx x, rtx reg)
/* Wrap X in a CONST, move it into REG (allocating a pseudo when REG
   is null) and return the register used.  */
5865 if (reg == NULL_RTX)
5866 reg = gen_reg_rtx (SImode);
5868 tmp = gen_rtx_CONST (SImode, x);
5870 emit_move_insn (reg, tmp);
5876 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
/* Emit the insn sequence calling __tls_get_addr for symbol X with TLS
   relocation kind RELOC (e.g. TLS_GD32/TLS_LDM32); the call's value
   rtx is stored through VALUEP and the collected insns are returned.  */
5878 rtx insns, label, labelno, sum;
5882 labelno = GEN_INT (pic_labelno++);
5883 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5884 label = gen_rtx_CONST (VOIDmode, label);
/* Build symbol+reloc+label+pc-bias into a single UNSPEC operand;
   the PC bias is 8 in ARM state, 4 in Thumb state.  */
5886 sum = gen_rtx_UNSPEC (Pmode,
5887 gen_rtvec (4, x, GEN_INT (reloc), label,
5888 GEN_INT (TARGET_ARM ? 8 : 4)),
5890 reg = load_tls_operand (sum, reg);
5893 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5894 else if (TARGET_THUMB2)
5895 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5896 else /* TARGET_THUMB1 */
5897 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5899 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5900 Pmode, 1, reg, Pmode);
5902 insns = get_insns ();
5909 legitimize_tls_address (rtx x, rtx reg)
/* Expand a TLS symbol reference X into explicit RTL for its access
   model (global-dynamic, local-dynamic, initial-exec or local-exec),
   returning an rtx for the variable's address.  */
5911 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5912 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5916 case TLS_MODEL_GLOBAL_DYNAMIC:
5917 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5918 dest = gen_reg_rtx (Pmode);
5919 emit_libcall_block (insns, dest, ret, x);
5922 case TLS_MODEL_LOCAL_DYNAMIC:
5923 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5925 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5926 share the LDM result with other LD model accesses. */
5927 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5929 dest = gen_reg_rtx (Pmode);
5930 emit_libcall_block (insns, dest, ret, eqv);
5932 /* Load the addend. */
5933 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5935 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5936 return gen_rtx_PLUS (Pmode, dest, addend);
5938 case TLS_MODEL_INITIAL_EXEC:
5939 labelno = GEN_INT (pic_labelno++);
5940 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5941 label = gen_rtx_CONST (VOIDmode, label);
5942 sum = gen_rtx_UNSPEC (Pmode,
5943 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5944 GEN_INT (TARGET_ARM ? 8 : 4)),
5946 reg = load_tls_operand (sum, reg);
5949 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5950 else if (TARGET_THUMB2)
5951 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
/* Thumb-1: no combined load insn, so add then load separately.  */
5954 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5955 emit_move_insn (reg, gen_const_mem (SImode, reg));
/* IE and LE models yield an offset to add to the thread pointer.  */
5958 tp = arm_load_tp (NULL_RTX);
5960 return gen_rtx_PLUS (Pmode, tp, reg);
5962 case TLS_MODEL_LOCAL_EXEC:
5963 tp = arm_load_tp (NULL_RTX);
5965 reg = gen_rtx_UNSPEC (Pmode,
5966 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5968 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5970 return gen_rtx_PLUS (Pmode, tp, reg);
5977 /* Try machine-dependent ways of modifying an illegitimate address
5978 to be legitimate. If we find one, return the new, valid address. */
/* ARM-state implementation of the TARGET_LEGITIMIZE_ADDRESS hook.
   X is the address to fix up, ORIG_X the pre-transformation original,
   MODE the mode of the memory access.  Thumb-1 is delegated to
   thumb_legitimize_address and TLS symbols to legitimize_tls_address.
   NOTE(review): this is an elided listing (embedded line numbers jump),
   so some statements/braces between the lines below are not visible.  */
5980 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5984 /* TODO: legitimize_address for Thumb2. */
5987 return thumb_legitimize_address (x, orig_x, mode);
/* TLS addresses need their own sequence regardless of form.  */
5990 if (arm_tls_symbol_p (x))
5991 return legitimize_tls_address (x, NULL_RTX);
5993 if (GET_CODE (x) == PLUS)
5995 rtx xop0 = XEXP (x, 0);
5996 rtx xop1 = XEXP (x, 1);
/* Force constant operands into registers, except symbols, which get
   special PIC/minipool handling later.  */
5998 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5999 xop0 = force_reg (SImode, xop0);
6001 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6002 xop1 = force_reg (SImode, xop1);
/* base-register + out-of-range constant: split the constant into a
   biased base plus a small in-range index.  */
6004 if (ARM_BASE_REGISTER_RTX_P (xop0)
6005 && GET_CODE (xop1) == CONST_INT)
6007 HOST_WIDE_INT n, low_n;
6011 /* VFP addressing modes actually allow greater offsets, but for
6012 now we just stick with the lowest common denominator. */
6014 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
/* Keep the sign of the original offset in the retained low part
   (12-bit index for the word modes handled here).  */
6026 low_n = ((mode) == TImode ? 0
6027 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6031 base_reg = gen_reg_rtx (SImode);
6032 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6033 emit_move_insn (base_reg, val);
6034 x = plus_constant (base_reg, low_n);
6036 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6037 x = gen_rtx_PLUS (SImode, xop0, xop1);
6040 /* XXX We don't allow MINUS any more -- see comment in
6041 arm_legitimate_address_outer_p (). */
6042 else if (GET_CODE (x) == MINUS)
6044 rtx xop0 = XEXP (x, 0);
6045 rtx xop1 = XEXP (x, 1);
6047 if (CONSTANT_P (xop0))
6048 xop0 = force_reg (SImode, xop0);
6050 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6051 xop1 = force_reg (SImode, xop1);
6053 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6054 x = gen_rtx_MINUS (SImode, xop0, xop1);
6057 /* Make sure to take full advantage of the pre-indexed addressing mode
6058 with absolute addresses which often allows for the base register to
6059 be factorized for multiple adjacent memory references, and it might
6060 even allows for the mini pool to be avoided entirely. */
6061 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6064 HOST_WIDE_INT mask, base, index;
6067 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6068 use a 8-bit index. So let's use a 12-bit index for SImode only and
6069 hope that arm_gen_constant will enable ldrb to use more bits. */
6070 bits = (mode == SImode) ? 12 : 8;
6071 mask = (1 << bits) - 1;
6072 base = INTVAL (x) & ~mask;
6073 index = INTVAL (x) & mask;
/* Heuristic: a base with many set bits is expensive to materialise.  */
6074 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6076 /* It'll most probably be more efficient to generate the base
6077 with more bits set and use a negative index instead. */
6081 base_reg = force_reg (SImode, GEN_INT (base));
6082 x = plus_constant (base_reg, index);
6087 /* We need to find and carefully transform any SYMBOL and LABEL
6088 references; so go back to the original address expression. */
6089 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6091 if (new_x != orig_x)
6099 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6100 to be legitimate. If we find one, return the new, valid address. */
/* Thumb-1 counterpart of arm_legitimize_address.  Thumb-1 load/store
   offsets are limited to 32 * GET_MODE_SIZE (mode), hence the splitting
   strategies below.  (Elided listing: some lines are missing.)  */
6102 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6104 if (arm_tls_symbol_p (x))
6105 return legitimize_tls_address (x, NULL_RTX);
/* reg + out-of-range constant offset.  */
6107 if (GET_CODE (x) == PLUS
6108 && GET_CODE (XEXP (x, 1)) == CONST_INT
6109 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6110 || INTVAL (XEXP (x, 1)) < 0))
6112 rtx xop0 = XEXP (x, 0);
6113 rtx xop1 = XEXP (x, 1);
6114 HOST_WIDE_INT offset = INTVAL (xop1);
6116 /* Try and fold the offset into a biasing of the base register and
6117 then offsetting that. Don't do this when optimizing for space
6118 since it can cause too many CSEs. */
/* NOTE(review): the guard below tests `optimize_size &&', yet the
   comment above says NOT to do this when optimizing for space -- the
   comment and condition appear to contradict; confirm intent before
   relying on either.  */
6119 if (optimize_size && offset >= 0
6120 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6122 HOST_WIDE_INT delta;
6125 delta = offset - (256 - GET_MODE_SIZE (mode));
6126 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6127 delta = 31 * GET_MODE_SIZE (mode);
6129 delta = offset & (~31 * GET_MODE_SIZE (mode));
6131 xop0 = force_operand (plus_constant (xop0, offset - delta),
6133 x = plus_constant (xop0, delta);
6135 else if (offset < 0 && offset > -256)
6136 /* Small negative offsets are best done with a subtract before the
6137 dereference, forcing these into a register normally takes two
6139 x = force_operand (x, NULL_RTX);
6142 /* For the remaining cases, force the constant into a register. */
6143 xop1 = force_reg (SImode, xop1);
6144 x = gen_rtx_PLUS (SImode, xop0, xop1);
/* reg1 + reg-like op: force the non-register operand into a register.  */
6147 else if (GET_CODE (x) == PLUS
6148 && s_register_operand (XEXP (x, 1), SImode)
6149 && !s_register_operand (XEXP (x, 0), SImode))
6151 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6153 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6158 /* We need to find and carefully transform any SYMBOL and LABEL
6159 references; so go back to the original address expression. */
6160 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6162 if (new_x != orig_x)
/* Reload-time address fix-up for Thumb-1 (LEGITIMIZE_RELOAD_ADDRESS).
   *X_P is the address being reloaded for a MODE access; OPNUM/TYPE are
   forwarded to push_reload.  On a match the whole address is pushed as
   a single reload and (presumably) a nonzero result is returned --
   return statements are elided in this listing.  */
6170 thumb_legitimize_reload_address (rtx *x_p,
6171 enum machine_mode mode,
6172 int opnum, int type,
6173 int ind_levels ATTRIBUTE_UNUSED)
/* SP + out-of-range offset for a sub-word access: reload the whole sum.  */
6177 if (GET_CODE (x) == PLUS
6178 && GET_MODE_SIZE (mode) < 4
6179 && REG_P (XEXP (x, 0))
6180 && XEXP (x, 0) == stack_pointer_rtx
6181 && GET_CODE (XEXP (x, 1)) == CONST_INT
6182 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6187 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6188 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type)
6192 /* If both registers are hi-regs, then it's better to reload the
6193 entire expression rather than each register individually. That
6194 only requires one reload register rather than two. */
6195 if (GET_CODE (x) == PLUS
6196 && REG_P (XEXP (x, 0))
6197 && REG_P (XEXP (x, 1))
6198 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6199 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)
6204 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6205 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type)
6212 /* Test for various thread-local symbols. */
6214 /* Return TRUE if X is a thread-local symbol. */
/* False early-outs (elided here) cover the no-TLS target and
   non-SYMBOL_REF cases; otherwise the symbol's TLS model decides.  */
6217 arm_tls_symbol_p (rtx x)
6219 if (! TARGET_HAVE_TLS)
6222 if (GET_CODE (x) != SYMBOL_REF)
6225 return SYMBOL_REF_TLS_MODEL (x) != 0;
6228 /* Helper for arm_tls_referenced_p. */
/* for_each_rtx callback: nonzero for a TLS SYMBOL_REF; skips (does not
   recurse into) UNSPEC_TLS wrappers, whose operands are TLS offsets
   rather than symbol references.  DATA is unused.  */
6231 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6233 if (GET_CODE (*x) == SYMBOL_REF)
6234 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6236 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6237 TLS offsets, not real symbol references. */
6238 if (GET_CODE (*x) == UNSPEC
6239 && XINT (*x, 1) == UNSPEC_TLS)
6245 /* Return TRUE if X contains any TLS symbol references. */
/* Walks the whole of X with arm_tls_operand_p_1; trivially false when
   the target has no TLS support.  */
6248 arm_tls_referenced_p (rtx x)
6250 if (! TARGET_HAVE_TLS)
6253 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6256 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
/* A constant cannot go in the literal pool if (a) section-relative
   offsets must stay within their section and X's offset escapes its
   block, or (b) X mentions a TLS symbol (TLS needs insn sequences,
   not pool loads).  */
6259 arm_cannot_force_const_mem (rtx x)
6263 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6265 split_const (x, &base, &offset);
6266 if (GET_CODE (base) == SYMBOL_REF
6267 && !offset_within_block_p (base, INTVAL (offset)))
6270 return arm_tls_referenced_p (x);
/* True for a REG, or a SUBREG of a REG.  */
6273 #define REG_OR_SUBREG_REG(X) \
6274 (GET_CODE (X) == REG \
6275 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
/* Strip a SUBREG down to the underlying REG (X must satisfy
   REG_OR_SUBREG_REG first).  */
6277 #define REG_OR_SUBREG_RTX(X) \
6278 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
/* Fallback cost scale when rtl.h hasn't defined it: N insns at 4 cost
   units each, minus 2 so ties favour fewer insns.  (The matching
   #endif is elided from this listing.)  */
6280 #ifndef COSTS_N_INSNS
6281 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
/* Speed-oriented RTX cost estimate for Thumb-1.  X is the expression,
   CODE its rtx code, OUTER the code of the containing expression
   (used to judge whether a constant is free as an operand).  The
   switch's case labels are largely elided in this listing.  */
6284 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6286 enum machine_mode mode = GET_MODE (x);
6300 return COSTS_N_INSNS (1);
/* MULT by constant: cost grows with the constant (Booth-style steps,
   computation elided); non-constant multiplies are expensive.  */
6303 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6306 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6313 return COSTS_N_INSNS (2) + cycles;
6315 return COSTS_N_INSNS (1) + 16;
/* NOTE(review): in the expression below, `+' binds tighter than `==',
   so it parses as ((SET_SRC==MEM) + GET_CODE (SET_DEST)) == MEM rather
   than the presumably intended sum of two MEM tests -- missing
   parentheses; confirm against upstream before changing.  */
6318 return (COSTS_N_INSNS (1)
6319 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6320 + GET_CODE (SET_DEST (x)) == MEM));
/* CONST_INT: cost depends on magnitude and on what OUTER will do
   with it (many Thumb-1 insns take small immediates for free).  */
6325 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6327 if (thumb_shiftable_const (INTVAL (x)))
6328 return COSTS_N_INSNS (2);
6329 return COSTS_N_INSNS (3);
6331 else if ((outer == PLUS || outer == COMPARE)
6332 && INTVAL (x) < 256 && INTVAL (x) > -256)
6334 else if ((outer == IOR || outer == XOR || outer == AND)
6335 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6336 return COSTS_N_INSNS (1);
6337 else if (outer == AND)
6340 /* This duplicates the tests in the andsi3 expander. */
6341 for (i = 9; i <= 31; i++)
6342 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6343 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6344 return COSTS_N_INSNS (2);
6346 else if (outer == ASHIFT || outer == ASHIFTRT
6347 || outer == LSHIFTRT)
6349 return COSTS_N_INSNS (2);
6355 return COSTS_N_INSNS (3);
6373 /* XXX another guess. */
6374 /* Memory costs quite a lot for the first word, but subsequent words
6375 load at the equivalent of a single insn each. */
6376 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6377 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6382 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
/* Extension ops: cost of the operand plus the extend itself.  */
6388 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6389 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6395 return total + COSTS_N_INSNS (1);
6397 /* Assume a two-shift sequence. Increase the cost slightly so
6398 we prefer actual shifts over an extend operation. */
6399 return total + 1 + COSTS_N_INSNS (2);
/* Generic ARM/Thumb-2 RTX cost kernel shared by the per-core cost
   functions.  Writes the estimated cost of X into *TOTAL; OUTER is the
   enclosing rtx code and SPEED distinguishes speed from size tuning.
   Return statements, braces and most case labels are elided in this
   listing, so control flow shown is partial -- keep that in mind when
   reading the annotations below.  */
6407 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6409 enum machine_mode mode = GET_MODE (x);
6410 enum rtx_code subcode;
6412 enum rtx_code code = GET_CODE (x);
6418 /* Memory costs quite a lot for the first word, but subsequent words
6419 load at the equivalent of a single insn each. */
6420 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
/* Division-like float ops: cheap-ish on hardware FP, libcall otherwise.  */
6427 if (TARGET_HARD_FLOAT && mode == SFmode)
6428 *total = COSTS_N_INSNS (2);
6429 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6430 *total = COSTS_N_INSNS (4);
6432 *total = COSTS_N_INSNS (20);
6436 if (GET_CODE (XEXP (x, 1)) == REG)
6437 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6438 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6439 *total = rtx_cost (XEXP (x, 1), code, speed)
6445 *total += COSTS_N_INSNS (4);
6450 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6451 *total += rtx_cost (XEXP (x, 0), code, speed)
6454 *total += COSTS_N_INSNS (3);
6458 *total += COSTS_N_INSNS (1);
6459 /* Increase the cost of complex shifts because they aren't any faster,
6460 and reduce dual issue opportunities. */
6461 if (arm_tune_cortex_a9
6462 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
/* MINUS: one insn per word, with RSB-style constant folding when an
   operand is an encodable immediate.  */
6470 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6471 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6472 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6474 *total += rtx_cost (XEXP (x, 1), code, speed)
6478 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6479 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6481 *total += rtx_cost (XEXP (x, 0), code, speed)
6488 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6490 if (TARGET_HARD_FLOAT
6492 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6494 *total = COSTS_N_INSNS (1);
6495 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6496 && arm_const_double_rtx (XEXP (x, 0)))
6498 *total += rtx_cost (XEXP (x, 1), code, speed)
6502 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6503 && arm_const_double_rtx (XEXP (x, 1)))
6505 *total += rtx_cost (XEXP (x, 0), code, speed)
6511 *total = COSTS_N_INSNS (20);
6515 *total = COSTS_N_INSNS (1);
6516 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6517 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6519 *total += rtx_cost (XEXP (x, 1), code, speed)
/* A shift folded into the second operand of MINUS is free.  */
6523 subcode = GET_CODE (XEXP (x, 1));
6524 if (subcode == ASHIFT || subcode == ASHIFTRT
6525 || subcode == LSHIFTRT
6526 || subcode == ROTATE || subcode == ROTATERT)
6528 *total += rtx_cost (XEXP (x, 0), code, speed)
6529 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed)
6533 /* A shift as a part of RSB costs no more than RSB itself. */
6534 if (GET_CODE (XEXP (x, 0)) == MULT
6535 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6537 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed)
6538 *total += rtx_cost (XEXP (x, 1), code, speed)
6543 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6545 *total += rtx_cost (XEXP (x, 0), code, speed)
6546 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed)
/* MINUS of a comparison result: extra insn unless the flags register
   is already involved.  */
6550 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6551 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6553 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed)
6554 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6555 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6556 *total += COSTS_N_INSNS (1);
/* PLUS of an extended value: single insn on ARMv6+ (sxtab/uxtab).  */
6564 if (code == PLUS && arm_arch6 && mode == SImode
6565 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6566 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6568 *total = COSTS_N_INSNS (1);
6569 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6571 *total += rtx_cost (XEXP (x, 1), code, speed)
6575 /* MLA: All arguments must be registers. We filter out
6576 multiplication by a power of two, so that we fall down into
6578 if (GET_CODE (XEXP (x, 0)) == MULT
6579 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6581 /* The cost comes from the cost of the multiply. */
6585 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6587 if (TARGET_HARD_FLOAT
6589 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6591 *total = COSTS_N_INSNS (1);
6592 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6593 && arm_const_double_rtx (XEXP (x, 1)))
6595 *total += rtx_cost (XEXP (x, 0), code, speed)
6602 *total = COSTS_N_INSNS (20);
6606 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6607 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6609 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed)
6610 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6611 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6612 *total += COSTS_N_INSNS (1);
6618 case AND: case XOR: case IOR:
6620 /* Normally the frame registers will be spilt into reg+const during
6621 reload, so it is a bad idea to combine them with other instructions,
6622 since then they might not be moved outside of loops. As a compromise
6623 we allow integration with ops that have a constant as their second
6625 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6626 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6627 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6628 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6629 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6634 *total += COSTS_N_INSNS (2);
6635 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6636 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6638 *total += rtx_cost (XEXP (x, 0), code, speed)
6645 *total += COSTS_N_INSNS (1);
6646 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6647 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6649 *total += rtx_cost (XEXP (x, 0), code, speed)
/* Logical op with a shifted first operand: the shift is free.  */
6652 subcode = GET_CODE (XEXP (x, 0));
6653 if (subcode == ASHIFT || subcode == ASHIFTRT
6654 || subcode == LSHIFTRT
6655 || subcode == ROTATE || subcode == ROTATERT)
6657 *total += rtx_cost (XEXP (x, 1), code, speed)
6658 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed)
6663 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6665 *total += rtx_cost (XEXP (x, 1), code, speed)
6666 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed)
6670 if (subcode == UMIN || subcode == UMAX
6671 || subcode == SMIN || subcode == SMAX)
6673 *total = COSTS_N_INSNS (3);
6680 /* This should have been handled by the CPU specific routines. */
/* Widening-multiply high part: smull/umull pattern recognition.  */
6684 if (arm_arch3m && mode == SImode
6685 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6686 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6687 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6688 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6689 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6690 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6692 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed)
6695 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6699 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6701 if (TARGET_HARD_FLOAT
6703 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6705 *total = COSTS_N_INSNS (1);
6708 *total = COSTS_N_INSNS (2);
/* NOT/NEG: one insn per word; a shifted operand folds in free.  */
6714 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6715 if (mode == SImode && code == NOT)
6717 subcode = GET_CODE (XEXP (x, 0));
6718 if (subcode == ASHIFT || subcode == ASHIFTRT
6719 || subcode == LSHIFTRT
6720 || subcode == ROTATE || subcode == ROTATERT
6722 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6724 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed)
6725 /* Register shifts cost an extra cycle. */
6726 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6727 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
/* IF_THEN_ELSE: branches (PC arms) vs conditional execution.  */
6736 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6738 *total = COSTS_N_INSNS (4);
6742 operand = XEXP (x, 0);
6744 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6745 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6746 && GET_CODE (XEXP (operand, 0)) == REG
6747 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6748 *total += COSTS_N_INSNS (1);
6749 *total += (rtx_cost (XEXP (x, 1), code, speed)
6750 + rtx_cost (XEXP (x, 2), code, speed));
/* Comparisons against zero that aren't already on the flags reg.  */
6754 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6756 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed)
6762 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6763 && mode == SImode && XEXP (x, 1) == const0_rtx)
6765 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed)
6771 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6772 && mode == SImode && XEXP (x, 1) == const0_rtx)
6774 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed)
6794 /* SCC insns. In the case where the comparison has already been
6795 performed, then they cost 2 instructions. Otherwise they need
6796 an additional comparison before them. */
6797 *total = COSTS_N_INSNS (2);
6798 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6805 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6811 *total += COSTS_N_INSNS (1);
6812 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6813 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6815 *total += rtx_cost (XEXP (x, 0), code, speed)
6819 subcode = GET_CODE (XEXP (x, 0));
6820 if (subcode == ASHIFT || subcode == ASHIFTRT
6821 || subcode == LSHIFTRT
6822 || subcode == ROTATE || subcode == ROTATERT)
6824 *total += rtx_cost (XEXP (x, 1), code, speed)
6825 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed)
6830 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6832 *total += rtx_cost (XEXP (x, 1), code, speed)
6833 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed)
/* MIN/MAX style ops: compare + conditional move, plus any non-free
   constant operand.  */
6843 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed)
6844 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6845 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6846 *total += rtx_cost (XEXP (x, 1), code, speed)
6850 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6852 if (TARGET_HARD_FLOAT
6854 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6856 *total = COSTS_N_INSNS (1);
6859 *total = COSTS_N_INSNS (20);
6862 *total = COSTS_N_INSNS (1);
6864 *total += COSTS_N_INSNS (3);
/* ZERO_EXTEND/SIGN_EXTEND of integer values: depends on whether the
   target has the extend instructions (ARMv4+/ARMv6) or must fall back
   to load tricks or shift pairs.  */
6870 if (GET_MODE_CLASS (mode) == MODE_INT)
6872 rtx op = XEXP (x, 0);
6873 enum machine_mode opmode = GET_MODE (op);
6876 *total += COSTS_N_INSNS (1);
6878 if (opmode != SImode)
6882 /* If !arm_arch4, we use one of the extendhisi2_mem
6883 or movhi_bytes patterns for HImode. For a QImode
6884 sign extension, we first zero-extend from memory
6885 and then perform a shift sequence. */
6886 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6887 *total += COSTS_N_INSNS (2);
6890 *total += COSTS_N_INSNS (1);
6892 /* We don't have the necessary insn, so we need to perform some
6894 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6895 /* An and with constant 255. */
6896 *total += COSTS_N_INSNS (1);
6898 /* A shift sequence. Increase costs slightly to avoid
6899 combining two shifts into an extend operation. */
6900 *total += COSTS_N_INSNS (2) + 1;
6906 switch (GET_MODE (XEXP (x, 0)))
6913 *total = COSTS_N_INSNS (1);
6923 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed)
/* CONST_INT: one insn if directly (or inverted) encodable, otherwise
   ask arm_gen_constant how many insns synthesis would take.  */
6927 if (const_ok_for_arm (INTVAL (x))
6928 || const_ok_for_arm (~INTVAL (x)))
6929 *total = COSTS_N_INSNS (1);
6931 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6932 INTVAL (x), NULL_RTX,
6939 *total = COSTS_N_INSNS (3);
6943 *total = COSTS_N_INSNS (1);
6947 *total = COSTS_N_INSNS (1);
6948 *total += rtx_cost (XEXP (x, 0), code, speed)
/* CONST_DOUBLE: free-ish if VFPv3 can encode it as an immediate.  */
6952 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6953 && (mode == SFmode || !TARGET_VFP_SINGLE))
6954 *total = COSTS_N_INSNS (1);
6956 *total = COSTS_N_INSNS (4);
6960 *total = COSTS_N_INSNS (4);
6965 /* Estimates the size cost of thumb1 instructions.
6966 For now most of the code is copied from thumb1_rtx_costs. We need more
6967 fine grain tuning when we have more related test cases. */
6969 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6971 enum machine_mode mode = GET_MODE (x);
6984 return COSTS_N_INSNS (1);
6987 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6989 /* Thumb1 mul instruction can't operate on const. We must Load it
6990 into a register first. */
6991 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
6992 return COSTS_N_INSNS (1) + const_size;
6994 return COSTS_N_INSNS (1);
/* NOTE(review): same operator-precedence quirk as thumb1_rtx_costs --
   `+' binds tighter than `==', so the inner expression compares a sum
   against MEM instead of summing two MEM tests; confirm intent.  */
6997 return (COSTS_N_INSNS (1)
6998 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6999 + GET_CODE (SET_DEST (x)) == MEM));
/* CONST_INT: mirrors thumb1_rtx_costs -- the value of OUTER decides
   whether a small immediate is free.  */
7004 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7006 if (thumb_shiftable_const (INTVAL (x)))
7007 return COSTS_N_INSNS (2);
7008 return COSTS_N_INSNS (3);
7010 else if ((outer == PLUS || outer == COMPARE)
7011 && INTVAL (x) < 256 && INTVAL (x) > -256)
7013 else if ((outer == IOR || outer == XOR || outer == AND)
7014 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7015 return COSTS_N_INSNS (1);
7016 else if (outer == AND)
7019 /* This duplicates the tests in the andsi3 expander. */
7020 for (i = 9; i <= 31; i++)
7021 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7022 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7023 return COSTS_N_INSNS (2);
7025 else if (outer == ASHIFT || outer == ASHIFTRT
7026 || outer == LSHIFTRT)
7028 return COSTS_N_INSNS (2);
7034 return COSTS_N_INSNS (3);
7052 /* XXX another guess. */
7053 /* Memory costs quite a lot for the first word, but subsequent words
7054 load at the equivalent of a single insn each. */
7055 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7056 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7061 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7066 /* XXX still guessing. */
7067 switch (GET_MODE (XEXP (x, 0)))
7070 return (1 + (mode == DImode ? 4 : 0)
7071 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7074 return (4 + (mode == DImode ? 4 : 0)
7075 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7078 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7089 /* RTX costs when optimizing for size. */
/* Size-oriented cost function used by arm_rtx_costs when !speed.
   Thumb-1 is delegated to thumb1_size_rtx_costs.  (Elided listing:
   case labels and returns are largely missing below.)  */
7091 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7094 enum machine_mode mode = GET_MODE (x);
7097 *total = thumb1_size_rtx_costs (x, code, outer_code);
7101 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7105 /* A memory access costs 1 insn if the mode is small, or the address is
7106 a single register, otherwise it costs one insn per word. */
7107 if (REG_P (XEXP (x, 0)))
7108 *total = COSTS_N_INSNS (1);
7110 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7117 /* Needs a libcall, so it costs about this. */
7118 *total = COSTS_N_INSNS (2);
/* Rotate by register: mov + the rotate source.  */
7122 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7124 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false)
/* DImode shift by constant is a short synthesised sequence; SImode
   shifts are single insns, register shift amounts mildly penalised.  */
7132 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7134 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false)
7137 else if (mode == SImode)
7139 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false)
7140 /* Slightly disparage register shifts, but not by much. */
7141 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7142 *total += 1 + rtx_cost (XEXP (x, 1), code, false)
7146 /* Needs a libcall. */
7147 *total = COSTS_N_INSNS (2);
7151 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7152 && (mode == SFmode || !TARGET_VFP_SINGLE))
7154 *total = COSTS_N_INSNS (1);
/* MINUS with a shifted operand: the shift folds into the insn.  */
7160 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7161 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7163 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7164 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7165 || subcode1 == ROTATE || subcode1 == ROTATERT
7166 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7167 || subcode1 == ASHIFTRT)
7169 /* It's just the cost of the two operands. */
7174 *total = COSTS_N_INSNS (1);
7178 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7182 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7183 && (mode == SFmode || !TARGET_VFP_SINGLE))
7185 *total = COSTS_N_INSNS (1);
7189 /* A shift as a part of ADD costs nothing. */
7190 if (GET_CODE (XEXP (x, 0)) == MULT
7191 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7193 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7194 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false)
7195 *total += rtx_cost (XEXP (x, 1), code, false)
7200 case AND: case XOR: case IOR:
7203 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7205 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7206 || subcode == LSHIFTRT || subcode == ASHIFTRT
7207 || (code == AND && subcode == NOT))
7209 /* It's just the cost of the two operands. */
7215 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7219 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7223 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7224 && (mode == SFmode || !TARGET_VFP_SINGLE))
7226 *total = COSTS_N_INSNS (1);
7232 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
/* Comparisons already on the flags register are free.  */
7241 if (cc_register (XEXP (x, 0), VOIDmode))
7244 *total = COSTS_N_INSNS (1);
7248 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7249 && (mode == SFmode || !TARGET_VFP_SINGLE))
7250 *total = COSTS_N_INSNS (1);
7252 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
/* Extension ops share logic with the speed cost kernel (speed==0).  */
7257 return arm_rtx_costs_1 (x, outer_code, total, 0);
7260 if (const_ok_for_arm (INTVAL (x)))
7261 /* A multiplication by a constant requires another instruction
7262 to load the constant to a register. */
7263 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7265 else if (const_ok_for_arm (~INTVAL (x)))
7266 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7267 else if (const_ok_for_arm (-INTVAL (x)))
7269 if (outer_code == COMPARE || outer_code == PLUS
7270 || outer_code == MINUS)
7273 *total = COSTS_N_INSNS (1);
7276 *total = COSTS_N_INSNS (2);
7282 *total = COSTS_N_INSNS (2);
7286 *total = COSTS_N_INSNS (4);
7291 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7292 cost of these slightly. */
7293 *total = COSTS_N_INSNS (1) + 1;
7297 if (mode != VOIDmode)
7298 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7300 *total = COSTS_N_INSNS (4); /* Who knows?  */
7305 /* Implement TARGET_RTX_COSTS: dispatch to the size-cost table when
   optimizing for size, otherwise to the tuning-specific speed cost
   function selected in current_tune.  */
7307 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7311 return arm_size_rtx_costs (x, (enum rtx_code) code,
7312 (enum rtx_code) outer_code, total)
7314 return current_tune->rtx_costs (x, (enum rtx_code) code,
7315 (enum rtx_code) outer_code,
7319 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7320 supported on any "slowmul" cores, so it can be ignored. */
/* Slowmul tuning: MULT by a constant is priced by counting the 2-bit
   Booth steps the multiplier array needs for the constant's low 32
   bits; everything else falls through to arm_rtx_costs_1.  */
7323 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7324 int *total, bool speed)
7326 enum machine_mode mode = GET_MODE (x);
/* Thumb-1 uses its own simpler cost table.  */
7330 *total = thumb1_rtx_costs (x, code, outer_code);
/* Float multiplies have no fast path here.  */
7337 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7340 *total = COSTS_N_INSNS (20);
7344 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7346 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7347 & (unsigned HOST_WIDE_INT) 0xffffffff);
7348 int cost, const_ok = const_ok_for_arm (i);
7349 int j, booth_unit_size;
7351 /* Tune as appropriate. */
7352 cost = const_ok ? 4 : 8;
7353 booth_unit_size = 2;
/* One step per 2-bit Booth chunk actually needed by the constant.  */
7354 for (j = 0; i && j < 32; j += booth_unit_size)
7356 i >>= booth_unit_size;
7360 *total = COSTS_N_INSNS (cost);
7361 *total += rtx_cost (XEXP (x, 0), code, speed);
7365 *total = COSTS_N_INSNS (20);
7369 return arm_rtx_costs_1 (x, outer_code, total, speed); /* fix: dropped stray ";;" empty statement */
7374 /* RTX cost for cores with a fast multiply unit (M variants). */
/* Fastmul tuning: widening multiplies (matching sign/zero extends on
   both operands) are 2 insns; constant multiplies use 8-bit Booth
   steps.  Everything else defers to arm_rtx_costs_1.  */
7377 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7378 int *total, bool speed)
7380 enum machine_mode mode = GET_MODE (x);
7384 *total = thumb1_rtx_costs (x, code, outer_code);
7388 /* ??? should thumb2 use different costs? */
7392 /* There is no point basing this on the tuning, since it is always the
7393 fast variant if it exists at all. */
7395 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7396 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7397 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7399 *total = COSTS_N_INSNS(2);
7406 *total = COSTS_N_INSNS (5);
7410 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7412 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7413 & (unsigned HOST_WIDE_INT) 0xffffffff);
7414 int cost, const_ok = const_ok_for_arm (i);
7415 int j, booth_unit_size;
7417 /* Tune as appropriate. */
7418 cost = const_ok ? 4 : 8;
/* Fast multiplier retires 8 bits per step (vs 2 for slowmul).  */
7419 booth_unit_size = 8;
7420 for (j = 0; i && j < 32; j += booth_unit_size)
7422 i >>= booth_unit_size;
7426 *total = COSTS_N_INSNS(cost);
7432 *total = COSTS_N_INSNS (4);
7436 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7438 if (TARGET_HARD_FLOAT
7440 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7442 *total = COSTS_N_INSNS (1);
7447 /* Requires a lib call */
7448 *total = COSTS_N_INSNS (20);
7452 return arm_rtx_costs_1 (x, outer_code, total, speed);
7457 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7458 so it can be ignored. */
/* XScale tuning: penalise COMPARE of a MULT (muls stalls the flags),
   and model the multiplier's ability to retire 15 bits on the first
   cycle and 12 on the second when the second operand is constant.  */
7461 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7462 int *total, bool speed)
7464 enum machine_mode mode = GET_MODE (x);
7468 *total = thumb1_rtx_costs (x, code, outer_code);
7475 if (GET_CODE (XEXP (x, 0)) != MULT)
7476 return arm_rtx_costs_1 (x, outer_code, total, speed);
7478 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7479 will stall until the multiplication is complete. */
7480 *total = COSTS_N_INSNS (3);
7484 /* There is no point basing this on the tuning, since it is always the
7485 fast variant if it exists at all. */
7487 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7488 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7489 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7491 *total = COSTS_N_INSNS (2);
7498 *total = COSTS_N_INSNS (5);
7502 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7504 /* If operand 1 is a constant we can more accurately
7505 calculate the cost of the multiply. The multiplier can
7506 retire 15 bits on the first cycle and a further 12 on the
7507 second. We do, of course, have to load the constant into
7508 a register first. */
7509 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7510 /* There's a general overhead of one cycle. */
7512 unsigned HOST_WIDE_INT masked_const;
7517 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
/* Bits above 15 need a second cycle; bits above 27 a third.  */
7519 masked_const = i & 0xffff8000;
7520 if (masked_const != 0)
7523 masked_const = i & 0xf8000000;
7524 if (masked_const != 0)
7527 *total = COSTS_N_INSNS (cost);
7533 *total = COSTS_N_INSNS (3);
7537 /* Requires a lib call */
7538 *total = COSTS_N_INSNS (20);
7542 return arm_rtx_costs_1 (x, outer_code, total, speed);
7547 /* RTX costs for 9e (and later) cores. */
/* ARM9E-and-later tuning: flat 2-insn cost for integer multiplies
   (widening included); float handled by hardware-FP checks; all other
   codes defer to arm_rtx_costs_1.  */
7550 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7551 int *total, bool speed)
7553 enum machine_mode mode = GET_MODE (x);
7560 *total = COSTS_N_INSNS (3);
7564 *total = thumb1_rtx_costs (x, code, outer_code);
7572 /* There is no point basing this on the tuning, since it is always the
7573 fast variant if it exists at all. */
7575 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7576 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7577 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7579 *total = COSTS_N_INSNS (2);
7586 *total = COSTS_N_INSNS (5);
7592 *total = COSTS_N_INSNS (2);
7596 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7598 if (TARGET_HARD_FLOAT
7600 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7602 *total = COSTS_N_INSNS (1);
7607 *total = COSTS_N_INSNS (20);
7611 return arm_rtx_costs_1 (x, outer_code, total, speed);
7614 /* All address computations that can be done are free, but rtx cost returns
7615 the same for practically all of them. So we weight the different types
7616 of address here in the order (most pref first):
7617 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
/* ARM/Thumb-2 address cost: returns a small relative weight for address X
   following the preference order described above.  */
7619 arm_arm_address_cost (rtx x)
7621 enum rtx_code c = GET_CODE (x);
/* Auto-increment/decrement forms are the cheapest.  */
7623 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7625 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7630 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
/* reg + shifted-reg (or other arithmetic) addressing.  */
7633 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
/* Thumb-1 address cost: plain REG and REG+CONST_INT addresses are cheap;
   anything else is weighted higher.  */
7643 arm_thumb_address_cost (rtx x)
7645 enum rtx_code c = GET_CODE (x);
7650 && GET_CODE (XEXP (x, 0)) == REG
7651 && GET_CODE (XEXP (x, 1)) == CONST_INT)
/* TARGET_ADDRESS_COST hook: dispatch to the 32-bit (ARM/Thumb-2) or
   Thumb-1 address-cost routine depending on the current instruction set.
   SPEED is unused.  */
7658 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7660 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
/* TARGET_SCHED_ADJUST_COST hook: adjust the scheduling COST of the
   dependency LINK between INSN and DEP.  Accounts for shifted-operand
   stalls, cheap anti/output dependencies, call insns, and load-after-store
   pairs that are likely to hit the cache.  */
7664 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7668   /* Some true dependencies can have a higher cost depending
7669 on precisely how certain input operands are used. */
7671 && REG_NOTE_KIND (link) == 0
7672 && recog_memoized (insn) >= 0
7673 && recog_memoized (dep) >= 0)
7675 int shift_opnum = get_attr_shift (insn);
7676 enum attr_type attr_type = get_attr_type (dep);
7678 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7679 operand for INSN. If we have a shifted input operand and the
7680 instruction we depend on is another ALU instruction, then we may
7681 have to account for an additional stall. */
7682 if (shift_opnum != 0
7683 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7685 rtx shifted_operand;
7688 /* Get the shifted operand. */
7689 extract_insn (insn);
7690 shifted_operand = recog_data.operand[shift_opnum];
7692 /* Iterate over all the operands in DEP. If we write an operand
7693 that overlaps with SHIFTED_OPERAND, then we have to increase the
7694 cost of this dependency. */
7696 preprocess_constraints ();
7697 for (opno = 0; opno < recog_data.n_operands; opno++)
7699 /* We can ignore strict inputs. */
7700 if (recog_data.operand_type[opno] == OP_IN)
7703 if (reg_overlap_mentioned_p (recog_data.operand[opno],
/* Anti- and output dependencies are essentially free on this pipeline.  */
7710 /* XXX This is not strictly true for the FPA. */
7711 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7712 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7715 /* Call insns don't incur a stall, even if they follow a load. */
7716 if (REG_NOTE_KIND (link) == 0
7717 && GET_CODE (insn) == CALL_INSN)
/* INSN loads from memory and DEP stores to memory: check whether the load
   address is one we assume to be cached.  */
7720 if ((i_pat = single_set (insn)) != NULL
7721 && GET_CODE (SET_SRC (i_pat)) == MEM
7722 && (d_pat = single_set (dep)) != NULL
7723 && GET_CODE (SET_DEST (d_pat)) == MEM)
7725 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7726 /* This is a load after a store, there is no conflict if the load reads
7727 from a cached area. Assume that loads from the stack, and from the
7728 constant pool are cached, and that others will miss. This is a
7731 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7732 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7733 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7734 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
/* Lazily-initialized table of floating-point constants that can be used
   as immediates.  fp_consts_inited is 0 until init_fp_table runs; it is
   then set to the number of valid entries (see init_fp_table below), so
   it doubles as the table length used by the lookup loops.  */
7741 static int fp_consts_inited = 0;
7743 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7744 static const char * const strings_fp[8] =
7747 "4", "5", "0.5", "10"
/* Parsed REAL_VALUE_TYPE forms of strings_fp, filled in by init_fp_table.  */
7750 static REAL_VALUE_TYPE values_fp[8];
/* One-time initialization of values_fp from strings_fp.  Sets
   fp_consts_inited to the number of usable entries: 1 (just 0.0) when
   only VFP constants are allowed, 8 for FPA.  */
7753 init_fp_table (void)
7759 fp_consts_inited = 1;
7761 fp_consts_inited = 8;
7763 for (i = 0; i < fp_consts_inited; i++)
/* Parse each string as a double-precision literal.  */
7765 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7770 /* Return TRUE if rtx X is a valid immediate FP constant. */
7772 arm_const_double_rtx (rtx x)
/* Lazily build the constant table on first use.  */
7777 if (!fp_consts_inited)
7780 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
/* -0.0 is never a valid immediate even though it compares equal to 0.0.  */
7781 if (REAL_VALUE_MINUS_ZERO (r))
7784 for (i = 0; i < fp_consts_inited; i++)
7785 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7791 /* Return TRUE if rtx X is a valid immediate FPA constant. */
/* As arm_const_double_rtx, but tests the NEGATED value of X against the
   full 8-entry FPA table (FPA insns can apply the negation themselves).  */
7793 neg_const_double_rtx_ok_for_fpa (rtx x)
7798 if (!fp_consts_inited)
7801 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7802 r = real_value_negate (&r);
7803 if (REAL_VALUE_MINUS_ZERO (r))
/* Always scan all 8 entries here — this predicate is FPA-specific.  */
7806 for (i = 0; i < 8; i++)
7807 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7814 /* VFPv3 has a fairly wide range of representable immediates, formed from
7815 "quarter-precision" floating-point values. These can be evaluated using this
7816 formula (with ^ for exponentiation):
7820 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7821 16 <= n <= 31 and 0 <= r <= 7.
7823 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7825 - A (most-significant) is the sign bit.
7826 - BCD are the exponent (encoded as r XOR 3).
7827 - EFGH are the mantissa (encoded as n - 16).
7830 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7831 fconst[sd] instruction, or -1 if X isn't suitable. */
7833 vfp3_const_double_index (rtx x)
7835 REAL_VALUE_TYPE r, m;
7837 unsigned HOST_WIDE_INT mantissa, mant_hi;
7838 unsigned HOST_WIDE_INT mask;
7839 HOST_WIDE_INT m1, m2;
7840 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7842 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7845 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7847 /* We can't represent these things, so detect them first. */
7848 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7851 /* Extract sign, exponent and mantissa. */
7852 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7853 r = real_value_abs (&r);
7854 exponent = REAL_EXP (&r);
7855 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7856 highest (sign) bit, with a fixed binary point at bit point_pos.
7857 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7858 bits for the mantissa, this may fail (low bits would be lost). */
7859 real_ldexp (&m, &r, point_pos - exponent);
7860 REAL_VALUE_TO_INT (&m1, &m2, m);
7864 /* If there are bits set in the low part of the mantissa, we can't
7865 represent this value. */
7869 /* Now make it so that mantissa contains the most-significant bits, and move
7870 the point_pos to indicate that the least-significant bits have been
7872 point_pos -= HOST_BITS_PER_WIDE_INT;
7875 /* We can permit four significant bits of mantissa only, plus a high bit
7876 which is always 1. */
7877 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7878 if ((mantissa & mask) != 0)
7881 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7882 mantissa >>= point_pos - 5;
7884 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7885 floating-point immediate zero with Neon using an integer-zero load, but
7886 that case is handled elsewhere.) */
/* After the shift the mantissa must be the 'n' of the formula above.  */
7890 gcc_assert (mantissa >= 16 && mantissa <= 31);
7892 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7893 normalized significands are in the range [1, 2). (Our mantissa is shifted
7894 left 4 places at this point relative to normalized IEEE754 values). GCC
7895 internally uses [0.5, 1) (see real.c), so the exponent returned from
7896 REAL_EXP must be altered. */
7897 exponent = 5 - exponent;
7899 if (exponent < 0 || exponent > 7)
7902 /* Sign, mantissa and exponent are now in the correct form to plug into the
7903 formula described in the comment above. */
7904 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7907 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
/* Thin predicate wrapper around vfp3_const_double_index.  */
7909 vfp3_const_double_rtx (rtx x)
7914 return vfp3_const_double_index (x) != -1;
7917 /* Recognize immediates which can be used in various Neon instructions. Legal
7918 immediates are described by the following table (for VMVN variants, the
7919 bitwise inverse of the constant shown is recognized. In either case, VMOV
7920 is output and the correct instruction to use for a given constant is chosen
7921 by the assembler). The constant shown is replicated across all elements of
7922 the destination vector.
7924 insn elems variant constant (binary)
7925 ---- ----- ------- -----------------
7926 vmov i32 0 00000000 00000000 00000000 abcdefgh
7927 vmov i32 1 00000000 00000000 abcdefgh 00000000
7928 vmov i32 2 00000000 abcdefgh 00000000 00000000
7929 vmov i32 3 abcdefgh 00000000 00000000 00000000
7930 vmov i16 4 00000000 abcdefgh
7931 vmov i16 5 abcdefgh 00000000
7932 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7933 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7934 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7935 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7936 vmvn i16 10 00000000 abcdefgh
7937 vmvn i16 11 abcdefgh 00000000
7938 vmov i32 12 00000000 00000000 abcdefgh 11111111
7939 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7940 vmov i32 14 00000000 abcdefgh 11111111 11111111
7941 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7943 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7944 eeeeeeee ffffffff gggggggg hhhhhhhh
7945 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7947 For case 18, B = !b. Representable values are exactly those accepted by
7948 vfp3_const_double_index, but are output as floating-point numbers rather
7951 Variants 0-5 (inclusive) may also be used as immediates for the second
7952 operand of VORR/VBIC instructions.
7954 The INVERSE argument causes the bitwise inverse of the given operand to be
7955 recognized instead (used for recognizing legal immediates for the VAND/VORN
7956 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7957 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7958 output, rather than the real insns vbic/vorr).
7960 INVERSE makes no difference to the recognition of float vectors.
7962 The return value is the variant of immediate as shown in the above table, or
7963 -1 if the given value doesn't match any of the listed patterns.
7966 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7967 rtx *modconst, int *elementwidth)
/* CHECK scans the byte image with the given STRIDE and, when TEST holds for
   every position, records the matching variant CLASS and element size.  */
7969 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7971 for (i = 0; i < idx; i += (STRIDE)) \
7976 immtype = (CLASS); \
7977 elsize = (ELSIZE); \
7981 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7982 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7983 unsigned char bytes[16];
7984 int immtype = -1, matches;
/* When INVERSE, XOR every byte with 0xff while splatting so the pattern
   checks below see the inverted value.  */
7985 unsigned int invmask = inverse ? 0xff : 0;
7987 /* Vectors of float constants. */
7988 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7990 rtx el0 = CONST_VECTOR_ELT (op, 0);
7993 if (!vfp3_const_double_rtx (el0))
7996 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
/* All elements must equal element 0 for the f32 splat (variant 18).  */
7998 for (i = 1; i < n_elts; i++)
8000 rtx elt = CONST_VECTOR_ELT (op, i);
8003 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8005 if (!REAL_VALUES_EQUAL (r0, re))
8010 *modconst = CONST_VECTOR_ELT (op, 0);
8018 /* Splat vector constant out into a byte vector. */
8019 for (i = 0; i < n_elts; i++)
8021 rtx el = CONST_VECTOR_ELT (op, i);
8022 unsigned HOST_WIDE_INT elpart;
8023 unsigned int part, parts;
8025 if (GET_CODE (el) == CONST_INT)
8027 elpart = INTVAL (el);
8030 else if (GET_CODE (el) == CONST_DOUBLE)
8032 elpart = CONST_DOUBLE_LOW (el);
8038 for (part = 0; part < parts; part++)
8041 for (byte = 0; byte < innersize; byte++)
8043 bytes[idx++] = (elpart & 0xff) ^ invmask;
8044 elpart >>= BITS_PER_UNIT;
/* A CONST_DOUBLE element contributes a second HOST_WIDE_INT half.  */
8046 if (GET_CODE (el) == CONST_DOUBLE)
8047 elpart = CONST_DOUBLE_HIGH (el);
8052 gcc_assert (idx == GET_MODE_SIZE (mode));
/* Try each variant of the table above, in order, against the byte image.  */
8056 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8057 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8059 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8060 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8062 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8063 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8065 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8066 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8068 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8070 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8072 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8073 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8075 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8076 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8078 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8079 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8081 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8082 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8084 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8086 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8088 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8089 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8091 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8092 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8094 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8095 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8097 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8098 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8100 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8102 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8103 && bytes[i] == bytes[(i + 8) % idx]);
8111 *elementwidth = elsize;
8115 unsigned HOST_WIDE_INT imm = 0;
8117 /* Un-invert bytes of recognized vector, if necessary. */
8119 for (i = 0; i < idx; i++)
8120 bytes[i] ^= invmask;
/* Variant 17 (i64): rebuild the 64-bit all-zero/all-one-byte constant.  */
8124 /* FIXME: Broken on 32-bit H_W_I hosts. */
8125 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8127 for (i = 0; i < 8; i++)
8128 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8129 << (i * BITS_PER_UNIT);
8131 *modconst = GEN_INT (imm);
8135 unsigned HOST_WIDE_INT imm = 0;
/* Other variants: pack the first element's bytes back into an integer.  */
8137 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8138 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8140 *modconst = GEN_INT (imm);
8148 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8149 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8150 float elements), and a modified constant (whatever should be output for a
8151 VMOV) in *MODCONST. */
/* MODCONST and ELEMENTWIDTH may be NULL when the caller only wants the
   yes/no answer.  */
8154 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8155 rtx *modconst, int *elementwidth)
8159 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8165 *modconst = tmpconst;
8168 *elementwidth = tmpwidth;
8173 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8174 the immediate is valid, write a constant suitable for using as an operand
8175 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8176 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8179 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8180 rtx *modconst, int *elementwidth)
8184 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
/* Only variants 0-5 of the immediate table are legal for logic ops.  */
8186 if (retval < 0 || retval > 5)
8190 *modconst = tmpconst;
8193 *elementwidth = tmpwidth;
8198 /* Return a string suitable for output of Neon immediate logic operation
/* Formats "MNEM.iWIDTH  dest, imm" into a static buffer; QUAD selects the
   quad-register (%q0) versus double-register (%P0) operand template.
   NOTE: returns a pointer to static storage — not reentrant.  */
8202 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8203 int inverse, int quad)
8205 int width, is_valid;
8206 static char templ[40];
/* Also rewrites *op2 in place with the canonical immediate.  */
8208 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8210 gcc_assert (is_valid != 0);
8213 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8215 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8220 /* Output a sequence of pairwise operations to implement a reduction.
8221 NOTE: We do "too much work" here, because pairwise operations work on two
8222 registers-worth of operands in one go. Unfortunately we can't exploit those
8223 extra calculations to do the full operation in fewer steps, I don't think.
8224 Although all vector elements of the result but the first are ignored, we
8225 actually calculate the same result in each of the elements. An alternative
8226 such as initially loading a vector with zero to use as each of the second
8227 operands would use up an additional register and take an extra instruction,
8228 for no particular gain. */
8231 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8232 rtx (*reduc) (rtx, rtx, rtx))
8234 enum machine_mode inner = GET_MODE_INNER (mode);
8235 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
/* Halve the element count each step; the final step writes OP0 directly,
   intermediate steps go through fresh pseudos.  */
8238 for (i = parts / 2; i >= 1; i /= 2)
8240 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8241 emit_insn (reduc (dest, tmpsum, tmpsum));
8246 /* If VALS is a vector constant that can be loaded into a register
8247 using VDUP, generate instructions to do so and return an RTX to
8248 assign to the register. Otherwise return NULL_RTX. */
8251 neon_vdup_constant (rtx vals)
8253 enum machine_mode mode = GET_MODE (vals);
8254 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8255 int n_elts = GET_MODE_NUNITS (mode);
8256 bool all_same = true;
/* VDUP only handles elements of at most 4 bytes.  */
8260 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8263 for (i = 0; i < n_elts; ++i)
8265 x = XVECEXP (vals, 0, i);
8266 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8271 /* The elements are not all the same. We could handle repeating
8272 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8273 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8277 /* We can load this constant by using VDUP and a constant in a
8278 single ARM register. This will be cheaper than a vector
8281 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8282 return gen_rtx_VEC_DUPLICATE (mode, x);
8285 /* Generate code to load VALS, which is a PARALLEL containing only
8286 constants (for vec_init) or CONST_VECTOR, efficiently into a
8287 register. Returns an RTX to copy into the register, or NULL_RTX
8288 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8291 neon_make_constant (rtx vals)
8293 enum machine_mode mode = GET_MODE (vals);
8295 rtx const_vec = NULL_RTX;
8296 int n_elts = GET_MODE_NUNITS (mode);
8300 if (GET_CODE (vals) == CONST_VECTOR)
8302 else if (GET_CODE (vals) == PARALLEL)
8304 /* A CONST_VECTOR must contain only CONST_INTs and
8305 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8306 Only store valid constants in a CONST_VECTOR. */
8307 for (i = 0; i < n_elts; ++i)
8309 rtx x = XVECEXP (vals, 0, i);
8310 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8313 if (n_const == n_elts)
8314 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
/* Prefer the cheapest strategy: VMOV immediate, then VDUP, then a
   constant-pool load.  */
8319 if (const_vec != NULL
8320 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8321 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8323 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8324 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8325 pipeline cycle; creating the constant takes one or two ARM
8328 else if (const_vec != NULL_RTX)
8329 /* Load from constant pool. On Cortex-A8 this takes two cycles
8330 (for either double or quad vectors). We can not take advantage
8331 of single-cycle VLD1 because we need a PC-relative addressing
8335 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8336 We can not construct an initializer. */
8340 /* Initialize vector TARGET to VALS. */
/* Strategies, in order: all-constant vector (via neon_make_constant);
   all-same non-constant element (VDUP); one varying element (load the
   constant part, then VSET_LANE); otherwise build the vector in a stack
   temporary one element at a time.  */
8343 neon_expand_vector_init (rtx target, rtx vals)
8345 enum machine_mode mode = GET_MODE (target);
8346 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8347 int n_elts = GET_MODE_NUNITS (mode);
8348 int n_var = 0, one_var = -1;
8349 bool all_same = true;
/* Count non-constant elements and remember the last one's index.  */
8353 for (i = 0; i < n_elts; ++i)
8355 x = XVECEXP (vals, 0, i);
8356 if (!CONSTANT_P (x))
8357 ++n_var, one_var = i;
8359 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8365 rtx constant = neon_make_constant (vals);
8366 if (constant != NULL_RTX)
8368 emit_move_insn (target, constant);
8373 /* Splat a single non-constant element if we can. */
8374 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8376 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8377 emit_insn (gen_rtx_SET (VOIDmode, target,
8378 gen_rtx_VEC_DUPLICATE (mode, x)));
8382 /* One field is non-constant. Load constant then overwrite varying
8383 field. This is more efficient than using the stack. */
8386 rtx copy = copy_rtx (vals);
8387 rtx index = GEN_INT (one_var);
8389 /* Load constant part of vector, substitute neighboring value for
8391 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8392 neon_expand_vector_init (target, copy);
8394 /* Insert variable. */
8395 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
/* Per-mode vset_lane expanders; the mode switch selects the right one.  */
8399 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8402 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8405 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8408 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8411 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8414 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8417 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8420 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8423 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8431 /* Construct the vector in memory one field at a time
8432 and load the whole vector. */
8433 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8434 for (i = 0; i < n_elts; i++)
8435 emit_move_insn (adjust_address_nv (mem, inner_mode,
8436 i * GET_MODE_SIZE (inner_mode)),
8437 XVECEXP (vals, 0, i));
8438 emit_move_insn (target, mem);
8441 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8442 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8443 reported source locations are bogus. */
8446 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
/* OPERAND must already be a CONST_INT; callers guarantee this.  */
8451 gcc_assert (GET_CODE (operand) == CONST_INT);
8453 lane = INTVAL (operand);
8455 if (lane < low || lane >= high)
8459 /* Bounds-check lanes. */
/* Wrapper around bounds_check with a lane-specific diagnostic.  */
8462 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8464 bounds_check (operand, low, high, "lane out of range");
8467 /* Bounds-check constants. */
/* Wrapper around bounds_check with a constant-specific diagnostic.  */
8470 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8472 bounds_check (operand, low, high, "constant out of range");
/* Return the width in bits of a Neon element of MODE: the full mode width
   for scalar modes, otherwise the inner (element) mode's width.  */
8476 neon_element_bits (enum machine_mode mode)
8479 return GET_MODE_BITSIZE (mode);
8481 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8485 /* Predicates for `match_operand' and `match_operator'. */
8487 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8489 cirrus_memory_offset (rtx op)
8491 /* Reject eliminable registers. */
/* Before reload, addresses based on the frame/arg/virtual pointers may be
   rewritten with an offset, so they cannot be accepted yet.  */
8492 if (! (reload_in_progress || reload_completed)
8493 && ( reg_mentioned_p (frame_pointer_rtx, op)
8494 || reg_mentioned_p (arg_pointer_rtx, op)
8495 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8496 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8497 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8498 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8501 if (GET_CODE (op) == MEM)
8507 /* Match: (mem (reg)). */
8508 if (GET_CODE (ind) == REG)
/* Match: (mem (plus (reg) (const_int))).  */
8514 if (GET_CODE (ind) == PLUS
8515 && GET_CODE (XEXP (ind, 0)) == REG
8516 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8517 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8524 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8525 WB is true if full writeback address modes are allowed and is false
8526 if limited writeback address modes (POST_INC and PRE_DEC) are
8530 arm_coproc_mem_operand (rtx op, bool wb)
8534 /* Reject eliminable registers. */
8535 if (! (reload_in_progress || reload_completed)
8536 && ( reg_mentioned_p (frame_pointer_rtx, op)
8537 || reg_mentioned_p (arg_pointer_rtx, op)
8538 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8539 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8540 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8541 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8544 /* Constants are converted into offsets from labels. */
8545 if (GET_CODE (op) != MEM)
/* Match label-relative addresses once reload has placed constants in the
   literal pool.  */
8550 if (reload_completed
8551 && (GET_CODE (ind) == LABEL_REF
8552 || (GET_CODE (ind) == CONST
8553 && GET_CODE (XEXP (ind, 0)) == PLUS
8554 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8555 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8558 /* Match: (mem (reg)). */
8559 if (GET_CODE (ind) == REG)
8560 return arm_address_register_rtx_p (ind, 0);
8562 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8563 acceptable in any case (subject to verification by
8564 arm_address_register_rtx_p). We need WB to be true to accept
8565 PRE_INC and POST_DEC. */
8566 if (GET_CODE (ind) == POST_INC
8567 || GET_CODE (ind) == PRE_DEC
8569 && (GET_CODE (ind) == PRE_INC
8570 || GET_CODE (ind) == POST_DEC)))
8571 return arm_address_register_rtx_p (XEXP (ind, 0), 0)
/* {POST,PRE}_MODIFY of the form base := base + offset: validate the base
   and fall through to check the PLUS expression below.  */
8574 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8575 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8576 && GET_CODE (XEXP (ind, 1)) == PLUS
8577 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8578 ind = XEXP (ind, 1);
/* Match: base + word-aligned offset in (-1024, 1024) — the coprocessor
   load/store immediate range.  */
8583 if (GET_CODE (ind) == PLUS
8584 && GET_CODE (XEXP (ind, 0)) == REG
8585 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8586 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8587 && INTVAL (XEXP (ind, 1)) > -1024
8588 && INTVAL (XEXP (ind, 1)) < 1024
8589 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8595 /* Return TRUE if OP is a memory operand which we can load or store a vector
8596 to/from. TYPE is one of the following values:
8597 0 - Vector load/store (vldr)
8598 1 - Core registers (ldm)
8599 2 - Element/structure loads (vld1)
8602 neon_vector_mem_operand (rtx op, int type)
8606 /* Reject eliminable registers. */
8607 if (! (reload_in_progress || reload_completed)
8608 && ( reg_mentioned_p (frame_pointer_rtx, op)
8609 || reg_mentioned_p (arg_pointer_rtx, op)
8610 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8611 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8612 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8613 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8616 /* Constants are converted into offsets from labels. */
8617 if (GET_CODE (op) != MEM)
8622 if (reload_completed
8623 && (GET_CODE (ind) == LABEL_REF
8624 || (GET_CODE (ind) == CONST
8625 && GET_CODE (XEXP (ind, 0)) == PLUS
8626 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8627 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8630 /* Match: (mem (reg)). */
8631 if (GET_CODE (ind) == REG)
8632 return arm_address_register_rtx_p (ind, 0);
8634 /* Allow post-increment with Neon registers. */
8635 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8636 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8638 /* FIXME: vld1 allows register post-modify. */
/* Match: base + word-aligned offset within the VLDR immediate range.  */
8644 && GET_CODE (ind) == PLUS
8645 && GET_CODE (XEXP (ind, 0)) == REG
8646 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8647 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8648 && INTVAL (XEXP (ind, 1)) > -1024
8649 && INTVAL (XEXP (ind, 1)) < 1016
8650 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8656 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
/* Only plain (mem (reg)) — and, after reload, label-relative constants —
   are accepted; the structure load/store insns take no offset.  */
8659 neon_struct_mem_operand (rtx op)
8663 /* Reject eliminable registers. */
8664 if (! (reload_in_progress || reload_completed)
8665 && ( reg_mentioned_p (frame_pointer_rtx, op)
8666 || reg_mentioned_p (arg_pointer_rtx, op)
8667 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8668 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8669 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8670 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8673 /* Constants are converted into offsets from labels. */
8674 if (GET_CODE (op) != MEM)
8679 if (reload_completed
8680 && (GET_CODE (ind) == LABEL_REF
8681 || (GET_CODE (ind) == CONST
8682 && GET_CODE (XEXP (ind, 0)) == PLUS
8683 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8684 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8687 /* Match: (mem (reg)). */
8688 if (GET_CODE (ind) == REG)
8689 return arm_address_register_rtx_p (ind, 0);
8694 /* Return true if X is a register that will be eliminated later on. */
/* i.e. the frame pointer, arg pointer, or any virtual register — these are
   replaced with hard-register + offset forms during register elimination.  */
8696 arm_eliminable_register (rtx x)
8698 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8699 || REGNO (x) == ARG_POINTER_REGNUM
8700 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8701 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8704 /* Return GENERAL_REGS if a scratch register required to reload x to/from
8705 coprocessor registers. Otherwise return NO_REGS. */
8708 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
/* HFmode values need Neon fp16 support to move directly.  */
8712 if (!TARGET_NEON_FP16)
8713 return GENERAL_REGS;
8714 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8716 return GENERAL_REGS;
/* Neon vector modes: a directly-addressable vector mem needs no scratch.  */
8720 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8721 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8722 && neon_vector_mem_operand (x, 0))
8725 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8728 return GENERAL_REGS;
8731 /* Values which must be returned in the most-significant end of the return
/* TARGET_RETURN_IN_MSB hook: true for AAPCS big-endian returns of
   aggregates and complex types.  */
8735 arm_return_in_msb (const_tree valtype)
8737 return (TARGET_AAPCS_BASED
8739 && (AGGREGATE_TYPE_P (valtype)
8740 || TREE_CODE (valtype) == COMPLEX_TYPE));
8743 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8744 Used by the Cirrus Maverick code which has to workaround
8745 a hardware bug triggered by such instructions. */
8747 arm_memory_load_p (rtx insn)
/* Fix: stray empty declaration — "rtx body, lhs, rhs;;" had a doubled
   semicolon.  */
8749 rtx body, lhs, rhs;
/* Only ordinary insns can be loads; NULL and jump/call insns are not.  */
8751 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8754 body = PATTERN (insn);
8756 if (GET_CODE (body) != SET)
8759 lhs = XEXP (body, 0);
8760 rhs = XEXP (body, 1);
/* Strip a SUBREG so we look at the underlying register.  */
8762 lhs = REG_OR_SUBREG_RTX (lhs);
8764 /* If the destination is not a general purpose
8765 register we do not have to worry. */
8766 if (GET_CODE (lhs) != REG
8767 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8770 /* As well as loads from memory we also have to react
8771 to loads of invalid constants which will be turned
8772 into loads from the minipool. */
8773 return (GET_CODE (rhs) == MEM
8774 || GET_CODE (rhs) == SYMBOL_REF
8775 || note_invalid_constants (insn, -1, false));
8778 /* Return TRUE if INSN is a Cirrus instruction. */
8780 arm_cirrus_insn_p (rtx insn)
8782 enum attr_cirrus attr;
8784 /* get_attr cannot accept USE or CLOBBER. */
8786 || GET_CODE (insn) != INSN
8787 || GET_CODE (PATTERN (insn)) == USE
8788 || GET_CODE (PATTERN (insn)) == CLOBBER)
/* The insn is Cirrus iff its "cirrus" attribute is anything but NOT.  */
8791 attr = get_attr_cirrus (insn);
8793 return attr != CIRRUS_NOT;
8796 /* Cirrus reorg for invalid instruction combinations. */
/* Machine-reorg helper that inserts NOPs after FIRST wherever a Cirrus
   Maverick errata pattern would otherwise occur: branches followed by
   Cirrus insns, double-precision Cirrus ops, ldr/cfmv* pairs sharing a
   destination register, and coprocessor compares.  */
8798 cirrus_reorg (rtx first)
8800 enum attr_cirrus attr;
8801 rtx body = PATTERN (first);
8805 /* Any branch must be followed by 2 non Cirrus instructions. */
8806 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8809 t = next_nonnote_insn (first);
8811 if (arm_cirrus_insn_p (t))
8814 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8818 emit_insn_after (gen_nop (), first);
8823 /* (float (blah)) is in parallel with a clobber. */
8824 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8825 body = XVECEXP (body, 0, 0);
8827 if (GET_CODE (body) == SET)
8829 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8831 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8832 be followed by a non Cirrus insn. */
8833 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8835 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8836 emit_insn_after (gen_nop (), first);
8840 else if (arm_memory_load_p (first))
8842 unsigned int arm_regno;
8844 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8845 ldr/cfmv64hr combination where the Rd field is the same
8846 in both instructions must be split with a non Cirrus
8853 /* Get Arm register number for ldr insn. */
8854 if (GET_CODE (lhs) == REG)
8855 arm_regno = REGNO (lhs);
8858 gcc_assert (GET_CODE (rhs) == REG);
8859 arm_regno = REGNO (rhs);
/* Look at the insn following the load for the conflicting move.  */
8863 first = next_nonnote_insn (first);
8865 if (! arm_cirrus_insn_p (first))
8868 body = PATTERN (first);
8870 /* (float (blah)) is in parallel with a clobber. */
8871 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8872 body = XVECEXP (body, 0, 0);
8874 if (GET_CODE (body) == FLOAT)
8875 body = XEXP (body, 0);
/* A Cirrus move reading the same register as the preceding load needs
   a NOP between them.  */
8877 if (get_attr_cirrus (first) == CIRRUS_MOVE
8878 && GET_CODE (XEXP (body, 1)) == REG
8879 && arm_regno == REGNO (XEXP (body, 1)))
8880 emit_insn_after (gen_nop (), first);
8886 /* get_attr cannot accept USE or CLOBBER. */
8888 || GET_CODE (first) != INSN
8889 || GET_CODE (PATTERN (first)) == USE
8890 || GET_CODE (PATTERN (first)) == CLOBBER)
8893 attr = get_attr_cirrus (first);
8895 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8896 must be followed by a non-coprocessor instruction. */
8897 if (attr == CIRRUS_COMPARE)
8901 t = next_nonnote_insn (first);
8903 if (arm_cirrus_insn_p (t))
8906 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8910 emit_insn_after (gen_nop (), first);
8916 /* Return TRUE if X references a SYMBOL_REF. */
/* Recursive walk over the rtx X.  NOTE(review): sampled listing -- the
   return type, local declarations (fmt, i, j) and the early 'return 1'
   lines are elided between the numbered lines.  */
8918 symbol_mentioned_p (rtx x)
8923 if (GET_CODE (x) == SYMBOL_REF)
8926 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8927 are constant offsets, not symbols. */
8928 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
/* Recurse over every operand, using the rtx format string to decide
   whether an operand is a vector ('E') or a single expression ('e'). */
8931 fmt = GET_RTX_FORMAT (GET_CODE (x));
8933 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8939 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8940 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8943 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8950 /* Return TRUE if X references a LABEL_REF. */
/* Mirror of symbol_mentioned_p for labels.  NOTE(review): sampled
   listing -- return type, locals and 'return 1' lines are elided.  */
8952 label_mentioned_p (rtx x)
8957 if (GET_CODE (x) == LABEL_REF)
8960 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8961 instruction, but they are constant offsets, not symbols. */
8962 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
/* Recurse over operands per the rtx format string ('E' = vector,
   'e' = expression). */
8965 fmt = GET_RTX_FORMAT (GET_CODE (x));
8966 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8972 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8973 if (label_mentioned_p (XVECEXP (x, i, j)))
8976 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
/* Return nonzero if X mentions a TLS unspec.  NOTE(review): sampled
   listing -- the switch's case labels, default and return statements
   are elided; the visible recursion presumably handles CONST wrappers
   -- confirm against the full source.  */
8984 tls_mentioned_p (rtx x)
8986 switch (GET_CODE (x))
8989 return tls_mentioned_p (XEXP (x, 0));
8992 if (XINT (x, 1) == UNSPEC_TLS)
9000 /* Must not copy any rtx that uses a pc-relative address. */
/* for_each_rtx callback: flags UNSPEC_PIC_BASE expressions so that
   insns containing them are not copied.  NOTE(review): the parameter is
   spelled 'date' (presumably meant 'data'); harmless since it is
   ATTRIBUTE_UNUSED.  Sampled listing -- the return statements are
   elided.  */
9003 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
9005 if (GET_CODE (*x) == UNSPEC
9006 && XINT (*x, 1) == UNSPEC_PIC_BASE)
/* Target hook: return nonzero (cannot copy) when INSN's pattern
   contains a pc-relative UNSPEC_PIC_BASE, as detected by the
   arm_note_pic_base callback above.  */
9012 arm_cannot_copy_insn_p (rtx insn)
9014 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9020 enum rtx_code code = GET_CODE (x);
9037 /* Return 1 if memory locations are adjacent. */
/* A and B are MEMs; accept only REG or (PLUS reg const_int) addresses
   from the same base register with offsets differing by exactly 4.
   NOTE(review): sampled listing -- locals (reg0, reg1, val_diff), the
   'else' arms that handle the plain-REG case and several returns are
   elided.  */
9039 adjacent_mem_locations (rtx a, rtx b)
9041 /* We don't guarantee to preserve the order of these memory refs. */
9042 if (volatile_refs_p (a) || volatile_refs_p (b))
9045 if ((GET_CODE (XEXP (a, 0)) == REG
9046 || (GET_CODE (XEXP (a, 0)) == PLUS
9047 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9048 && (GET_CODE (XEXP (b, 0)) == REG
9049 || (GET_CODE (XEXP (b, 0)) == PLUS
9050 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9052 HOST_WIDE_INT val0 = 0, val1 = 0;
9056 if (GET_CODE (XEXP (a, 0)) == PLUS)
9058 reg0 = XEXP (XEXP (a, 0), 0);
9059 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9064 if (GET_CODE (XEXP (b, 0)) == PLUS)
9066 reg1 = XEXP (XEXP (b, 0), 0);
9067 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9072 /* Don't accept any offset that will require multiple
9073 instructions to handle, since this would cause the
9074 arith_adjacentmem pattern to output an overlong sequence. */
9075 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9078 /* Don't allow an eliminable register: register elimination can make
9079 the offset too large. */
9080 if (arm_eliminable_register (reg0))
9083 val_diff = val1 - val0;
9087 /* If the target has load delay slots, then there's no benefit
9088 to using an ldm instruction unless the offset is zero and
9089 we are optimizing for size. */
9090 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9091 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9092 && (val_diff == 4 || val_diff == -4));
/* No load-delay-slot target: adjacency only requires same base and a
   +/-4 offset difference. */
9095 return ((REGNO (reg0) == REGNO (reg1))
9096 && (val_diff == 4 || val_diff == -4));
9102 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9103 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9104 instruction. ADD_OFFSET is nonzero if the base address register needs
9105 to be modified with an add instruction before we can use it. */
/* NOTE(review): sampled listing -- the function's return type, braces
   and the final 'return true' are elided.  */
9108 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9109 int nops, HOST_WIDE_INT add_offset)
9111 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9112 if the offset isn't small enough. The reason 2 ldrs are faster
9113 is because these ARMs are able to do more than one cache access
9114 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9115 whilst the ARM8 has a double bandwidth cache. This means that
9116 these cores can do both an instruction fetch and a data fetch in
9117 a single cycle, so the trick of calculating the address into a
9118 scratch register (one of the result regs) and then doing a load
9119 multiple actually becomes slower (and no smaller in code size).
9120 That is the transformation
9122 ldr rd1, [rbase + offset]
9123 ldr rd2, [rbase + offset + 4]
9127 add rd1, rbase, offset
9128 ldmia rd1, {rd1, rd2}
9130 produces worse code -- '3 cycles + any stalls on rd2' instead of
9131 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9132 access per cycle, the first sequence could never complete in less
9133 than 6 cycles, whereas the ldm sequence would only take 5 and
9134 would make better use of sequential accesses if not hitting the
9137 We cheat here and test 'arm_ld_sched' which we currently know to
9138 only be true for the ARM8, ARM9 and StrongARM. If this ever
9139 changes, then the test below needs to be reworked. */
/* Unprofitable case: exactly two ops on a load-scheduled core when the
   base must first be adjusted with an add. */
9140 if (nops == 2 && arm_ld_sched && add_offset != 0)
9146 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9147 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9148 an array ORDER which describes the sequence to use when accessing the
9149 offsets that produces an ascending order. In this sequence, each
9150 offset must be larger by exactly 4 than the previous one. ORDER[0]
9151 must have been filled in with the lowest offset by the caller.
9152 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9153 we use to verify that ORDER produces an ascending order of registers.
9154 Return true if it was possible to construct such an order, false if
9158 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
/* NOTE(review): sampled listing -- the second parameter line
   (unsorted_regs), the opening brace, locals i/j, and the return
   statements are elided between the numbered lines.  */
9162 for (i = 1; i < nops; i++)
/* Seed order[i] with the previous index so we can detect below whether
   a successor offset was found. */
9166 order[i] = order[i - 1];
9167 for (j = 0; j < nops; j++)
9168 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9170 /* We must find exactly one offset that is higher than the
9171 previous one by 4. */
9172 if (order[i] != order[i - 1])
9176 if (order[i] == order[i - 1])
9178 /* The register numbers must be ascending. */
9179 if (unsorted_regs != NULL
9180 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
/* Check whether OPERANDS (NOPS loads followed by their NOPS mems) can
   be combined into one ldm; on success fill REGS/BASE/LOAD_OFFSET and
   return the ldm addressing case (1=ia, 2=ib, 3=da, 4=db, 5=offset).
   NOTE(review): sampled listing -- return type, locals (i, base_reg,
   ldm_case, reg, offset), several 'return 0' lines and closing braces
   are elided between the numbered lines.  */
9187 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9188 HOST_WIDE_INT *load_offset)
9190 int unsorted_regs[MAX_LDM_STM_OPS];
9191 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9192 int order[MAX_LDM_STM_OPS];
9196 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9197 easily extended if required. */
9198 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9200 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9202 /* Loop over the operands and check that the memory references are
9203 suitable (i.e. immediate offsets from the same base register). At
9204 the same time, extract the target register, and the memory
9206 for (i = 0; i < nops; i++)
9211 /* Convert a subreg of a mem into the mem itself. */
9212 if (GET_CODE (operands[nops + i]) == SUBREG)
9213 operands[nops + i] = alter_subreg (operands + (nops + i));
9215 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9217 /* Don't reorder volatile memory references; it doesn't seem worth
9218 looking for the case where the order is ok anyway. */
9219 if (MEM_VOLATILE_P (operands[nops + i]))
9222 offset = const0_rtx;
/* Accept a plain (possibly subreg'd) base register, or
   (PLUS base const_int); the embedded assignments capture reg/offset. */
9224 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9225 || (GET_CODE (reg) == SUBREG
9226 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9227 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9228 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9230 || (GET_CODE (reg) == SUBREG
9231 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9232 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9236 base_reg = REGNO (reg);
9239 if (base_reg != (int) REGNO (reg))
9240 /* Not addressed from the same base register. */
9243 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9244 ? REGNO (operands[i])
9245 : REGNO (SUBREG_REG (operands[i])));
9247 /* If it isn't an integer register, or if it overwrites the
9248 base register but isn't the last insn in the list, then
9249 we can't do this. */
9250 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9251 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9254 unsorted_offsets[i] = INTVAL (offset);
9255 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9259 /* Not a suitable memory address. */
9263 /* All the useful information has now been extracted from the
9264 operands into unsorted_regs and unsorted_offsets; additionally,
9265 order[0] has been set to the lowest offset in the list. Sort
9266 the offsets into order, verifying that they are adjacent, and
9267 check that the register numbers are ascending. */
9268 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9275 for (i = 0; i < nops; i++)
9276 regs[i] = unsorted_regs[order[i]];
9278 *load_offset = unsorted_offsets[order[0]];
/* Classify the addressing mode; ldmib/ldmda exist only in ARM state. */
9281 if (unsorted_offsets[order[0]] == 0)
9282 ldm_case = 1; /* ldmia */
9283 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9284 ldm_case = 2; /* ldmib */
9285 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9286 ldm_case = 3; /* ldmda */
9287 else if (unsorted_offsets[order[nops - 1]] == -4)
9288 ldm_case = 4; /* ldmdb */
9289 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9290 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9295 if (!multiple_operation_profitable_p (false, nops,
9297 ? unsorted_offsets[order[0]] : 0))
/* Emit the assembler text for a peephole-combined ldm, based on the
   case returned by load_multiple_sequence.  NOTE(review): sampled
   listing -- return type, locals (i, base_reg, buf), case labels and
   the default/return lines are elided.  */
9304 emit_ldm_seq (rtx *operands, int nops)
9306 int regs[MAX_LDM_STM_OPS];
9308 HOST_WIDE_INT offset;
9312 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9315 strcpy (buf, "ldm%(ia%)\t");
9319 strcpy (buf, "ldm%(ib%)\t");
9323 strcpy (buf, "ldm%(da%)\t");
9327 strcpy (buf, "ldm%(db%)\t");
/* Case 5: materialise base+offset into the first destination register
   with an add (or sub for negative offsets), then ldmia from it. */
9332 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9333 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9336 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9337 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9339 output_asm_insn (buf, operands);
9341 strcpy (buf, "ldm%(ia%)\t");
/* Append "rbase, {r1, r2, ...}" and the peephole marker comment. */
9348 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9349 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9351 for (i = 1; i < nops; i++)
9352 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9353 reg_names[regs[i]]);
9355 strcat (buf, "}\t%@ phole ldm");
9357 output_asm_insn (buf, operands);
/* Store-side counterpart of load_multiple_sequence: decide whether
   NOPS stores can become one stm; fill REGS/BASE/LOAD_OFFSET and
   return the stm addressing case (1=ia, 2=ib, 3=da, 4=db).
   NOTE(review): sampled listing -- return type, locals, 'return 0'
   lines and closing braces are elided between the numbered lines.  */
9362 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9363 HOST_WIDE_INT * load_offset)
9365 int unsorted_regs[MAX_LDM_STM_OPS];
9366 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9367 int order[MAX_LDM_STM_OPS];
9371 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9372 easily extended if required. */
9373 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9375 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9377 /* Loop over the operands and check that the memory references are
9378 suitable (i.e. immediate offsets from the same base register). At
9379 the same time, extract the target register, and the memory
9381 for (i = 0; i < nops; i++)
9386 /* Convert a subreg of a mem into the mem itself. */
9387 if (GET_CODE (operands[nops + i]) == SUBREG)
9388 operands[nops + i] = alter_subreg (operands + (nops + i));
9390 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9392 /* Don't reorder volatile memory references; it doesn't seem worth
9393 looking for the case where the order is ok anyway. */
9394 if (MEM_VOLATILE_P (operands[nops + i]))
9397 offset = const0_rtx;
/* Same address-shape test as the load variant: REG or
   (PLUS base const_int), with embedded assignments to reg/offset. */
9399 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9400 || (GET_CODE (reg) == SUBREG
9401 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9402 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9403 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9405 || (GET_CODE (reg) == SUBREG
9406 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9407 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9410 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9411 ? REGNO (operands[i])
9412 : REGNO (SUBREG_REG (operands[i])));
9414 base_reg = REGNO (reg);
9415 else if (base_reg != (int) REGNO (reg))
9416 /* Not addressed from the same base register. */
9419 /* If it isn't an integer register, then we can't do this. */
9420 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9423 unsorted_offsets[i] = INTVAL (offset);
9424 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9428 /* Not a suitable memory address. */
9432 /* All the useful information has now been extracted from the
9433 operands into unsorted_regs and unsorted_offsets; additionally,
9434 order[0] has been set to the lowest offset in the list. Sort
9435 the offsets into order, verifying that they are adjacent, and
9436 check that the register numbers are ascending. */
9437 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9444 for (i = 0; i < nops; i++)
9445 regs[i] = unsorted_regs[order[i]];
9447 *load_offset = unsorted_offsets[order[0]];
/* Classify the addressing mode; stmib/stmda exist only in ARM state. */
9450 if (unsorted_offsets[order[0]] == 0)
9451 stm_case = 1; /* stmia */
9452 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9453 stm_case = 2; /* stmib */
9454 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9455 stm_case = 3; /* stmda */
9456 else if (unsorted_offsets[order[nops - 1]] == -4)
9457 stm_case = 4; /* stmdb */
9461 if (!multiple_operation_profitable_p (false, nops, 0))
/* Emit the assembler text for a peephole-combined stm, based on the
   case returned by store_multiple_sequence.  NOTE(review): sampled
   listing -- return type, locals, case labels and default/return lines
   are elided.  */
9468 emit_stm_seq (rtx *operands, int nops)
9470 int regs[MAX_LDM_STM_OPS];
9472 HOST_WIDE_INT offset;
9476 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9479 strcpy (buf, "stm%(ia%)\t");
9483 strcpy (buf, "stm%(ib%)\t");
9487 strcpy (buf, "stm%(da%)\t");
9491 strcpy (buf, "stm%(db%)\t");
/* Append "rbase, {r1, r2, ...}" and the peephole marker comment. */
9498 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9499 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9501 for (i = 1; i < nops; i++)
9502 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9503 reg_names[regs[i]]);
9505 strcat (buf, "}\t%@ phole stm");
9507 output_asm_insn (buf, operands);
9511 /* Routines for use in generating RTL. */
/* Build RTL that loads COUNT consecutive SImode regs starting at
   BASE_REGNO from memory at FROM (direction UP, optional WRITE_BACK),
   updating *OFFSETP as it goes.  Emits individual ldr moves for the
   XScale small-count case, otherwise returns a PARALLEL suitable for a
   ldm pattern.  NOTE(review): sampled listing -- return type, locals
   (i, j, result, mem, addr), some returns/braces are elided.  */
9514 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9515 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9517 HOST_WIDE_INT offset = *offsetp;
9520 int sign = up ? 1 : -1;
9523 /* XScale has load-store double instructions, but they have stricter
9524 alignment requirements than load-store multiple, so we cannot
9527 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9528 the pipeline until completion.
9536 An ldr instruction takes 1-3 cycles, but does not block the
9545 Best case ldr will always win. However, the more ldr instructions
9546 we issue, the less likely we are to be able to schedule them well.
9547 Using ldr instructions also increases code size.
9549 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9550 for counts of 3 or 4 regs. */
9551 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9557 for (i = 0; i < count; i++)
9559 addr = plus_constant (from, i * 4 * sign);
9560 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9561 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9567 emit_move_insn (from, plus_constant (from, count * 4 * sign));
/* Non-XScale path: build a PARALLEL; element 0 is the optional
   base-register write-back SET. */
9577 result = gen_rtx_PARALLEL (VOIDmode,
9578 rtvec_alloc (count + (write_back ? 1 : 0)));
9581 XVECEXP (result, 0, 0)
9582 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9587 for (j = 0; i < count; i++, j++)
9589 addr = plus_constant (from, j * 4 * sign);
9590 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9591 XVECEXP (result, 0, i)
9592 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
/* Store-side twin of arm_gen_load_multiple: store COUNT consecutive
   SImode regs starting at BASE_REGNO to memory at TO, with the same
   XScale small-count fallback and optional base write-back.
   NOTE(review): sampled listing -- return type, locals and several
   returns/braces are elided.  */
9603 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9604 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9606 HOST_WIDE_INT offset = *offsetp;
9609 int sign = up ? 1 : -1;
9612 /* See arm_gen_load_multiple for discussion of
9613 the pros/cons of ldm/stm usage for XScale. */
9614 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9620 for (i = 0; i < count; i++)
9622 addr = plus_constant (to, i * 4 * sign);
9623 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9624 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9630 emit_move_insn (to, plus_constant (to, count * 4 * sign));
/* Non-XScale path: build a PARALLEL; element 0 is the optional
   base-register write-back SET. */
9640 result = gen_rtx_PARALLEL (VOIDmode,
9641 rtvec_alloc (count + (write_back ? 1 : 0)));
9644 XVECEXP (result, 0, 0)
9645 = gen_rtx_SET (VOIDmode, to,
9646 plus_constant (to, count * 4 * sign));
9651 for (j = 0; i < count; i++, j++)
9653 addr = plus_constant (to, j * 4 * sign);
9654 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9655 XVECEXP (result, 0, i)
9656 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
/* Expand a small, word-aligned memory copy (movmemqi): operands are
   dst mem, src mem, byte count (<= 64, CONST_INT) and alignment
   (multiple of 4).  Copies in 4-word ldm/stm bursts, then handles the
   trailing 1-3 bytes.  NOTE(review): sampled listing -- return type,
   'return 0/1' lines, some locals and closing braces are elided.  */
9667 arm_gen_movmemqi (rtx *operands)
9669 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9670 HOST_WIDE_INT srcoffset, dstoffset;
9672 rtx src, dst, srcbase, dstbase;
9673 rtx part_bytes_reg = NULL;
9676 if (GET_CODE (operands[2]) != CONST_INT
9677 || GET_CODE (operands[3]) != CONST_INT
9678 || INTVAL (operands[2]) > 64
9679 || INTVAL (operands[3]) & 3)
9682 dstbase = operands[0];
9683 srcbase = operands[1];
9685 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9686 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9688 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9689 out_words_to_go = INTVAL (operands[2]) / 4;
9690 last_bytes = INTVAL (operands[2]) & 3;
9691 dstoffset = srcoffset = 0;
9693 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9694 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
/* Main copy loop: move up to 4 words per iteration through r0-r3. */
9696 for (i = 0; in_words_to_go >= 2; i+=4)
9698 if (in_words_to_go > 4)
9699 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9700 srcbase, &srcoffset));
9702 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9703 FALSE, srcbase, &srcoffset));
9705 if (out_words_to_go)
9707 if (out_words_to_go > 4)
9708 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9709 dstbase, &dstoffset));
9710 else if (out_words_to_go != 1)
9711 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9715 dstbase, &dstoffset));
9718 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9719 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9720 if (last_bytes != 0)
9722 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9728 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9729 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9732 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9733 if (out_words_to_go)
9737 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9738 sreg = copy_to_reg (mem);
9740 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9741 emit_move_insn (mem, sreg);
9744 gcc_assert (!in_words_to_go); /* Sanity check */
9749 gcc_assert (in_words_to_go > 0);
9751 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9752 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9755 gcc_assert (!last_bytes || part_bytes_reg);
9757 if (BYTES_BIG_ENDIAN && last_bytes)
9759 rtx tmp = gen_reg_rtx (SImode);
9761 /* The bytes we want are in the top end of the word. */
9762 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9763 GEN_INT (8 * (4 - last_bytes))));
9764 part_bytes_reg = tmp;
/* Big-endian tail: store the remaining bytes one at a time, highest
   address first, shifting the word right by 8 between stores. */
9768 mem = adjust_automodify_address (dstbase, QImode,
9769 plus_constant (dst, last_bytes - 1),
9770 dstoffset + last_bytes - 1);
9771 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9775 tmp = gen_reg_rtx (SImode);
9776 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9777 part_bytes_reg = tmp;
/* Little-endian tail: a halfword store first (if >= 2 bytes remain),
   then a final byte store. */
9786 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9787 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9791 rtx tmp = gen_reg_rtx (SImode);
9792 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9793 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9794 part_bytes_reg = tmp;
9801 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9802 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9809 /* Select a dominance comparison mode if possible for a test of the general
9810 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9811 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9812 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9813 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9814 In all cases OP will be either EQ or NE, but we don't need to know which
9815 here. If we are unable to support a dominance comparison we return
9816 CC mode. This will then fail to match for the RTL expressions that
9817 generate this call. */
/* NOTE(review): sampled listing -- return type, the 'swapped' local,
   the switch over cond1, most case bodies and 'return CCmode' lines
   are elided between the numbered lines.  */
9819 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9821 enum rtx_code cond1, cond2;
9824 /* Currently we will probably get the wrong result if the individual
9825 comparisons are not simple. This also ensures that it is safe to
9826 reverse a comparison if necessary. */
9827 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9829 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9833 /* The if_then_else variant of this tests the second condition if the
9834 first passes, but is true if the first fails. Reverse the first
9835 condition to get a true "inclusive-or" expression. */
9836 if (cond_or == DOM_CC_NX_OR_Y)
9837 cond1 = reverse_condition (cond1);
9839 /* If the comparisons are not equal, and one doesn't dominate the other,
9840 then we can't do this. */
9842 && !comparison_dominates_p (cond1, cond2)
9843 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
/* If dominance only held in the swapped order, exchange cond1/cond2. */
9848 enum rtx_code temp = cond1;
9856 if (cond_or == DOM_CC_X_AND_Y)
9861 case EQ: return CC_DEQmode;
9862 case LE: return CC_DLEmode;
9863 case LEU: return CC_DLEUmode;
9864 case GE: return CC_DGEmode;
9865 case GEU: return CC_DGEUmode;
9866 default: gcc_unreachable ();
9870 if (cond_or == DOM_CC_X_AND_Y)
9886 if (cond_or == DOM_CC_X_AND_Y)
9902 if (cond_or == DOM_CC_X_AND_Y)
9918 if (cond_or == DOM_CC_X_AND_Y)
9933 /* The remaining cases only occur when both comparisons are the
9936 gcc_assert (cond1 == cond2);
9940 gcc_assert (cond1 == cond2);
9944 gcc_assert (cond1 == cond2);
9948 gcc_assert (cond1 == cond2);
9952 gcc_assert (cond1 == cond2);
/* Implements SELECT_CC_MODE: choose the condition-code mode for a
   comparison OP applied to X and Y.  NOTE(review): sampled listing --
   the return type, many 'return CC_*mode' lines, the DImode switch
   bodies and closing braces are elided between the numbered lines.  */
9961 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9963 /* All floating point compares return CCFP if it is an equality
9964 comparison, and CCFPE otherwise. */
9965 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9985 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9994 /* A compare with a shifted operand. Because of canonicalization, the
9995 comparison will have to be swapped when we emit the assembler. */
9996 if (GET_MODE (y) == SImode
9997 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9998 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9999 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10000 || GET_CODE (x) == ROTATERT))
10003 /* This operation is performed swapped, but since we only rely on the Z
10004 flag we don't need an additional mode. */
10005 if (GET_MODE (y) == SImode
10006 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10007 && GET_CODE (x) == NEG
10008 && (op == EQ || op == NE))
10011 /* This is a special case that is used by combine to allow a
10012 comparison of a shifted byte load to be split into a zero-extend
10013 followed by a comparison of the shifted integer (only valid for
10014 equalities and unsigned inequalities). */
10015 if (GET_MODE (x) == SImode
10016 && GET_CODE (x) == ASHIFT
10017 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10018 && GET_CODE (XEXP (x, 0)) == SUBREG
10019 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10020 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10021 && (op == EQ || op == NE
10022 || op == GEU || op == GTU || op == LTU || op == LEU)
10023 && GET_CODE (y) == CONST_INT)
10026 /* A construct for a conditional compare, if the false arm contains
10027 0, then both conditions must be true, otherwise either condition
10028 must be true. Not all conditions are possible, so CCmode is
10029 returned if it can't be done. */
10030 if (GET_CODE (x) == IF_THEN_ELSE
10031 && (XEXP (x, 2) == const0_rtx
10032 || XEXP (x, 2) == const1_rtx)
10033 && COMPARISON_P (XEXP (x, 0))
10034 && COMPARISON_P (XEXP (x, 1)))
10035 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10036 INTVAL (XEXP (x, 2)));
10038 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10039 if (GET_CODE (x) == AND
10040 && COMPARISON_P (XEXP (x, 0))
10041 && COMPARISON_P (XEXP (x, 1)))
10042 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10045 if (GET_CODE (x) == IOR
10046 && COMPARISON_P (XEXP (x, 0))
10047 && COMPARISON_P (XEXP (x, 1)))
10048 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10051 /* An operation (on Thumb) where we want to test for a single bit.
10052 This is done by shifting that bit up into the top bit of a
10053 scratch register; we can then branch on the sign bit. */
10055 && GET_MODE (x) == SImode
10056 && (op == EQ || op == NE)
10057 && GET_CODE (x) == ZERO_EXTRACT
10058 && XEXP (x, 1) == const1_rtx)
10061 /* An operation that sets the condition codes as a side-effect, the
10062 V flag is not set correctly, so we can only use comparisons where
10063 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10065 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10066 if (GET_MODE (x) == SImode
10068 && (op == EQ || op == NE || op == LT || op == GE)
10069 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10070 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10071 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10072 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10073 || GET_CODE (x) == LSHIFTRT
10074 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10075 || GET_CODE (x) == ROTATERT
10076 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10077 return CC_NOOVmode;
10079 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
/* Add-with-carry style compare: x is (PLUS a b) and one operand equals
   the value being compared against. */
10082 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10083 && GET_CODE (x) == PLUS
10084 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10087 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10089 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10091 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10098 /* A DImode comparison against zero can be implemented by
10099 or'ing the two halves together. */
10100 if (y == const0_rtx)
10103 /* We can do an equality test in three Thumb instructions. */
10113 /* DImode unsigned comparisons can be implemented by cmp +
10114 cmpeq without a scratch register. Not worth doing in
10125 /* DImode signed and unsigned comparisons can be implemented
10126 by cmp + sbcs with a scratch register, but that does not
10127 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10128 gcc_assert (op != EQ && op != NE);
10132 gcc_unreachable ();
10139 /* X and Y are two things to compare using CODE. Emit the compare insn and
10140 return the rtx for register 0 in the proper mode. FP means this is a
10141 floating point compare: I don't think that it is needed on the arm. */
/* NOTE(review): sampled listing -- return type, the cc_reg/clobber/set
   declarations, the 'else' keyword before line 10177 and the final
   'return cc_reg' are elided between the numbered lines.  */
10143 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10145 enum machine_mode mode;
10147 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10149 /* We might have X as a constant, Y as a register because of the predicates
10150 used for cmpdi. If so, force X to a register here. */
10151 if (dimode_comparison && !REG_P (x))
10152 x = force_reg (DImode, x);
10154 mode = SELECT_CC_MODE (code, x, y);
10155 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10157 if (dimode_comparison
10158 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10159 && mode != CC_CZmode)
10163 /* To compare two non-zero values for equality, XOR them and
10164 then compare against zero. Not used for ARM mode; there
10165 CC_CZmode is cheaper. */
10166 if (mode == CC_Zmode && y != const0_rtx)
10168 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10171 /* A scratch register is required. */
10172 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10173 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10174 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
/* Simple case: a plain SET of the CC register from the COMPARE. */
10177 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10182 /* Generate a sequence of insns that will generate the correct return
10183 address mask depending on the physical architecture that the program
/* NOTE(review): sampled listing -- the return type and the final
   'return reg' are elided.  */
10186 arm_gen_return_addr_mask (void)
10188 rtx reg = gen_reg_rtx (Pmode);
10190 emit_insn (gen_return_addr_mask (reg));
/* Reload helper: synthesize an HImode load as two QImode loads plus
   shift/IOR, for addresses the normal patterns cannot handle.
   operands[2] is a DImode scratch whose two halves are used so that
   the scratch never aliases operands[1].  NOTE(review): sampled
   listing -- return type, locals (base, scratch), several else/brace
   lines and parts of the offset arithmetic are elided.  */
10195 arm_reload_in_hi (rtx *operands)
10197 rtx ref = operands[1];
10199 HOST_WIDE_INT offset = 0;
10201 if (GET_CODE (ref) == SUBREG)
10203 offset = SUBREG_BYTE (ref);
10204 ref = SUBREG_REG (ref);
10207 if (GET_CODE (ref) == REG)
10209 /* We have a pseudo which has been spilt onto the stack; there
10210 are two cases here: the first where there is a simple
10211 stack-slot replacement and a second where the stack-slot is
10212 out of range, or is used as a subreg. */
10213 if (reg_equiv_mem[REGNO (ref)])
10215 ref = reg_equiv_mem[REGNO (ref)];
10216 base = find_replacement (&XEXP (ref, 0));
10219 /* The slot is out of range, or was dressed up in a SUBREG. */
10220 base = reg_equiv_address[REGNO (ref)];
10223 base = find_replacement (&XEXP (ref, 0));
10225 /* Handle the case where the address is too complex to be offset by 1. */
10226 if (GET_CODE (base) == MINUS
10227 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10229 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10231 emit_set_insn (base_plus, base);
10234 else if (GET_CODE (base) == PLUS)
10236 /* The addend must be CONST_INT, or we would have dealt with it above. */
10237 HOST_WIDE_INT hi, lo;
10239 offset += INTVAL (XEXP (base, 1));
10240 base = XEXP (base, 0);
10242 /* Rework the address into a legal sequence of insns. */
10243 /* Valid range for lo is -4095 -> 4095 */
10246 : -((-offset) & 0xfff));
10248 /* Corner case, if lo is the max offset then we would be out of range
10249 once we have added the additional 1 below, so bump the msb into the
10250 pre-loading insn(s). */
/* Split offset into hi + lo with hi sign-extended to HOST_WIDE_INT. */
10254 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10255 ^ (HOST_WIDE_INT) 0x80000000)
10256 - (HOST_WIDE_INT) 0x80000000);
10258 gcc_assert (hi + lo == offset);
10262 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10264 /* Get the base address; addsi3 knows how to handle constants
10265 that require more than one insn. */
10266 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10272 /* Operands[2] may overlap operands[0] (though it won't overlap
10273 operands[1]), that's why we asked for a DImode reg -- so we can
10274 use the bit that does not overlap. */
10275 if (REGNO (operands[2]) == REGNO (operands[0]))
10276 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10278 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
/* Load the two bytes and combine: shift order depends on endianness. */
10280 emit_insn (gen_zero_extendqisi2 (scratch,
10281 gen_rtx_MEM (QImode,
10282 plus_constant (base,
10284 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10285 gen_rtx_MEM (QImode,
10286 plus_constant (base,
10288 if (!BYTES_BIG_ENDIAN)
10289 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10290 gen_rtx_IOR (SImode,
10293 gen_rtx_SUBREG (SImode, operands[0], 0),
10297 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10298 gen_rtx_IOR (SImode,
10299 gen_rtx_ASHIFT (SImode, scratch,
10301 gen_rtx_SUBREG (SImode, operands[0], 0)));
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
arm_reload_out_hi (rtx *operands)
  rtx ref = operands[0];
  rtx outval = operands[1];
  HOST_WIDE_INT offset = 0;

  /* Peel a SUBREG off the destination, remembering the byte offset.  */
  if (GET_CODE (ref) == SUBREG)
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);

  if (GET_CODE (ref) == REG)
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem[REGNO (ref)])
	  ref = reg_equiv_mem[REGNO (ref)];
	  base = find_replacement (&XEXP (ref, 0));
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address[REGNO (ref)];
    base = find_replacement (&XEXP (ref, 0));

  /* Low word of the DImode scratch; the high word is used as BASE_PLUS
     below when the address must be rebuilt in a register.  */
  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	      scratch = base_plus;
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;

      /* Materialize the complex address in BASE_PLUS.  */
      emit_set_insn (base_plus, base);
  else if (GET_CODE (base) == PLUS)
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
	      : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      /* Sign-extend the high part through the 32-bit boundary.  */
      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		  scratch = base_plus;
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     OUTVAL.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));

  /* Emit the two byte stores; the byte order depends on endianness.  */
  if (BYTES_BIG_ENDIAN)
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (base, offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
			    gen_lowpart (QImode, scratch)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (base, offset + 1)),
			    gen_lowpart (QImode, scratch)));
10467 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10468 (padded to the size of a word) should be passed in a register. */
10471 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10473 if (TARGET_AAPCS_BASED)
10474 return must_pass_in_stack_var_size (mode, type);
10476 return must_pass_in_stack_var_size_or_pad (mode, type);
10480 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10481 Return true if an argument passed on the stack should be padded upwards,
10482 i.e. if the least-significant byte has useful data.
10483 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10484 aggregate types are placed in the lowest memory address. */
10487 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10489 if (!TARGET_AAPCS_BASED)
10490 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10492 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10499 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10500 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10501 byte of the register has useful data, and return the opposite if the
10502 most significant byte does.
10503 For AAPCS, small aggregates and small complex types are always padded
10507 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10508 tree type, int first ATTRIBUTE_UNUSED)
10510 if (TARGET_AAPCS_BASED
10511 && BYTES_BIG_ENDIAN
10512 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10513 && int_size_in_bytes (type) <= 4)
10516 /* Otherwise, use default padding. */
10517 return !BYTES_BIG_ENDIAN;
/* Print a symbolic form of X to the debug file, F.  Dispatches on the
   rtx code of X; used only for dump-file diagnostics.  */
arm_print_value (FILE *f, rtx x)
  switch (GET_CODE (x))
      /* Integer constant, printed in hex.  */
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));

      /* Double constant: print the two data words.  */
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));

      /* Vector constant: print each element in hex.  */
      for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	  /* Separator between elements — NOTE(review): separator text
	     emitted elsewhere; confirm against full source.  */
	  if (i < (CONST_VECTOR_NUNITS (x) - 1))

      /* String constant.  */
      fprintf (f, "\"%s\"", XSTR (x, 0));

      /* Symbol reference.  */
      fprintf (f, "`%s'", XSTR (x, 0));

      /* Label reference, printed by insn uid.  */
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));

      /* Unary wrapper: recurse on the operand.  */
      arm_print_value (f, XEXP (x, 0));

      /* Binary expression: recurse on both operands.  */
      arm_print_value (f, XEXP (x, 0));
      arm_print_value (f, XEXP (x, 1));

      /* Unknown code.  */
      fprintf (f, "????");
10582 /* Routines for manipulation of the constant pool. */
10584 /* Arm instructions cannot load a large constant directly into a
10585 register; they have to come from a pc relative load. The constant
10586 must therefore be placed in the addressable range of the pc
10587 relative load. Depending on the precise pc relative load
10588 instruction the range is somewhere between 256 bytes and 4k. This
10589 means that we often have to dump a constant inside a function, and
10590 generate code to branch around it.
10592 It is important to minimize this, since the branches will slow
10593 things down and make the code larger.
10595 Normally we can hide the table after an existing unconditional
10596 branch so that there is no interruption of the flow, but in the
10597 worst case the code looks like this:
10615 We fix this by performing a scan after scheduling, which notices
10616 which instructions need to have their operands fetched from the
10617 constant table and builds the table.
10619 The algorithm starts by building a table of all the constants that
10620 need fixing up and all the natural barriers in the function (places
10621 where a constant table can be dropped without breaking the flow).
10622 For each fixup we note how far the pc-relative replacement will be
10623 able to reach and the offset of the instruction into the function.
10625 Having built the table we then group the fixes together to form
10626 tables that are as large as possible (subject to addressing
10627 constraints) and emit each table of constants after the last
10628 barrier that is within range of all the instructions in the group.
10629 If a group does not contain a barrier, then we forcibly create one
10630 by inserting a jump instruction into the flow. Once the table has
10631 been inserted, the insns are then modified to reference the
10632 relevant entry in the pool.
10634 Possible enhancements to the algorithm (not implemented) are:
10636 1) For some processors and object formats, there may be benefit in
10637 aligning the pools to the start of cache lines; this alignment
10638 would need to be taken into account when calculating addressability
10641 /* These typedefs are located at the start of this file, so that
10642 they can be used in the prototypes there. This comment is to
10643 remind readers of that fact so that the following structures
10644 can be understood more easily.
10646 typedef struct minipool_node Mnode;
10647 typedef struct minipool_fixup Mfix; */
/* One entry in the minipool: a constant to be emitted into the current
   constant table, together with its placement constraints.  */
struct minipool_node
  /* Doubly linked chain of entries.  */
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
/* One fixup record per insn that needs a value loaded from the minipool.  */
struct minipool_fixup
  /* Offset of the referencing insn from the start of the function.  */
  HOST_WIDE_INT address;
  /* Mode of the value that must be loaded.  */
  enum machine_mode mode;
  /* Reach of the pc-relative load, taken from the insn's pool_range /
     neg_pool_range attributes (see push_minipool_fix).  */
  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

/* The minipool currently under construction: a doubly-linked list of
   entries, the label that heads the emitted table, and the amount of
   alignment padding assumed for the pool.  */
static Mnode * minipool_vector_head;
static Mnode * minipool_vector_tail;
static rtx minipool_vector_label;
static int minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * minipool_fix_head;
Mfix * minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix * minipool_barrier;
10704 /* Determines if INSN is the start of a jump table. Returns the end
10705 of the TABLE or NULL_RTX. */
10707 is_jump_table (rtx insn)
10711 if (GET_CODE (insn) == JUMP_INSN
10712 && JUMP_LABEL (insn) != NULL
10713 && ((table = next_real_insn (JUMP_LABEL (insn)))
10714 == next_real_insn (insn))
10716 && GET_CODE (table) == JUMP_INSN
10717 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10718 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0

/* Return the number of bytes the jump table INSN occupies in the text
   section; zero when jump tables live in read-only data instead.  */
static HOST_WIDE_INT
get_jump_table_size (rtx insn)
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
      rtx body = PATTERN (insn);
      /* ADDR_DIFF_VEC keeps its labels in operand 1, ADDR_VEC in 0.  */
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      /* Round up size of TBB table to a halfword boundary.  */
      size = (size + 1) & ~(HOST_WIDE_INT)1;
      /* No padding necessary for TBH.  */
      /* Add two bytes for alignment on Thumb.  */
      gcc_unreachable ();
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
      /* Only tighten MP's constraint; never loosen it.  */
      if (max_address < mp->max_address)
	mp->max_address = max_address;
      /* MP must sit before MAX_MP, so it inherits the tighter of the
	 two constraints.  */
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->prev = max_mp->prev;

      if (mp->prev != NULL)
	mp->prev->next = mp;
	minipool_vector_head = mp;

  /* Save the new entry.  */

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
add_minipool_forward_ref (Mfix *fix)
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode * max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	  /* More than one fix references this entry.  */
	  /* Reuse the existing entry rather than duplicating it.  */
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);

      /* Note the insertion point if necessary.  */
	  && mp->max_address > max_address)
	  /* If we are inserting an 8-bytes aligned quantity and
	     we have not already found an insertion point, then
	     make sure that all such 8-byte aligned quantities are
	     placed at the start of the pool.  */
	  if (ARM_DOUBLEWORD_ALIGN
	      && fix->fix_size >= 8
	      && mp->fix_size < 8)
	      max_address = mp->max_address;

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
      mp->max_address = max_address;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	  /* First entry: start a new pool and give it a label.  */
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	mp->prev->next = mp;

      minipool_vector_tail = mp;
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
	mp->max_address = max_address;

      mp->prev = max_mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp;
	minipool_vector_head = mp;

  /* Save the new entry.  */

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
/* Move a minipool fix MP to sit after MIN_MP, updating its minimum-address
   constraint; when MIN_MP is NULL only the constraint is tightened.
   Afterwards, recompute offsets and propagate min_address forward.  */
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
      /* Only tighten the constraint, never loosen it.  */
      if (min_address > mp->min_address)
	mp->min_address = min_address;
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->next = min_mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp;
	minipool_vector_tail = mp;

  /* Recompute every entry's offset (dead entries take no space) and
     push min_address constraints down the chain.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
add_minipool_backward_ref (Mfix *fix)
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);

      if (min_mp != NULL)
	/* Entries after the insertion point shift by FIX's size.  */
	mp->min_address += fix->fix_size;
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
	      min_address = mp->min_address + fix->fix_size;
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	      min_address = mp->min_address + fix->fix_size;

  /* We need to create a new entry.  */
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->max_address = minipool_barrier->address + 65536;
  mp->min_address = min_address;

  if (min_mp == NULL)
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	  /* First entry: the pool gains its label now.  */
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	mp->next->prev = mp;

      minipool_vector_head = mp;
      mp->next = min_mp->next;

      if (mp->next != NULL)
	mp->next->prev = mp;
	minipool_vector_tail = mp;

  /* Save the new entry.  */

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

	mp->next->offset = mp->offset + mp->fix_size;
	mp->next->offset = mp->offset;
11147 assign_minipool_offsets (Mfix *barrier)
11149 HOST_WIDE_INT offset = 0;
11152 minipool_barrier = barrier;
11154 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11156 mp->offset = offset;
11158 if (mp->refcount > 0)
11159 offset += mp->fix_size;
/* Output the literal table: emit the pool's label, alignment, and one
   consttable insn per live entry after SCAN, then reset the pool.  */
dump_minipool (rtx scan)
  /* Check whether any live entry needs 8-byte alignment.  */
  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)

    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  /* Label + alignment directive + the pool's own label.  */
  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
      if (mp->refcount > 0)
	      fprintf (dump_file,
		       ";; Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);

	  /* Emit the right consttable insn for this entry's size.  */
	  switch (mp->fix_size)
#ifdef HAVE_consttable_1
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
#ifdef HAVE_consttable_2
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
#ifdef HAVE_consttable_4
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
#ifdef HAVE_consttable_8
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
#ifdef HAVE_consttable_16
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      gcc_unreachable ();

  /* The pool has been emitted; start the next one empty.  */
  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
/* Return the cost of forcibly inserting a barrier after INSN.  */
arm_barrier_cost (rtx insn)
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  /* NOTE(review): placing the pool just before an existing label appears
     to be favoured here — confirm the adjustment applied to base_cost.  */
  if (next != NULL && GET_CODE (next) == CODE_LABEL)

  switch (GET_CODE (insn))
      /* It will always be better to place the table before the label, rather
	 than after it.  */

      /* Cheaper after a jump (control flow already diverts).  */
      return base_cost - 10;

      /* More expensive in the remaining cases.  */
      return base_cost + 10;
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
  HOST_WIDE_INT count = 0;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  /* Start with FIX's own location as the candidate.  */
  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  */
      count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	      selected_cost = new_cost;
	      selected_address = fix->address + count;

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	  selected_cost = new_cost;
	  selected_address = fix->address + count;

      from = NEXT_INSN (from);

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;
11370 /* Record that there is a natural barrier in the insn stream at
11373 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11375 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11378 fix->address = address;
11381 if (minipool_fix_head != NULL)
11382 minipool_fix_tail->next = fix;
11384 minipool_fix_head = fix;
11386 minipool_fix_tail = fix;
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
		   enum machine_mode mode, rtx value)
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->address = address;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  /* Reach limits come from the insn's pool_range attributes.  */
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)

      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");

  /* Add it to the chain of fixes.  */
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
11443 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11444 Returns the number of insns needed, or 99 if we don't know how to
11447 arm_const_double_inline_cost (rtx val)
11449 rtx lowpart, highpart;
11450 enum machine_mode mode;
11452 mode = GET_MODE (val);
11454 if (mode == VOIDmode)
11457 gcc_assert (GET_MODE_SIZE (mode) == 8);
11459 lowpart = gen_lowpart (SImode, val);
11460 highpart = gen_highpart_mode (SImode, mode, val);
11462 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11463 gcc_assert (GET_CODE (highpart) == CONST_INT);
11465 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11466 NULL_RTX, NULL_RTX, 0, 0)
11467 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11468 NULL_RTX, NULL_RTX, 0, 0));
11471 /* Return true if it is worthwhile to split a 64-bit constant into two
11472 32-bit operations. This is the case if optimizing for size, or
11473 if we have load delay slots, or if one 32-bit part can be done with
11474 a single data operation. */
11476 arm_const_double_by_parts (rtx val)
11478 enum machine_mode mode = GET_MODE (val);
11481 if (optimize_size || arm_ld_sched)
11484 if (mode == VOIDmode)
11487 part = gen_highpart_mode (SImode, mode, val);
11489 gcc_assert (GET_CODE (part) == CONST_INT);
11491 if (const_ok_for_arm (INTVAL (part))
11492 || const_ok_for_arm (~INTVAL (part)))
11495 part = gen_lowpart (SImode, val);
11497 gcc_assert (GET_CODE (part) == CONST_INT);
11499 if (const_ok_for_arm (INTVAL (part))
11500 || const_ok_for_arm (~INTVAL (part)))
11506 /* Return true if it is possible to inline both the high and low parts
11507 of a 64-bit constant into 32-bit data processing instructions. */
11509 arm_const_double_by_immediates (rtx val)
11511 enum machine_mode mode = GET_MODE (val);
11514 if (mode == VOIDmode)
11517 part = gen_highpart_mode (SImode, mode, val);
11519 gcc_assert (GET_CODE (part) == CONST_INT);
11521 if (!const_ok_for_arm (INTVAL (part)))
11524 part = gen_lowpart (SImode, val);
11526 gcc_assert (GET_CODE (part) == CONST_INT);
11528 if (!const_ok_for_arm (INTVAL (part)))
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  The function returns TRUE if any fixups were needed/pushed.
   This is used by arm_memory_load_p() which needs to know about loads
   of constants that will be converted into minipool loads.  */
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
  bool result = false;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  /* Nothing to do for insns with no alternatives (e.g. asm).  */
  if (recog_data.n_alternatives == 0)

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	      push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				 recog_data.operand_mode[opno], op);
	  else if (GET_CODE (op) == MEM
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	      rtx cop = avoid_constant_pool_reference (op);

	      /* Casting the address of something to a mode narrower
		 than a word can cause avoid_constant_pool_reference()
		 to return the pool reference itself.  That's no good to
		 us here.  Let's just hope that we can use the
		 constant pool value directly.  */
	      cop = get_pool_constant (XEXP (op, 0));

	      push_minipool_fix (insn, address,
				 recog_data.operand_loc[opno],
				 recog_data.operand_mode[opno], cop);
11607 /* Convert instructions to their cc-clobbering variant if possible, since
11608 that allows us to use smaller encodings. */
11611 thumb2_reorg (void)
11616 INIT_REG_SET (&live);
11618 /* We are freeing block_for_insn in the toplev to keep compatibility
11619 with old MDEP_REORGS that are not CFG based. Recompute it now. */
11620 compute_bb_for_insn ();
/* Walk each block backwards, tracking register liveness with the df
   simulator so we know where the condition codes are dead.  */
11626 COPY_REG_SET (&live, DF_LR_OUT (bb));
11627 df_simulate_initialize_backwards (bb, &live);
11628 FOR_BB_INSNS_REVERSE (bb, insn)
/* Only rewrite when CC_REGNUM is dead at this point: adding a clobber
   of the flags must not change observable behavior.  */
11630 if (NONJUMP_INSN_P (insn)
11631 && !REGNO_REG_SET_P (&live, CC_REGNUM))
11633 rtx pat = PATTERN (insn);
/* Candidate: a plain SET of a low register from a 16-bit-encodable
   two-operand operation on low registers.  */
11634 if (GET_CODE (pat) == SET
11635 && low_register_operand (XEXP (pat, 0), SImode)
11636 && thumb_16bit_operator (XEXP (pat, 1), SImode)
11637 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
11638 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
11640 rtx dst = XEXP (pat, 0);
11641 rtx src = XEXP (pat, 1);
11642 rtx op0 = XEXP (src, 0);
/* The 16-bit forms are two-address (dst must equal op0), except
   PLUS/MINUS which have three-address 16-bit encodings.  */
11643 if (rtx_equal_p (dst, op0)
11644 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
/* Wrap the SET in a PARALLEL with a CC clobber and force
   re-recognition so the flag-setting pattern is matched.  */
11646 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
11647 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
11648 rtvec vec = gen_rtvec (2, pat, clobber);
11649 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
11650 INSN_CODE (insn) = -1;
11654 if (NONDEBUG_INSN_P (insn))
11655 df_simulate_one_insn_backwards (bb, insn, &live);
11658 CLEAR_REG_SET (&live);
11661 /* Gcc puts the pool in the wrong place for ARM, since we can only
11662 load addresses a limited distance around the pc. We do some
11663 special munging to move the constant pool values to the correct
11664 point in the code. */
11669 HOST_WIDE_INT address = 0;
11675 minipool_fix_head = minipool_fix_tail = NULL;
11677 /* The first insn must always be a note, or the code below won't
11678 scan it properly. */
11679 insn = get_insns ();
11680 gcc_assert (GET_CODE (insn) == NOTE);
11683 /* Scan all the insns and record the operands that will need fixing. */
11684 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11686 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11687 && (arm_cirrus_insn_p (insn)
11688 || GET_CODE (insn) == JUMP_INSN
11689 || arm_memory_load_p (insn)))
11690 cirrus_reorg (insn);
11692 if (GET_CODE (insn) == BARRIER)
11693 push_minipool_barrier (insn, address);
11694 else if (INSN_P (insn))
11698 note_invalid_constants (insn, address, true);
/* `address' tracks the byte offset of each insn so pool placement can
   respect pc-relative addressing range limits.  */
11699 address += get_attr_length (insn);
11701 /* If the insn is a vector jump, add the size of the table
11702 and skip the table. */
11703 if ((table = is_jump_table (insn)) != NULL)
11705 address += get_jump_table_size (table);
11711 fix = minipool_fix_head;
11713 /* Now scan the fixups and perform the required changes. */
11718 Mfix * last_added_fix;
11719 Mfix * last_barrier = NULL;
11722 /* Skip any further barriers before the next fix. */
11723 while (fix && GET_CODE (fix->insn) == BARRIER)
11726 /* No more fixes. */
11730 last_added_fix = NULL;
/* Greedily add fixes to the current pool until one no longer fits
   within the pool's maximum pc-relative reach.  */
11732 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11734 if (GET_CODE (ftmp->insn) == BARRIER)
11736 if (ftmp->address >= minipool_vector_head->max_address)
11739 last_barrier = ftmp;
11741 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11744 last_added_fix = ftmp; /* Keep track of the last fix added. */
11747 /* If we found a barrier, drop back to that; any fixes that we
11748 could have reached but come after the barrier will now go in
11749 the next mini-pool. */
11750 if (last_barrier != NULL)
11752 /* Reduce the refcount for those fixes that won't go into this
11754 for (fdel = last_barrier->next;
11755 fdel && fdel != ftmp;
11758 fdel->minipool->refcount--;
11759 fdel->minipool = NULL;
11762 ftmp = last_barrier;
11766 /* ftmp is first fix that we can't fit into this pool and
11767 there no natural barriers that we could use. Insert a
11768 new barrier in the code somewhere between the previous
11769 fix and this one, and arrange to jump around it. */
11770 HOST_WIDE_INT max_address;
11772 /* The last item on the list of fixes must be a barrier, so
11773 we can never run off the end of the list of fixes without
11774 last_barrier being set. */
11777 max_address = minipool_vector_head->max_address;
11778 /* Check that there isn't another fix that is in range that
11779 we couldn't fit into this pool because the pool was
11780 already too large: we need to put the pool before such an
11781 instruction. The pool itself may come just after the
11782 fix because create_fix_barrier also allows space for a
11783 jump instruction. */
11784 if (ftmp->address < max_address)
11785 max_address = ftmp->address + 1;
11787 last_barrier = create_fix_barrier (last_added_fix, max_address);
11790 assign_minipool_offsets (last_barrier);
/* Fixes left over from the forward pass may still be reachable
   backwards from a pool already emitted.  */
11794 if (GET_CODE (ftmp->insn) != BARRIER
11795 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11802 /* Scan over the fixes we have identified for this pool, fixing them
11803 up and adding the constants to the pool itself. */
11804 for (this_fix = fix; this_fix && ftmp != this_fix;
11805 this_fix = this_fix->next)
11806 if (GET_CODE (this_fix->insn) != BARRIER)
/* Rewrite the operand as a pc-relative load from the pool label
   plus this constant's offset within the pool.  */
11809 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11810 minipool_vector_label),
11811 this_fix->minipool->offset);
11812 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11815 dump_minipool (last_barrier->insn);
11819 /* From now on we must synthesize any constants that we can't handle
11820 directly. This can happen if the RTL gets split during final
11821 instruction generation. */
11822 after_arm_reorg = 1;
11824 /* Free the minipool memory. */
11825 obstack_free (&minipool_obstack, minipool_startobj);
11828 /* Routines to output assembly language. */
11830 /* If the rtx is the correct value then return the string of the number.
11831 In this way we can ensure that valid double constants are generated even
11832 when cross compiling. */
11834 fp_immediate_constant (rtx x)
11839 if (!fp_consts_inited)
/* Linear search of the 8 FPA-encodable constants; X must match one of
   them, otherwise the caller violated the predicate and we abort.  */
11842 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11843 for (i = 0; i < 8; i++)
11844 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11845 return strings_fp[i];
11847 gcc_unreachable ();
11850 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11851 static const char *
11852 fp_const_from_val (REAL_VALUE_TYPE *r)
11856 if (!fp_consts_inited)
/* R must be one of the 8 pre-tabulated FP constants.  */
11859 for (i = 0; i < 8; i++)
11860 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11861 return strings_fp[i];
11863 gcc_unreachable ();
11866 /* Output the operands of a LDM/STM instruction to STREAM.
11867 MASK is the ARM register set mask of which only bits 0-15 are important.
11868 REG is the base register, either the frame pointer or the stack pointer,
11869 INSTR is the possibly suffixed load or store instruction.
11870 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11873 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11874 unsigned long mask, int rfe)
11877 bool not_first = FALSE;
/* spsr->cpsr copy (the "^" form) is only meaningful when PC is loaded.  */
11879 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11880 fputc ('\t', stream);
/* INSTR is an asm_fprintf format; REG fills its base-register slot.  */
11881 asm_fprintf (stream, instr, reg);
11882 fputc ('{', stream);
11884 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11885 if (mask & (1 << i))
11888 fprintf (stream, ", ");
11890 asm_fprintf (stream, "%r", i);
/* Close the register list; "}^" presumably emitted when RFE is set (the
   guard line is elided in this listing).  */
11895 fprintf (stream, "}^\n");
11897 fprintf (stream, "}\n");
11901 /* Output a FLDMD instruction to STREAM.
11902 BASE if the register containing the address.
11903 REG and COUNT specify the register range.
11904 Extra registers may be added to avoid hardware bugs.
11906 We output FLDMD even for ARMv5 VFP implementations. Although
11907 FLDMD is technically not supported until ARMv6, it is believed
11908 that all VFP implementations support its use in this context. */
11911 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11915 /* Workaround ARM10 VFPr1 bug. */
11916 if (count == 2 && !arm_arch6)
11923 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11924 load into multiple parts if we have to handle more than 16 registers. */
11927 vfp_output_fldmd (stream, base, reg, 16);
11928 vfp_output_fldmd (stream, base, reg + 16, count - 16);
/* Emit "fldmfdd <base>!, {dREG, ..., dREG+COUNT-1}".  */
11932 fputc ('\t', stream);
11933 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11935 for (i = reg; i < reg + count; i++)
11938 fputs (", ", stream);
11939 asm_fprintf (stream, "d%d", i);
11941 fputs ("}\n", stream);
11946 /* Output the assembly for a store multiple. */
11949 vfp_output_fstmd (rtx * operands)
/* Build "fstmfdd <addr>!, {dBASE, dBASE+1, ...}" into a local buffer;
   the register count comes from the PARALLEL in operands[2].  */
11956 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11957 p = strlen (pattern);
11959 gcc_assert (GET_CODE (operands[1]) == REG);
/* Convert the VFP hard-register number to a D-register index.  */
11961 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11962 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11964 p += sprintf (&pattern[p], ", d%d", base + i);
11966 strcpy (&pattern[p], "}");
11968 output_asm_insn (pattern, operands);
11973 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11974 number of bytes pushed. */
11977 vfp_emit_fstmd (int base_reg, int count)
11984 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11985 register pairs are stored by a store multiple insn. We avoid this
11986 by pushing an extra pair. */
11987 if (count == 2 && !arm_arch6)
11989 if (base_reg == LAST_VFP_REGNUM - 3)
11994 /* FSTMD may not store more than 16 doubleword registers at once. Split
11995 larger stores into multiple parts (up to a maximum of two, in
12000 /* NOTE: base_reg is an internal register number, so each D register
12002 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12003 saved += vfp_emit_fstmd (base_reg, 16);
/* Build the store-multiple as a PARALLEL of a push-UNSPEC plus USEs, and
   a parallel `dwarf' SEQUENCE describing the same effect for unwind info.  */
12007 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12008 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12010 reg = gen_rtx_REG (DFmode, base_reg);
12013 XVECEXP (par, 0, 0)
12014 = gen_rtx_SET (VOIDmode,
12017 gen_rtx_PRE_MODIFY (Pmode,
12020 (stack_pointer_rtx,
12023 gen_rtx_UNSPEC (BLKmode,
12024 gen_rtvec (1, reg),
12025 UNSPEC_PUSH_MULT));
/* Dwarf entry 0: the stack pointer adjustment (8 bytes per D register).  */
12027 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12028 plus_constant (stack_pointer_rtx, -(count * 8)));
12029 RTX_FRAME_RELATED_P (tmp) = 1;
12030 XVECEXP (dwarf, 0, 0) = tmp;
/* Dwarf entry 1: the first register stored at the new stack pointer.  */
12032 tmp = gen_rtx_SET (VOIDmode,
12033 gen_frame_mem (DFmode, stack_pointer_rtx),
12035 RTX_FRAME_RELATED_P (tmp) = 1;
12036 XVECEXP (dwarf, 0, 1) = tmp;
/* Remaining registers: a USE in the insn, and a frame-store note.  */
12038 for (i = 1; i < count; i++)
12040 reg = gen_rtx_REG (DFmode, base_reg);
12042 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12044 tmp = gen_rtx_SET (VOIDmode,
12045 gen_frame_mem (DFmode,
12046 plus_constant (stack_pointer_rtx,
12049 RTX_FRAME_RELATED_P (tmp) = 1;
12050 XVECEXP (dwarf, 0, i + 1) = tmp;
12053 par = emit_insn (par);
12054 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12055 RTX_FRAME_RELATED_P (par) = 1;
12060 /* Emit a call instruction with pattern PAT. ADDR is the address of
12061 the call target. */
12064 arm_emit_call_insn (rtx pat, rtx addr)
12068 insn = emit_call_insn (pat);
12070 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12071 If the call might use such an entry, add a use of the PIC register
12072 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12073 if (TARGET_VXWORKS_RTP
12075 && GET_CODE (addr) == SYMBOL_REF
/* A symbol that does not bind locally may be resolved through the PLT.  */
12076 && (SYMBOL_REF_DECL (addr)
12077 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12078 : !SYMBOL_REF_LOCAL_P (addr)))
12080 require_pic_register ();
12081 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12085 /* Output a 'call' insn. */
12087 output_call (rtx *operands)
12089 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12091 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12092 if (REGNO (operands[0]) == LR_REGNUM)
12094 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12095 output_asm_insn ("mov%?\t%0, %|lr", operands);
/* Set up the return address in lr, then branch to the target.  */
12098 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
/* Use bx when interworking (or on arm4t) so Thumb targets work; otherwise
   a plain mov to pc suffices.  */
12100 if (TARGET_INTERWORK || arm_arch4t)
12101 output_asm_insn ("bx%?\t%0", operands);
12103 output_asm_insn ("mov%?\t%|pc, %0", operands);
12108 /* Output a 'call' insn that is a reference in memory. This is
12109 disabled for ARMv5 and we prefer a blx instead because otherwise
12110 there's a significant performance overhead. */
12112 output_call_mem (rtx *operands)
12114 gcc_assert (!arm_arch5);
12115 if (TARGET_INTERWORK)
/* Interworking: load the target into ip, set lr, then bx ip.  */
12117 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12118 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12119 output_asm_insn ("bx%?\t%|ip", operands);
12121 else if (regno_use_in (LR_REGNUM, operands[0]))
12123 /* LR is used in the memory address. We load the address in the
12124 first instruction. It's safe to use IP as the target of the
12125 load since the call will kill it anyway. */
12126 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12127 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12129 output_asm_insn ("bx%?\t%|ip", operands);
12131 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
/* Simple case: set lr, then load the target straight into pc.  */
12135 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12136 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12143 /* Output a move from arm registers to an fpa registers.
12144 OPERANDS[0] is an fpa register.
12145 OPERANDS[1] is the first registers of an arm register pair. */
12147 output_mov_long_double_fpa_from_arm (rtx *operands)
12149 int arm_reg0 = REGNO (operands[1]);
/* ip cannot hold part of the value (it is the scratch register).  */
12152 gcc_assert (arm_reg0 != IP_REGNUM);
12154 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12155 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12156 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
/* Bounce the three words through the stack: push, then FPA-load-extended
   with post-increment writeback.  */
12158 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12159 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12164 /* Output a move from an fpa register to arm registers.
12165 OPERANDS[0] is the first registers of an arm register pair.
12166 OPERANDS[1] is an fpa register. */
12168 output_mov_long_double_arm_from_fpa (rtx *operands)
12170 int arm_reg0 = REGNO (operands[0]);
12173 gcc_assert (arm_reg0 != IP_REGNUM);
12175 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12176 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12177 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
/* Mirror of output_mov_long_double_fpa_from_arm: FPA store-extended with
   pre-decrement, then pop the three words into the ARM registers.  */
12179 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12180 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12184 /* Output a move from arm registers to arm registers of a long double
12185 OPERANDS[0] is the destination.
12186 OPERANDS[1] is the source. */
12188 output_mov_long_double_arm_from_arm (rtx *operands)
12190 /* We have to be careful here because the two might overlap. */
12191 int dest_start = REGNO (operands[0]);
12192 int src_start = REGNO (operands[1]);
/* Copy in the direction that never clobbers a source word before it is
   read: low-to-high when moving downwards, high-to-low otherwise.  */
12196 if (dest_start < src_start)
12198 for (i = 0; i < 3; i++)
12200 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12201 ops[1] = gen_rtx_REG (SImode, src_start + i);
12202 output_asm_insn ("mov%?\t%0, %1", ops);
12207 for (i = 2; i >= 0; i--)
12209 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12210 ops[1] = gen_rtx_REG (SImode, src_start + i);
12211 output_asm_insn ("mov%?\t%0, %1", ops);
/* Emit RTL to set DEST to SRC in two halves -- presumably the movw/movt
   (low-16 then ZERO_EXTRACT high-16) sequence for immediates, and the
   HIGH/LO_SUM pair for symbolic values.  (NOTE(review): no header comment
   survives in this listing; confirm against the full source.)  */
12219 arm_emit_movpair (rtx dest, rtx src)
12221 /* If the src is an immediate, simplify it. */
12222 if (CONST_INT_P (src))
12224 HOST_WIDE_INT val = INTVAL (src);
/* Low halfword first; the high halfword insert is skipped when zero.  */
12225 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12226 if ((val >> 16) & 0x0000ffff)
12227 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12229 GEN_INT ((val >> 16) & 0x0000ffff));
/* Symbolic case: HIGH then LO_SUM.  */
12232 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12233 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12236 /* Output a move from arm registers to an fpa registers.
12237 OPERANDS[0] is an fpa register.
12238 OPERANDS[1] is the first registers of an arm register pair. */
12240 output_mov_double_fpa_from_arm (rtx *operands)
12242 int arm_reg0 = REGNO (operands[1]);
12245 gcc_assert (arm_reg0 != IP_REGNUM);
12247 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12248 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
/* Two-word variant of the long-double bounce: push the pair, FPA-load
   double with post-increment.  */
12249 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12250 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12254 /* Output a move from an fpa register to arm registers.
12255 OPERANDS[0] is the first registers of an arm register pair.
12256 OPERANDS[1] is an fpa register. */
12258 output_mov_double_arm_from_fpa (rtx *operands)
12260 int arm_reg0 = REGNO (operands[0]);
12263 gcc_assert (arm_reg0 != IP_REGNUM);
12265 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12266 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
/* FPA store-double with pre-decrement, then pop into the ARM pair.  */
12267 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12268 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12272 /* Output a move between double words.
12273 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12274 or MEM<-REG and all MEMs must be offsettable addresses. */
12276 output_move_double (rtx *operands)
12278 enum rtx_code code0 = GET_CODE (operands[0]);
12279 enum rtx_code code1 = GET_CODE (operands[1]);
/* --- REG <- MEM: dispatch on the address form of the source. --- */
12284 unsigned int reg0 = REGNO (operands[0]);
/* otherops[0] names the second (odd) register of the destination pair.  */
12286 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12288 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12290 switch (GET_CODE (XEXP (operands[1], 0)))
/* Plain register address: prefer ldrd unless the Cortex-M3 ldrd errata
   workaround forbids a base equal to the first destination register.  */
12294 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12295 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12297 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
/* Pre/post increment/decrement forms.  */
12301 gcc_assert (TARGET_LDRD);
12302 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12307 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12309 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12314 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12316 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12320 gcc_assert (TARGET_LDRD);
12321 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12326 /* Autoicrement addressing modes should never have overlapping
12327 base and destination registers, and overlapping index registers
12328 are already prohibited, so this doesn't need to worry about
12330 otherops[0] = operands[0];
12331 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12332 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12334 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12336 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12338 /* Registers overlap so split out the increment. */
12339 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12340 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12344 /* Use a single insn if we can.
12345 FIXME: IWMMXT allows offsets larger than ldrd can
12346 handle, fix these up with a pair of ldr. */
12348 || GET_CODE (otherops[2]) != CONST_INT
12349 || (INTVAL (otherops[2]) > -256
12350 && INTVAL (otherops[2]) < 256))
12351 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12354 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12355 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12361 /* Use a single insn if we can.
12362 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12363 fix these up with a pair of ldr. */
12365 || GET_CODE (otherops[2]) != CONST_INT
12366 || (INTVAL (otherops[2]) > -256
12367 && INTVAL (otherops[2]) < 256))
12368 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
/* POST_MODIFY split: load the high word first so the writeback of the
   base in the second load cannot clobber data already read.  */
12371 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12372 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
/* Label address: materialise it with adr then load through it.  */
12379 /* We might be able to use ldrd %0, %1 here. However the range is
12380 different to ldr/adr, and it is broken on some ARMv7-M
12381 implementations. */
12382 /* Use the second register of the pair to avoid problematic
12384 otherops[1] = operands[1];
12385 output_asm_insn ("adr%?\t%0, %1", otherops);
12386 operands[1] = otherops[0];
12388 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12390 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12393 /* ??? This needs checking for thumb2. */
/* --- base + offset (PLUS/MINUS) source addresses --- */
12395 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12396 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12398 otherops[0] = operands[0];
12399 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12400 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12402 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12404 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
/* Without ldrd, offsets of exactly -8/-4/+4 map onto the ldm
   addressing variants db/da/ib.  */
12406 switch ((int) INTVAL (otherops[2]))
12409 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12414 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12419 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12423 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12424 operands[1] = otherops[0];
12426 && (GET_CODE (otherops[2]) == REG
12428 || (GET_CODE (otherops[2]) == CONST_INT
12429 && INTVAL (otherops[2]) > -256
12430 && INTVAL (otherops[2]) < 256)))
12432 if (reg_overlap_mentioned_p (operands[0],
12436 /* Swap base and index registers over to
12437 avoid a conflict. */
12439 otherops[1] = otherops[2];
12442 /* If both registers conflict, it will usually
12443 have been fixed by a splitter. */
12444 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12445 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
/* Compute the address into the destination pair first, then
   do a base-only ldrd -- avoids the overlap/errata hazard.  */
12447 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12448 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12452 otherops[0] = operands[0];
12453 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
/* Offset out of ldrd/ldm range: form the address in a register.  */
12458 if (GET_CODE (otherops[2]) == CONST_INT)
12460 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12461 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12463 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12466 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12469 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12472 return "ldr%(d%)\t%0, [%1]";
12474 return "ldm%(ia%)\t%1, %M0";
/* Fallback: two single-word loads via offsettable addresses.  */
12478 otherops[1] = adjust_address (operands[1], SImode, 4);
12479 /* Take care of overlapping base/data reg. */
12480 if (reg_mentioned_p (operands[0], operands[1]))
12482 output_asm_insn ("ldr%?\t%0, %1", otherops);
12483 output_asm_insn ("ldr%?\t%0, %1", operands);
12487 output_asm_insn ("ldr%?\t%0, %1", operands);
12488 output_asm_insn ("ldr%?\t%0, %1", otherops);
/* --- MEM <- REG: mirror of the load cases above. --- */
12495 /* Constraints should ensure this. */
12496 gcc_assert (code0 == MEM && code1 == REG);
12497 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12499 switch (GET_CODE (XEXP (operands[0], 0)))
12503 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12505 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12509 gcc_assert (TARGET_LDRD);
12510 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12515 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12517 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12522 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12524 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12528 gcc_assert (TARGET_LDRD);
12529 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12534 otherops[0] = operands[1];
12535 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12536 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12538 /* IWMMXT allows offsets larger than ldrd can handle,
12539 fix these up with a pair of ldr. */
12541 && GET_CODE (otherops[2]) == CONST_INT
12542 && (INTVAL(otherops[2]) <= -256
12543 || INTVAL(otherops[2]) >= 256))
12545 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12547 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12548 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12552 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12553 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12556 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12557 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12559 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12563 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12564 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
/* Small fixed offsets map to stm db/da/ib variants.  */
12566 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12569 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12575 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12581 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12586 && (GET_CODE (otherops[2]) == REG
12588 || (GET_CODE (otherops[2]) == CONST_INT
12589 && INTVAL (otherops[2]) > -256
12590 && INTVAL (otherops[2]) < 256)))
12592 otherops[0] = operands[1];
12593 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12594 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
/* Fallback: two single-word stores via offsettable addresses.  */
12600 otherops[0] = adjust_address (operands[0], SImode, 4);
12601 otherops[1] = operands[1];
12602 output_asm_insn ("str%?\t%1, %0", operands);
12603 output_asm_insn ("str%?\t%H1, %0", otherops);
12610 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12611 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12614 output_move_quad (rtx *operands)
12616 if (REG_P (operands[0]))
12618 /* Load, or reg->reg move. */
12620 if (MEM_P (operands[1]))
12622 switch (GET_CODE (XEXP (operands[1], 0)))
/* Register address: a 4-word ldm does the whole load.  */
12625 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
/* Label address: materialise with adr, then ldm through it.  */
12630 output_asm_insn ("adr%?\t%0, %1", operands);
12631 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12635 gcc_unreachable ();
12643 gcc_assert (REG_P (operands[1]));
12645 dest = REGNO (operands[0]);
12646 src = REGNO (operands[1]);
12648 /* This seems pretty dumb, but hopefully GCC won't try to do it
/* reg->reg: copy word-by-word in the direction that avoids clobbering
   overlapping source registers before they are read.  */
12651 for (i = 0; i < 4; i++)
12653 ops[0] = gen_rtx_REG (SImode, dest + i);
12654 ops[1] = gen_rtx_REG (SImode, src + i);
12655 output_asm_insn ("mov%?\t%0, %1", ops);
12658 for (i = 3; i >= 0; i--)
12660 ops[0] = gen_rtx_REG (SImode, dest + i);
12661 ops[1] = gen_rtx_REG (SImode, src + i);
12662 output_asm_insn ("mov%?\t%0, %1", ops);
/* Store: only a plain register address is accepted, via stm.  */
12668 gcc_assert (MEM_P (operands[0]));
12669 gcc_assert (REG_P (operands[1]));
12670 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12672 switch (GET_CODE (XEXP (operands[0], 0)))
12675 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12679 gcc_unreachable ();
12686 /* Output a VFP load or store instruction. */
12689 output_move_vfp (rtx *operands)
12691 rtx reg, mem, addr, ops[2];
/* load != 0 means REG <- MEM; otherwise MEM <- REG.  */
12692 int load = REG_P (operands[0]);
/* dp: double-precision (8-byte mode) selects the 'd' suffix, else 's'.  */
12693 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12694 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12697 enum machine_mode mode;
12699 reg = operands[!load];
12700 mem = operands[load];
12702 mode = GET_MODE (reg);
12704 gcc_assert (REG_P (reg));
12705 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12706 gcc_assert (mode == SFmode
12710 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12711 gcc_assert (MEM_P (mem));
12713 addr = XEXP (mem, 0);
/* Pick a template by addressing mode: fstm/fldm-db for pre-decrement,
   -ia for post-increment, plain fld/fst otherwise.  */
12715 switch (GET_CODE (addr))
12718 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12719 ops[0] = XEXP (addr, 0);
12724 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12725 ops[0] = XEXP (addr, 0);
12730 templ = "f%s%c%%?\t%%%s0, %%1%s";
/* Fill in ld/st, the s/d size letter, and an "@ int" comment marker for
   integer-mode data.  */
12736 sprintf (buff, templ,
12737 load ? "ld" : "st",
12740 integer_p ? "\t%@ int" : "");
12741 output_asm_insn (buff, ops);
12746 /* Output a Neon quad-word load or store, or a load or store for
12747 larger structure modes.
12749 WARNING: The ordering of elements is weird in big-endian mode,
12750 because we use VSTM, as required by the EABI. GCC RTL defines
12751 element ordering based on in-memory order. This can be differ
12752 from the architectural ordering of elements within a NEON register.
12753 The intrinsics defined in arm_neon.h use the NEON register element
12754 ordering, not the GCC RTL element ordering.
12756 For example, the in-memory ordering of a big-endian a quadword
12757 vector with 16-bit elements when stored from register pair {d0,d1}
12758 will be (lowest address first, d0[N] is NEON register element N):
12760 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12762 When necessary, quadword registers (dN, dN+1) are moved to ARM
12763 registers from rN in the order:
12765 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12767 So that STM/LDM can be used on vectors in ARM registers, and the
12768 same memory layout will result as if VSTM/VLDM were used. */
12771 output_move_neon (rtx *operands)
12773 rtx reg, mem, addr, ops[2];
12774 int regno, load = REG_P (operands[0]);
12777 enum machine_mode mode;
12779 reg = operands[!load];
12780 mem = operands[load];
12782 mode = GET_MODE (reg);
12784 gcc_assert (REG_P (reg));
12785 regno = REGNO (reg);
12786 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12787 || NEON_REGNO_OK_FOR_QUAD (regno));
12788 gcc_assert (VALID_NEON_DREG_MODE (mode)
12789 || VALID_NEON_QREG_MODE (mode)
12790 || VALID_NEON_STRUCT_MODE (mode));
12791 gcc_assert (MEM_P (mem));
12793 addr = XEXP (mem, 0);
12795 /* Strip off const from addresses like (const (plus (...))). */
12796 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12797 addr = XEXP (addr, 0);
12799 switch (GET_CODE (addr))
12802 templ = "v%smia%%?\t%%0!, %%h1";
12803 ops[0] = XEXP (addr, 0);
12808 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12809 templ = "v%smdb%%?\t%%0!, %%h1";
12810 ops[0] = XEXP (addr, 0);
12815 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12816 gcc_unreachable ();
/* Label/offset addresses: emit one vldr/vstr per D register, deferring
   any transfer whose destination register overlaps the address until
   last so the address is not clobbered before it is fully used.  */
12821 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12824 for (i = 0; i < nregs; i++)
12826 /* We're only using DImode here because it's a convenient size. */
12827 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12828 ops[1] = adjust_address (mem, DImode, 8 * i);
12829 if (reg_overlap_mentioned_p (ops[0], mem))
/* At most one transfer may overlap the address.  */
12831 gcc_assert (overlap == -1);
12836 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12837 output_asm_insn (buff, ops);
/* Now emit the deferred overlapping transfer, if there was one.  */
12842 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12843 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12844 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12845 output_asm_insn (buff, ops);
12852 templ = "v%smia%%?\t%%m0, %%h1";
12857 sprintf (buff, templ, load ? "ld" : "st");
12858 output_asm_insn (buff, ops);
12863 /* Compute and return the length of neon_mov<mode>, where <mode> is
12864 one of VSTRUCT modes: EI, OI, CI or XI. */
12866 arm_attr_length_move_neon (rtx insn)
12868 rtx reg, mem, addr;
12870 enum machine_mode mode;
12872 extract_insn_cached (insn);
/* reg->reg moves: length depends only on the structure mode (the
   per-mode length values are elided in this listing).  */
12874 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
12876 mode = GET_MODE (recog_data.operand[0]);
12887 gcc_unreachable ();
12891 load = REG_P (recog_data.operand[0]);
12892 reg = recog_data.operand[!load];
12893 mem = recog_data.operand[load];
12895 gcc_assert (MEM_P (mem));
12897 mode = GET_MODE (reg);
12898 addr = XEXP (mem, 0);
12900 /* Strip off const from addresses like (const (plus (...))). */
12901 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12902 addr = XEXP (addr, 0);
/* These address forms are split into one vldr/vstr per D register by
   output_move_neon, so the length scales with the register count.  */
12904 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
12906 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12913 /* Output an ADD r, s, #n where n may be too big for one instruction.
12914 If adding zero to one register, output nothing. */
12916 output_add_immediate (rtx *operands)
12918 HOST_WIDE_INT n = INTVAL (operands[2]);
/* Only emit anything when the add is not a no-op.  */
12920 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
/* Negative constants are emitted as a SUB chain of their negation
   (the guard and negation lines are elided in this listing).  */
12923 output_multi_immediate (operands,
12924 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12927 output_multi_immediate (operands,
12928 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   the loop's byte-selection test and the final return are not visible.  */
12935 /* Output a multiple immediate operation.
12936 OPERANDS is the vector of operands referred to in the output patterns.
12937 INSTR1 is the output pattern to use for the first constant.
12938 INSTR2 is the output pattern to use for subsequent constants.
12939 IMMED_OP is the index of the constant slot in OPERANDS.
12940 N is the constant value. */
12941 static const char *
12942 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12943 int immed_op, HOST_WIDE_INT n)
/* Truncation guard for hosts whose HOST_WIDE_INT is wider than 32 bits.  */
12945 #if HOST_BITS_PER_WIDE_INT > 32
12951 /* Quick and easy output. */
12952 operands[immed_op] = const0_rtx;
12953 output_asm_insn (instr1, operands);
12958 const char * instr = instr1;
12960 /* Note that n is never zero here (which would give no output). */
/* Walk the constant in 8-bit chunks at even rotations, matching the
   ARM immediate encoding (8 bits rotated by an even amount).  */
12961 for (i = 0; i < 32; i += 2)
12965 operands[immed_op] = GEN_INT (n & (255 << i));
12966 output_asm_insn (instr, operands);
/* NOTE(review): excerpt — the switch over CODE and the other shift cases
   (asr/lsr/ror, presumably) are elided; only the LSL return is visible.  */
12976 /* Return the name of a shifter operation. */
12977 static const char *
12978 arm_shift_nmem(enum rtx_code code)
12983 return ARM_LSL_NAME;
/* NOTE(review): excerpt — most switch cases (add/and/ior/xor, presumably)
   are elided from this listing; only MINUS, the shift fallthrough, and the
   default are visible.  */
12999 /* Return the appropriate ARM instruction for the operation code.
13000 The returned result should not be overwritten. OP is the rtx of the
13001 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13004 arithmetic_instr (rtx op, int shift_first_arg)
13006 switch (GET_CODE (op))
/* Reversed subtract when the shifted operand is the first argument.  */
13012 return shift_first_arg ? "rsb" : "sub";
13027 return arm_shift_nmem(GET_CODE(op));
13030 gcc_unreachable ();
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   the case labels of both switches and several return paths are missing.  */
13034 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13035 for the operation code. The returned result should not be overwritten.
13036 OP is the rtx code of the shift.
13037 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13039 static const char *
13040 shift_op (rtx op, HOST_WIDE_INT *amountp)
13043 enum rtx_code code = GET_CODE (op);
/* Classify the shift amount operand: constant vs. register.  */
13045 switch (GET_CODE (XEXP (op, 1)))
13053 *amountp = INTVAL (XEXP (op, 1));
13057 gcc_unreachable ();
/* ROTATERT synthesised from a left rotate: amount becomes 32 - n.  */
13063 gcc_assert (*amountp != -1);
13064 *amountp = 32 - *amountp;
13067 /* Fall through. */
13073 mnem = arm_shift_nmem(code);
/* MULT by a power of two is emitted as LSL by log2 of the constant.  */
13077 /* We never have to worry about the amount being other than a
13078 power of 2, since this case can never be reloaded from a reg. */
13079 gcc_assert (*amountp != -1);
13080 *amountp = int_log2 (*amountp);
13081 return ARM_LSL_NAME;
13084 gcc_unreachable ();
13087 if (*amountp != -1)
13089 /* This is not 100% correct, but follows from the desire to merge
13090 multiplication by a power of 2 with the recognizer for a
13091 shift. >=32 is not a valid shift for "lsl", so we must try and
13092 output a shift that produces the correct arithmetical result.
13093 Using lsr #32 is identical except for the fact that the carry bit
13094 is not set correctly if we set the flags; but we never use the
13095 carry bit from such an operation, so we can ignore that. */
13096 if (code == ROTATERT)
13097 /* Rotate is just modulo 32. */
13099 else if (*amountp != (*amountp & 31))
13101 if (code == ASHIFT)
13106 /* Shifts of 0 are no-ops. */
/* NOTE(review): excerpt — the loop body (shift++) and the return statement
   are elided from this listing.  */
13114 /* Obtain the shift from the POWER of two. */
/* Precondition (implied by the loop + assert): POWER has exactly one bit
   set within the low 32 bits; otherwise the assert fires or the loop spins.  */
13116 static HOST_WIDE_INT
13117 int_log2 (HOST_WIDE_INT power)
13119 HOST_WIDE_INT shift = 0;
13121 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13123 gcc_assert (shift <= 31);
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   the per-character fetch, ISPRINT test and len_so_far updates are missing.  */
13130 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13131 because /bin/as is horribly restrictive. The judgement about
13132 whether or not each character is 'printable' (and can be output as
13133 is) or not (and must be printed with an octal escape) must be made
13134 with reference to the *host* character set -- the situation is
13135 similar to that discussed in the comments above pp_c_char in
13136 c-pretty-print.c. */
/* Maximum characters emitted per .ascii directive before splitting.  */
13138 #define MAX_ASCII_LEN 51
/* STREAM: assembler output; P: bytes to emit; LEN: number of bytes.  */
13141 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13144 int len_so_far = 0;
13146 fputs ("\t.ascii\t\"", stream);
13148 for (i = 0; i < len; i++)
/* Start a fresh .ascii directive when the current one is full.  */
13152 if (len_so_far >= MAX_ASCII_LEN)
13154 fputs ("\"\n\t.ascii\t\"", stream);
/* Backslash and double-quote must themselves be escaped.  */
13160 if (c == '\\' || c == '\"')
13162 putc ('\\', stream);
/* Non-printable characters are emitted as three-digit octal escapes.  */
13170 fprintf (stream, "\\%03o", c);
13175 fputs ("\"\n", stream);
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   in particular the head of two conditionals (before the "&&
   !TARGET_SINGLE_PIC_BASE" lines, presumably "if (flag_pic") and the
   max_reg assignments are missing from this listing.  */
13178 /* Compute the register save mask for registers 0 through 12
13179 inclusive. This code is used by arm_compute_save_reg_mask. */
13181 static unsigned long
13182 arm_compute_save_reg0_reg12_mask (void)
13184 unsigned long func_type = arm_current_func_type ();
13185 unsigned long save_reg_mask = 0;
13188 if (IS_INTERRUPT (func_type))
13190 unsigned int max_reg;
13191 /* Interrupt functions must not corrupt any registers,
13192 even call clobbered ones. If this is a leaf function
13193 we can just examine the registers used by the RTL, but
13194 otherwise we have to assume that whatever function is
13195 called might clobber anything, and so we have to save
13196 all the call-clobbered registers as well. */
13197 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13198 /* FIQ handlers have registers r8 - r12 banked, so
13199 we only need to check r0 - r7, Normal ISRs only
13200 bank r14 and r15, so we must check up to r12.
13201 r13 is the stack pointer which is always preserved,
13202 so we do not need to consider it here. */
13207 for (reg = 0; reg <= max_reg; reg++)
13208 if (df_regs_ever_live_p (reg)
13209 || (! current_function_is_leaf && call_used_regs[reg]))
13210 save_reg_mask |= (1 << reg);
13212 /* Also save the pic base register if necessary. */
13214 && !TARGET_SINGLE_PIC_BASE
13215 && arm_pic_register != INVALID_REGNUM
13216 && crtl->uses_pic_offset_table)
13217 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13219 else if (IS_VOLATILE(func_type))
13221 /* For noreturn functions we historically omitted register saves
13222 altogether. However this really messes up debugging. As a
13223 compromise save just the frame pointers. Combined with the link
13224 register saved elsewhere this should be sufficient to get
/* Save whichever hard frame pointer(s) are live, ARM or Thumb.  */
13226 if (frame_pointer_needed)
13227 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13228 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13229 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13230 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13231 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13235 /* In the normal case we only need to save those registers
13236 which are call saved and which are used by this function. */
13237 for (reg = 0; reg <= 11; reg++)
13238 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13239 save_reg_mask |= (1 << reg);
13241 /* Handle the frame pointer as a special case. */
13242 if (frame_pointer_needed)
13243 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13245 /* If we aren't loading the PIC register,
13246 don't stack it even though it may be live. */
13248 && !TARGET_SINGLE_PIC_BASE
13249 && arm_pic_register != INVALID_REGNUM
13250 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13251 || crtl->uses_pic_offset_table))
13252 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13254 /* The prologue will copy SP into R0, so save it. */
13255 if (IS_STACKALIGN (func_type))
13256 save_reg_mask |= 1;
13259 /* Save registers so the exception handler can modify them. */
13260 if (crtl->calls_eh_return)
13266 reg = EH_RETURN_DATA_REGNO (i);
13267 if (reg == INVALID_REGNUM)
13269 save_reg_mask |= 1 << reg;
13273 return save_reg_mask;
/* NOTE(review): excerpt — braces/blank lines elided by the listing, but the
   logic appears complete: 4 bytes are reserved only for APCS-frame ARM-mode
   nested functions where r3 is live and there are no pretend args.  */
13277 /* Compute the number of bytes used to store the static chain register on the
13278 stack, above the stack frame. We need to know this accurately to get the
13279 alignment of the rest of the stack frame correct. */
13281 static int arm_compute_static_chain_stack_bytes (void)
13283 unsigned long func_type = arm_current_func_type ();
13284 int static_chain_stack_bytes = 0;
13286 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13287 IS_NESTED (func_type) &&
13288 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13289 static_chain_stack_bytes = 4;
13291 return static_chain_stack_bytes;
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   several condition heads (e.g. before "&& ARM_FUNC_TYPE (...) ==
   ARM_FT_NORMAL") and loop bodies are missing from this listing.  */
13295 /* Compute a bit mask of which registers need to be
13296 saved on the stack for the current function.
13297 This is used by arm_get_frame_offsets, which may add extra registers. */
13299 static unsigned long
13300 arm_compute_save_reg_mask (void)
13302 unsigned int save_reg_mask = 0;
13303 unsigned long func_type = arm_current_func_type ();
13306 if (IS_NAKED (func_type))
13307 /* This should never really happen. */
13310 /* If we are creating a stack frame, then we must save the frame pointer,
13311 IP (which will hold the old stack pointer), LR and the PC. */
13312 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13314 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13317 | (1 << PC_REGNUM);
13319 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13321 /* Decide if we need to save the link register.
13322 Interrupt routines have their own banked link register,
13323 so they never need to save it.
13324 Otherwise if we do not use the link register we do not need to save
13325 it. If we are pushing other registers onto the stack however, we
13326 can save an instruction in the epilogue by pushing the link register
13327 now and then popping it back into the PC. This incurs extra memory
13328 accesses though, so we only do it when optimizing for size, and only
13329 if we know that we will not need a fancy return sequence. */
13330 if (df_regs_ever_live_p (LR_REGNUM)
13333 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13334 && !crtl->calls_eh_return))
13335 save_reg_mask |= 1 << LR_REGNUM;
13337 if (cfun->machine->lr_save_eliminated)
13338 save_reg_mask &= ~ (1 << LR_REGNUM);
/* iWMMXt requires the stack to be 64-bit aligned before its registers
   and locals are stored; push one extra core register if the count of
   pushed words is odd.  */
13340 if (TARGET_REALLY_IWMMXT
13341 && ((bit_count (save_reg_mask)
13342 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13343 arm_compute_static_chain_stack_bytes())
13346 /* The total number of registers that are going to be pushed
13347 onto the stack is odd. We need to ensure that the stack
13348 is 64-bit aligned before we start to save iWMMXt registers,
13349 and also before we start to create locals. (A local variable
13350 might be a double or long long which we will load/store using
13351 an iWMMXt instruction). Therefore we need to push another
13352 ARM register, so that the stack will be 64-bit aligned. We
13353 try to avoid using the arg registers (r0 -r3) as they might be
13354 used to pass values in a tail call. */
13355 for (reg = 4; reg <= 12; reg++)
13356 if ((save_reg_mask & (1 << reg)) == 0)
13360 save_reg_mask |= (1 << reg);
/* Fallback: no free r4-r12 found, so take r3 and forbid sibcalls
   (r3 may carry an argument in a tail call).  */
13363 cfun->machine->sibcall_blocked = 1;
13364 save_reg_mask |= (1 << 3);
13368 /* We may need to push an additional register for use initializing the
13369 PIC base register. */
13370 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13371 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13373 reg = thumb_find_work_register (1 << 4);
13374 if (!call_used_regs[reg])
13375 save_reg_mask |= (1 << reg);
13378 return save_reg_mask;
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   the initial "mask = 0", the body of the first loop, a condition head
   before "&& !TARGET_SINGLE_PIC_BASE", and the final return are missing.  */
13382 /* Compute a bit mask of which registers need to be
13383 saved on the stack for the current function. */
13384 static unsigned long
13385 thumb1_compute_save_reg_mask (void)
13387 unsigned long mask;
13391 for (reg = 0; reg < 12; reg ++)
13392 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13396 && !TARGET_SINGLE_PIC_BASE
13397 && arm_pic_register != INVALID_REGNUM
13398 && crtl->uses_pic_offset_table)
13399 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13401 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13402 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13403 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13405 /* LR will also be pushed if any lo regs are pushed. */
13406 if (mask & 0xff || thumb_force_lr_save ())
13407 mask |= (1 << LR_REGNUM);
13409 /* Make sure we have a low work register if we need one.
13410 We will need one if we are going to push a high register,
13411 but we are not currently intending to push a low register. */
13412 if ((mask & 0xff) == 0
13413 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13415 /* Use thumb_find_work_register to choose which register
13416 we will use. If the register is live then we will
13417 have to push it. Use LAST_LO_REGNUM as our fallback
13418 choice for the register to select. */
13419 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13420 /* Make sure the register returned by thumb_find_work_register is
13421 not part of the return value. */
13422 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13423 reg = LAST_LO_REGNUM;
13425 if (! call_used_regs[reg])
13429 /* The 504 below is 8 bytes less than 512 because there are two possible
13430 alignment words. We can't tell here if they will be present or not so we
13431 have to play it safe and assume that they are. */
13432 if ((CALLER_INTERWORKING_SLOT_SIZE +
13433 ROUND_UP_WORD (get_frame_size ()) +
13434 crtl->outgoing_args_size) >= 504)
13436 /* This is the same as the code in thumb1_expand_prologue() which
13437 determines which register to use for stack decrement. */
13438 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13439 if (mask & (1 << reg))
13442 if (reg > LAST_LO_REGNUM)
13444 /* Make sure we have a register available for stack decrement. */
13445 mask |= 1 << LAST_LO_REGNUM;
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   the return type, "saved"/"count" declarations, the count accumulation,
   and the final return are missing from this listing.  */
13453 /* Return the number of bytes required to save VFP registers. */
13455 arm_get_vfp_saved_size (void)
13457 unsigned int regno;
13462 /* Space for saved VFP registers. */
13463 if (TARGET_HARD_FLOAT && TARGET_VFP)
/* Walk the VFP bank in D-register (even/odd) pairs.  */
13466 for (regno = FIRST_VFP_REGNUM;
13467 regno < LAST_VFP_REGNUM;
13470 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13471 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13475 /* Workaround ARM10 VFPr1 bug. */
/* Pre-v6 cores cannot use a 2-register FSTMD block; see the errata
   workaround above — presumably the count is bumped/split here.  */
13476 if (count == 2 && !arm_arch6)
13478 saved += count * 8;
13487 if (count == 2 && !arm_arch6)
13489 saved += count * 8;
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   the return type, several condition heads, early returns and brace lines
   are missing from this listing.  */
13496 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13497 everything bar the final return instruction. */
/* OPERAND is the condition-code operand for the conditionalised return;
   REVERSE selects the inverted condition (%D vs %d).  */
13499 output_return_instruction (rtx operand, int really_return, int reverse)
13501 char conditional[10];
13504 unsigned long live_regs_mask;
13505 unsigned long func_type;
13506 arm_stack_offsets *offsets;
13508 func_type = arm_current_func_type ();
/* Naked functions: the user supplies the epilogue, emit nothing.  */
13510 if (IS_NAKED (func_type))
13513 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13515 /* If this function was declared non-returning, and we have
13516 found a tail call, then we have to trust that the called
13517 function won't return. */
13522 /* Otherwise, trap an attempted return by aborting. */
13524 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13526 assemble_external_libcall (ops[1]);
13527 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13533 gcc_assert (!cfun->calls_alloca || really_return);
13535 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13537 cfun->machine->return_used_this_function = 1;
13539 offsets = arm_get_frame_offsets ();
13540 live_regs_mask = offsets->saved_regs_mask;
13542 if (live_regs_mask)
13544 const char * return_reg;
13546 /* If we do not have any special requirements for function exit
13547 (e.g. interworking) then we can load the return address
13548 directly into the PC. Otherwise we must load it into LR. */
13550 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13551 return_reg = reg_names[PC_REGNUM];
13553 return_reg = reg_names[LR_REGNUM];
13555 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13557 /* There are three possible reasons for the IP register
13558 being saved. 1) a stack frame was created, in which case
13559 IP contains the old stack pointer, or 2) an ISR routine
13560 corrupted it, or 3) it was saved to align the stack on
13561 iWMMXt. In case 1, restore IP into SP, otherwise just
13563 if (frame_pointer_needed)
13565 live_regs_mask &= ~ (1 << IP_REGNUM);
13566 live_regs_mask |= (1 << SP_REGNUM);
13569 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13572 /* On some ARM architectures it is faster to use LDR rather than
13573 LDM to load a single register. On other architectures, the
13574 cost is the same. In 26 bit mode, or for exception handlers,
13575 we have to use LDM to load the PC so that the CPSR is also
/* Find the single saved register, if only one bit is set.  */
13577 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13578 if (live_regs_mask == (1U << reg))
13581 if (reg <= LAST_ARM_REGNUM
13582 && (reg != LR_REGNUM
13584 || ! IS_INTERRUPT (func_type)))
13586 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13587 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13594 /* Generate the load multiple instruction to restore the
13595 registers. Note we can get here, even if
13596 frame_pointer_needed is true, but only if sp already
13597 points to the base of the saved core registers. */
13598 if (live_regs_mask & (1 << SP_REGNUM))
13600 unsigned HOST_WIDE_INT stack_adjust;
13602 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13603 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13605 if (stack_adjust && arm_arch5 && TARGET_ARM)
13606 if (TARGET_UNIFIED_ASM)
13607 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13609 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13612 /* If we can't use ldmib (SA110 bug),
13613 then try to pop r3 instead. */
13615 live_regs_mask |= 1 << 3;
13617 if (TARGET_UNIFIED_ASM)
13618 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13620 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13624 if (TARGET_UNIFIED_ASM)
13625 sprintf (instr, "pop%s\t{", conditional);
13627 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
/* Append the register list to the opcode built above.  */
13629 p = instr + strlen (instr);
13631 for (reg = 0; reg <= SP_REGNUM; reg++)
13632 if (live_regs_mask & (1 << reg))
13634 int l = strlen (reg_names[reg]);
13640 memcpy (p, ", ", 2);
13644 memcpy (p, "%|", 2);
13645 memcpy (p + 2, reg_names[reg], l);
13649 if (live_regs_mask & (1 << LR_REGNUM))
13651 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13652 /* If returning from an interrupt, restore the CPSR. */
13653 if (IS_INTERRUPT (func_type))
13660 output_asm_insn (instr, & operand);
13662 /* See if we need to generate an extra instruction to
13663 perform the actual function return. */
13665 && func_type != ARM_FT_INTERWORKED
13666 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13668 /* The return has already been handled
13669 by loading the LR into the PC. */
/* No saved registers: pick the return instruction by function type.  */
13676 switch ((int) ARM_FUNC_TYPE (func_type))
13680 /* ??? This is wrong for unified assembly syntax. */
13681 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13684 case ARM_FT_INTERWORKED:
13685 sprintf (instr, "bx%s\t%%|lr", conditional);
13688 case ARM_FT_EXCEPTION:
13689 /* ??? This is wrong for unified assembly syntax. */
13690 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13694 /* Use bx if it's available. */
13695 if (arm_arch5 || arm_arch4t)
13696 sprintf (instr, "bx%s\t%%|lr", conditional);
13698 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13702 output_asm_insn (instr, & operand);
/* NOTE(review): excerpt — a few lines elided (listing numbers jump);
   the return type, the declaration of x, and closing brace are missing.  */
13708 /* Write the function name into the code section, directly preceding
13709 the function prologue.
13711 Code will be output similar to this:
13713 .ascii "arm_poke_function_name", 0
13716 .word 0xff000000 + (t1 - t0)
13717 arm_poke_function_name
13719 stmfd sp!, {fp, ip, lr, pc}
13722 When performing a stack backtrace, code can inspect the value
13723 of 'pc' stored at 'fp' + 0. If the trace function then looks
13724 at location pc - 12 and the top 8 bits are set, then we know
13725 that there is a function name embedded immediately preceding this
13726 location and has length ((pc[-3]) & 0xff000000).
13728 We assume that pc is declared as a pointer to an unsigned long.
13730 It is of no benefit to output the function name if we are assembling
13731 a leaf function. These function types will not contain a stack
13732 backtrace structure, therefore it is not possible to determine the
/* STREAM: assembler output; NAME: function name to embed (NUL included
   in the emitted length, hence strlen + 1 below).  */
13735 arm_poke_function_name (FILE *stream, const char *name)
13737 unsigned long alignlength;
13738 unsigned long length;
13741 length = strlen (name) + 1;
13742 alignlength = ROUND_UP_WORD (length);
13744 ASM_OUTPUT_ASCII (stream, name, length);
13745 ASM_OUTPUT_ALIGN (stream, 2);
/* Marker word: top byte 0xff flags the embedded name, low bits give
   its word-aligned length.  */
13746 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13747 assemble_aligned_integer (UNITS_PER_WORD, x);
/* NOTE(review): excerpt — interior lines elided (listing numbers jump);
   the TARGET_THUMB1 guard around the thumb1 delegation, several case
   labels, and returns are missing from this listing.  */
13750 /* Place some comments into the assembler stream
13751 describing the current function. */
/* F: assembler output stream; FRAME_SIZE: local frame size in bytes,
   echoed into the diagnostic comment below.  */
13753 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13755 unsigned long func_type;
/* Thumb-1 has its own prologue commentary routine.  */
13759 thumb1_output_function_prologue (f, frame_size);
13763 /* Sanity check. */
13764 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13766 func_type = arm_current_func_type ();
13768 switch ((int) ARM_FUNC_TYPE (func_type))
13771 case ARM_FT_NORMAL:
13773 case ARM_FT_INTERWORKED:
13774 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13777 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13780 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13782 case ARM_FT_EXCEPTION:
13783 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13787 if (IS_NAKED (func_type))
13788 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13790 if (IS_VOLATILE (func_type))
13791 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13793 if (IS_NESTED (func_type))
13794 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13795 if (IS_STACKALIGN (func_type))
13796 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13798 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13800 crtl->args.pretend_args_size, frame_size);
13802 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13803 frame_pointer_needed,
13804 cfun->machine->uses_anonymous_args);
13806 if (cfun->machine->lr_save_eliminated)
13807 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13809 if (crtl->calls_eh_return)
13810 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13815 arm_output_epilogue (rtx sibling)
13818 unsigned long saved_regs_mask;
13819 unsigned long func_type;
13820 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13821 frame that is $fp + 4 for a non-variadic function. */
13822 int floats_offset = 0;
13824 FILE * f = asm_out_file;
13825 unsigned int lrm_count = 0;
13826 int really_return = (sibling == NULL);
13828 arm_stack_offsets *offsets;
13830 /* If we have already generated the return instruction
13831 then it is futile to generate anything else. */
13832 if (use_return_insn (FALSE, sibling) &&
13833 (cfun->machine->return_used_this_function != 0))
13836 func_type = arm_current_func_type ();
13838 if (IS_NAKED (func_type))
13839 /* Naked functions don't have epilogues. */
13842 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13846 /* A volatile function should never return. Call abort. */
13847 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13848 assemble_external_libcall (op);
13849 output_asm_insn ("bl\t%a0", &op);
13854 /* If we are throwing an exception, then we really must be doing a
13855 return, so we can't tail-call. */
13856 gcc_assert (!crtl->calls_eh_return || really_return);
13858 offsets = arm_get_frame_offsets ();
13859 saved_regs_mask = offsets->saved_regs_mask;
13862 lrm_count = bit_count (saved_regs_mask);
13864 floats_offset = offsets->saved_args;
13865 /* Compute how far away the floats will be. */
13866 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13867 if (saved_regs_mask & (1 << reg))
13868 floats_offset += 4;
13870 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13872 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13873 int vfp_offset = offsets->frame;
13875 if (TARGET_FPA_EMU2)
13877 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13878 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13880 floats_offset += 12;
13881 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13882 reg, FP_REGNUM, floats_offset - vfp_offset);
13887 start_reg = LAST_FPA_REGNUM;
13889 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13891 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13893 floats_offset += 12;
13895 /* We can't unstack more than four registers at once. */
13896 if (start_reg - reg == 3)
13898 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13899 reg, FP_REGNUM, floats_offset - vfp_offset);
13900 start_reg = reg - 1;
13905 if (reg != start_reg)
13906 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13907 reg + 1, start_reg - reg,
13908 FP_REGNUM, floats_offset - vfp_offset);
13909 start_reg = reg - 1;
13913 /* Just in case the last register checked also needs unstacking. */
13914 if (reg != start_reg)
13915 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13916 reg + 1, start_reg - reg,
13917 FP_REGNUM, floats_offset - vfp_offset);
13920 if (TARGET_HARD_FLOAT && TARGET_VFP)
13924 /* The fldmd insns do not have base+offset addressing
13925 modes, so we use IP to hold the address. */
13926 saved_size = arm_get_vfp_saved_size ();
13928 if (saved_size > 0)
13930 floats_offset += saved_size;
13931 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13932 FP_REGNUM, floats_offset - vfp_offset);
13934 start_reg = FIRST_VFP_REGNUM;
13935 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13937 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13938 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13940 if (start_reg != reg)
13941 vfp_output_fldmd (f, IP_REGNUM,
13942 (start_reg - FIRST_VFP_REGNUM) / 2,
13943 (reg - start_reg) / 2);
13944 start_reg = reg + 2;
13947 if (start_reg != reg)
13948 vfp_output_fldmd (f, IP_REGNUM,
13949 (start_reg - FIRST_VFP_REGNUM) / 2,
13950 (reg - start_reg) / 2);
13955 /* The frame pointer is guaranteed to be non-double-word aligned.
13956 This is because it is set to (old_stack_pointer - 4) and the
13957 old_stack_pointer was double word aligned. Thus the offset to
13958 the iWMMXt registers to be loaded must also be non-double-word
13959 sized, so that the resultant address *is* double-word aligned.
13960 We can ignore floats_offset since that was already included in
13961 the live_regs_mask. */
13962 lrm_count += (lrm_count % 2 ? 2 : 1);
13964 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13965 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13967 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13968 reg, FP_REGNUM, lrm_count * 4);
13973 /* saved_regs_mask should contain the IP, which at the time of stack
13974 frame generation actually contains the old stack pointer. So a
13975 quick way to unwind the stack is just pop the IP register directly
13976 into the stack pointer. */
13977 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13978 saved_regs_mask &= ~ (1 << IP_REGNUM);
13979 saved_regs_mask |= (1 << SP_REGNUM);
13981 /* There are two registers left in saved_regs_mask - LR and PC. We
13982 only need to restore the LR register (the return address), but to
13983 save time we can load it directly into the PC, unless we need a
13984 special function exit sequence, or we are not really returning. */
13986 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13987 && !crtl->calls_eh_return)
13988 /* Delete the LR from the register mask, so that the LR on
13989 the stack is loaded into the PC in the register mask. */
13990 saved_regs_mask &= ~ (1 << LR_REGNUM);
13992 saved_regs_mask &= ~ (1 << PC_REGNUM);
13994 /* We must use SP as the base register, because SP is one of the
13995 registers being restored. If an interrupt or page fault
13996 happens in the ldm instruction, the SP might or might not
13997 have been restored. That would be bad, as then SP will no
13998 longer indicate the safe area of stack, and we can get stack
13999 corruption. Using SP as the base register means that it will
14000 be reset correctly to the original value, should an interrupt
14001 occur. If the stack pointer already points at the right
14002 place, then omit the subtraction. */
14003 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14004 || cfun->calls_alloca)
14005 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14006 4 * bit_count (saved_regs_mask));
14007 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14009 if (IS_INTERRUPT (func_type))
14010 /* Interrupt handlers will have pushed the
14011 IP onto the stack, so restore it now. */
14012 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14016 /* This branch is executed for ARM mode (non-apcs frames) and
14017 Thumb-2 mode. Frame layout is essentially the same for those
14018 cases, except that in ARM mode frame pointer points to the
14019 first saved register, while in Thumb-2 mode the frame pointer points
14020 to the last saved register.
14022 It is possible to make frame pointer point to last saved
14023 register in both cases, and remove some conditionals below.
14024 That means that fp setup in prologue would be just "mov fp, sp"
14025 and sp restore in epilogue would be just "mov sp, fp", whereas
14026 now we have to use add/sub in those cases. However, the value
14027 of that would be marginal, as both mov and add/sub are 32-bit
14028 in ARM mode, and it would require extra conditionals
14029 in arm_expand_prologue to distingish ARM-apcs-frame case
14030 (where frame pointer is required to point at first register)
14031 and ARM-non-apcs-frame. Therefore, such change is postponed
14032 until real need arise. */
14033 unsigned HOST_WIDE_INT amount;
14035 /* Restore stack pointer if necessary. */
14036 if (TARGET_ARM && frame_pointer_needed)
14038 operands[0] = stack_pointer_rtx;
14039 operands[1] = hard_frame_pointer_rtx;
14041 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14042 output_add_immediate (operands);
14046 if (frame_pointer_needed)
14048 /* For Thumb-2 restore sp from the frame pointer.
14049 Operand restrictions mean we have to incrememnt FP, then copy
14051 amount = offsets->locals_base - offsets->saved_regs;
14052 operands[0] = hard_frame_pointer_rtx;
14056 unsigned long count;
14057 operands[0] = stack_pointer_rtx;
14058 amount = offsets->outgoing_args - offsets->saved_regs;
14059 /* pop call clobbered registers if it avoids a
14060 separate stack adjustment. */
14061 count = offsets->saved_regs - offsets->saved_args;
14064 && !crtl->calls_eh_return
14065 && bit_count(saved_regs_mask) * 4 == count
14066 && !IS_INTERRUPT (func_type)
14067 && !crtl->tail_call_emit)
14069 unsigned long mask;
14070 mask = (1 << (arm_size_return_regs() / 4)) - 1;
14072 mask &= ~saved_regs_mask;
14074 while (bit_count (mask) * 4 > amount)
14076 while ((mask & (1 << reg)) == 0)
14078 mask &= ~(1 << reg);
14080 if (bit_count (mask) * 4 == amount) {
14082 saved_regs_mask |= mask;
14089 operands[1] = operands[0];
14090 operands[2] = GEN_INT (amount);
14091 output_add_immediate (operands);
14093 if (frame_pointer_needed)
14094 asm_fprintf (f, "\tmov\t%r, %r\n",
14095 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14098 if (TARGET_FPA_EMU2)
14100 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14101 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14102 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14107 start_reg = FIRST_FPA_REGNUM;
14109 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14111 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14113 if (reg - start_reg == 3)
14115 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14116 start_reg, SP_REGNUM);
14117 start_reg = reg + 1;
14122 if (reg != start_reg)
14123 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14124 start_reg, reg - start_reg,
14127 start_reg = reg + 1;
14131 /* Just in case the last register checked also needs unstacking. */
14132 if (reg != start_reg)
14133 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14134 start_reg, reg - start_reg, SP_REGNUM);
14137 if (TARGET_HARD_FLOAT && TARGET_VFP)
14139 int end_reg = LAST_VFP_REGNUM + 1;
14141 /* Scan the registers in reverse order. We need to match
14142 any groupings made in the prologue and generate matching
14144 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14146 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14147 && (!df_regs_ever_live_p (reg + 1)
14148 || call_used_regs[reg + 1]))
14150 if (end_reg > reg + 2)
14151 vfp_output_fldmd (f, SP_REGNUM,
14152 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14153 (end_reg - (reg + 2)) / 2);
14157 if (end_reg > reg + 2)
14158 vfp_output_fldmd (f, SP_REGNUM, 0,
14159 (end_reg - (reg + 2)) / 2);
14163 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14164 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14165 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14167 /* If we can, restore the LR into the PC. */
14168 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14169 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14170 && !IS_STACKALIGN (func_type)
14172 && crtl->args.pretend_args_size == 0
14173 && saved_regs_mask & (1 << LR_REGNUM)
14174 && !crtl->calls_eh_return)
14176 saved_regs_mask &= ~ (1 << LR_REGNUM);
14177 saved_regs_mask |= (1 << PC_REGNUM);
14178 rfe = IS_INTERRUPT (func_type);
14183 /* Load the registers off the stack. If we only have one register
14184 to load use the LDR instruction - it is faster. For Thumb-2
14185 always use pop and the assembler will pick the best instruction.*/
14186 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14187 && !IS_INTERRUPT(func_type))
14189 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14191 else if (saved_regs_mask)
14193 if (saved_regs_mask & (1 << SP_REGNUM))
14194 /* Note - write back to the stack register is not enabled
14195 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14196 in the list of registers and if we add writeback the
14197 instruction becomes UNPREDICTABLE. */
14198 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14200 else if (TARGET_ARM)
14201 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14204 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14207 if (crtl->args.pretend_args_size)
14209 /* Unwind the pre-pushed regs. */
14210 operands[0] = operands[1] = stack_pointer_rtx;
14211 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14212 output_add_immediate (operands);
14216 /* We may have already restored PC directly from the stack. */
14217 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14220 /* Stack adjustment for exception handler. */
14221 if (crtl->calls_eh_return)
14222 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14223 ARM_EH_STACKADJ_REGNUM);
14225 /* Generate the return instruction. */
14226 switch ((int) ARM_FUNC_TYPE (func_type))
14230 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14233 case ARM_FT_EXCEPTION:
14234 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14237 case ARM_FT_INTERWORKED:
14238 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14242 if (IS_STACKALIGN (func_type))
14244 /* See comment in arm_expand_prologue. */
14245 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14247 if (arm_arch5 || arm_arch4t)
14248 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14250 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
/* TARGET_ASM_FUNCTION_EPILOGUE worker: per-function cleanup after the body
   has been written out.  NOTE(review): several original lines (the guarding
   condition of the first branch, braces) are elided in this extract; the
   first branch is presumably the Thumb path, since its else is marked
   TARGET_32BIT -- confirm against the full source.  */
14258 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14259 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14261 arm_stack_offsets *offsets;
14267 /* Emit any call-via-reg trampolines that are needed for v4t support
14268 of call_reg and call_value_reg type insns. */
14269 for (regno = 0; regno < LR_REGNUM; regno++)
14271 rtx label = cfun->machine->call_via[regno];
/* Presumably only emitted when LABEL is non-null -- the guard is elided
   in this extract.  The trampoline is just "bx <regno>" placed after an
   internal label in the function's section.  */
14275 switch_to_section (function_section (current_function_decl));
14276 targetm.asm_out.internal_label (asm_out_file, "L",
14277 CODE_LABEL_NUMBER (label));
14278 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14282 /* ??? Probably not safe to set this here, since it assumes that a
14283 function will be emitted as assembly immediately after we generate
14284 RTL for it. This does not happen for inline functions. */
14285 cfun->machine->return_used_this_function = 0;
14287 else /* TARGET_32BIT */
14289 /* We need to take into account any stack-frame rounding. */
14290 offsets = arm_get_frame_offsets ();
/* Sanity check: if a return insn could have been used but was not, the
   frame must have been empty (or a frame pointer in use).  */
14292 gcc_assert (!use_return_insn (FALSE, NULL)
14293 || (cfun->machine->return_used_this_function != 0)
14294 || offsets->saved_regs == offsets->outgoing_args
14295 || frame_pointer_needed);
14297 /* Reset the ARM-specific per-function variables. */
14298 after_arm_reorg = 0;
14302 /* Generate and emit an insn that we will recognize as a push_multi.
14303 Unfortunately, since this insn does not reflect very well the actual
14304 semantics of the operation, we need to annotate the insn for the benefit
14305 of DWARF2 frame unwind information.
   MASK is a bitmask of core registers to push (bit N = register N).  */
14307 emit_multi_reg_push (unsigned long mask)
14310 int num_dwarf_regs;
14314 int dwarf_par_index;
/* Count the registers requested in MASK (the counter increment line is
   elided in this extract).  */
14317 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14318 if (mask & (1 << i))
14321 gcc_assert (num_regs && num_regs <= 16);
14323 /* We don't record the PC in the dwarf frame information. */
14324 num_dwarf_regs = num_regs;
14325 if (mask & (1 << PC_REGNUM))
14328 /* For the body of the insn we are going to generate an UNSPEC in
14329 parallel with several USEs. This allows the insn to be recognized
14330 by the push_multi pattern in the arm.md file.
14332 The body of the insn looks something like this:
14335 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14336 (const_int:SI <num>)))
14337 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14343 For the frame note however, we try to be more explicit and actually
14344 show each register being stored into the stack frame, plus a (single)
14345 decrement of the stack pointer. We do it this way in order to be
14346 friendly to the stack unwinding code, which only wants to see a single
14347 stack decrement per instruction. The RTL we generate for the note looks
14348 something like this:
14351 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14352 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14353 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14354 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14358 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14359 instead we'd have a parallel expression detailing all
14360 the stores to the various memory addresses so that debug
14361 information is more up-to-date. Remember however while writing
14362 this to take care of the constraints with the push instruction.
14364 Note also that this has to be taken care of for the VFP registers.
14366 For more see PR43399. */
14368 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14369 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
/* Slot 0 of the dwarf SEQUENCE is reserved for the SP decrement,
   filled in at the end; register stores start at index 1.  */
14370 dwarf_par_index = 1;
/* First (lowest-numbered) pushed register: becomes the PRE_MODIFY store
   at the head of the PARALLEL.  */
14372 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14374 if (mask & (1 << i))
14376 reg = gen_rtx_REG (SImode, i);
14378 XVECEXP (par, 0, 0)
14379 = gen_rtx_SET (VOIDmode,
14382 gen_rtx_PRE_MODIFY (Pmode,
14385 (stack_pointer_rtx,
14388 gen_rtx_UNSPEC (BLKmode,
14389 gen_rtvec (1, reg),
14390 UNSPEC_PUSH_MULT));
14392 if (i != PC_REGNUM)
14394 tmp = gen_rtx_SET (VOIDmode,
14395 gen_frame_mem (SImode, stack_pointer_rtx),
14397 RTX_FRAME_RELATED_P (tmp) = 1;
14398 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
/* Remaining pushed registers: recorded as USEs in the PARALLEL, and
   (except PC) as explicit frame-mem stores in the dwarf note.  */
14406 for (j = 1, i++; j < num_regs; i++)
14408 if (mask & (1 << i))
14410 reg = gen_rtx_REG (SImode, i);
14412 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14414 if (i != PC_REGNUM)
14417 = gen_rtx_SET (VOIDmode,
14420 plus_constant (stack_pointer_rtx,
14423 RTX_FRAME_RELATED_P (tmp) = 1;
14424 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14431 par = emit_insn (par);
/* The single SP decrement the unwinder is allowed to see.  */
14433 tmp = gen_rtx_SET (VOIDmode,
14435 plus_constant (stack_pointer_rtx, -4 * num_regs));
14436 RTX_FRAME_RELATED_P (tmp) = 1;
14437 XVECEXP (dwarf, 0, 0) = tmp;
14439 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14444 /* Calculate the size of the return value that is passed in registers. */
14446 arm_size_return_regs (void)
14448 enum machine_mode mode;
/* Prefer the mode of the return rtx when one has been set up; otherwise
   fall back to the declared result of the current function.  */
14450 if (crtl->return_rtx != 0)
14451 mode = GET_MODE (crtl->return_rtx);
14453 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14455 return GET_MODE_SIZE (mode);
/* Emit an insn pushing COUNT consecutive XFmode (12-byte) floating-point
   registers starting at BASE_REG, structured like emit_multi_reg_push:
   a PRE_MODIFY store plus USEs in the body, and an explicit dwarf note
   (one SP decrement plus per-register stores) for the unwinder.
   NOTE(review): return type and some lines are elided in this extract;
   callers treat the result as an insn.  */
14459 emit_sfm (int base_reg, int count)
14466 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14467 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14469 reg = gen_rtx_REG (XFmode, base_reg++);
/* First register: the PRE_MODIFY store heading the PARALLEL.  */
14471 XVECEXP (par, 0, 0)
14472 = gen_rtx_SET (VOIDmode,
14475 gen_rtx_PRE_MODIFY (Pmode,
14478 (stack_pointer_rtx,
14481 gen_rtx_UNSPEC (BLKmode,
14482 gen_rtvec (1, reg),
14483 UNSPEC_PUSH_MULT));
14484 tmp = gen_rtx_SET (VOIDmode,
14485 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14486 RTX_FRAME_RELATED_P (tmp) = 1;
14487 XVECEXP (dwarf, 0, 1) = tmp;
/* Remaining registers: USEs in the body, explicit stores in the note.  */
14489 for (i = 1; i < count; i++)
14491 reg = gen_rtx_REG (XFmode, base_reg++);
14492 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14494 tmp = gen_rtx_SET (VOIDmode,
14495 gen_frame_mem (XFmode,
14496 plus_constant (stack_pointer_rtx,
14499 RTX_FRAME_RELATED_P (tmp) = 1;
14500 XVECEXP (dwarf, 0, i + 1) = tmp;
/* Single SP decrement recorded for the unwinder: 12 bytes per register.  */
14503 tmp = gen_rtx_SET (VOIDmode,
14505 plus_constant (stack_pointer_rtx, -12 * count));
14507 RTX_FRAME_RELATED_P (tmp) = 1;
14508 XVECEXP (dwarf, 0, 0) = tmp;
14510 par = emit_insn (par);
14511 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14517 /* Return true if the current function needs to save/restore LR. */
14520 thumb_force_lr_save (void)
/* LR must be saved unless its save has already been eliminated, and
   the function is a leaf that neither uses a far jump nor has LR
   otherwise live.  */
14522 return !cfun->machine->lr_save_eliminated
14523 && (!leaf_function_p ()
14524 || thumb_far_jump_used_p ()
14525 || df_regs_ever_live_p (LR_REGNUM));
14529 /* Compute the distance from register FROM to register TO.
14530 These can be the arg pointer (26), the soft frame pointer (25),
14531 the stack pointer (13) or the hard frame pointer (11).
14532 In thumb mode r7 is used as the soft frame pointer, if needed.
14533 Typical stack layout looks like this:
14535 old stack pointer -> | |
14538 | | saved arguments for
14539 | | vararg functions
14542 hard FP & arg pointer -> | | \
14550 soft frame pointer -> | | /
14555 locals base pointer -> | | /
14560 current stack pointer -> | | /
14563 For a given function some or all of these stack components
14564 may not be needed, giving rise to the possibility of
14565 eliminating some of the registers.
14567 The values returned by this function must reflect the behavior
14568 of arm_expand_prologue() and arm_compute_save_reg_mask().
14570 The sign of the number returned reflects the direction of stack
14571 growth, so the values are positive for all eliminations except
14572 from the soft frame pointer to the hard frame pointer.
14574 SFP may point just inside the local variables block to ensure correct
14578 /* Calculate stack offsets. These are used to calculate register elimination
14579 offsets and in prologue/epilogue code. Also calculates which registers
14580 should be saved.
   Results are cached in cfun->machine->stack_offsets and returned from
   there once reload has completed.  */
14582 static arm_stack_offsets *
14583 arm_get_frame_offsets (void)
14585 struct arm_stack_offsets *offsets;
14586 unsigned long func_type;
14590 HOST_WIDE_INT frame_size;
14593 offsets = &cfun->machine->stack_offsets;
14595 /* We need to know if we are a leaf function. Unfortunately, it
14596 is possible to be called after start_sequence has been called,
14597 which causes get_insns to return the insns for the sequence,
14598 not the function, which will cause leaf_function_p to return
14599 the incorrect result.
   We only need
14601 to know about leaf functions once reload has completed, and the
14602 frame size cannot be changed after that time, so we can safely
14603 use the cached value. */
14605 if (reload_completed)
14608 /* Initially this is the size of the local variables. It will be translated
14609 into an offset once we have determined the size of preceding data. */
14610 frame_size = ROUND_UP_WORD (get_frame_size ());
14612 leaf = leaf_function_p ();
14614 /* Space for variadic functions. */
14615 offsets->saved_args = crtl->args.pretend_args_size;
14617 /* In Thumb mode this is incorrect, but never used. */
14618 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14619 arm_compute_static_chain_stack_bytes();
/* 32-bit (ARM/Thumb-2) path: the guarding condition is elided in this
   extract; its else below is marked TARGET_THUMB1.  */
14623 unsigned int regno;
14625 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14626 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14627 saved = core_saved;
14629 /* We know that SP will be doubleword aligned on entry, and we must
14630 preserve that condition at any subroutine call. We also require the
14631 soft frame pointer to be doubleword aligned. */
14633 if (TARGET_REALLY_IWMMXT)
14635 /* Check for the call-saved iWMMXt registers. */
14636 for (regno = FIRST_IWMMXT_REGNUM;
14637 regno <= LAST_IWMMXT_REGNUM;
14639 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14643 func_type = arm_current_func_type ();
14644 if (! IS_VOLATILE (func_type))
14646 /* Space for saved FPA registers. */
14647 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14648 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14651 /* Space for saved VFP registers. */
14652 if (TARGET_HARD_FLOAT && TARGET_VFP)
14653 saved += arm_get_vfp_saved_size ();
14656 else /* TARGET_THUMB1 */
14658 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14659 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14660 saved = core_saved;
14661 if (TARGET_BACKTRACE)
14665 /* Saved registers include the stack frame. */
14666 offsets->saved_regs = offsets->saved_args + saved +
14667 arm_compute_static_chain_stack_bytes();
14668 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14669 /* A leaf function does not need any stack alignment if it has nothing
   on the stack. */
14671 if (leaf && frame_size == 0)
14673 offsets->outgoing_args = offsets->soft_frame;
14674 offsets->locals_base = offsets->soft_frame;
14678 /* Ensure SFP has the correct alignment. */
14679 if (ARM_DOUBLEWORD_ALIGN
14680 && (offsets->soft_frame & 7))
14682 offsets->soft_frame += 4;
14683 /* Try to align stack by pushing an extra reg. Don't bother doing this
14684 when there is a stack frame as the alignment will be rolled into
14685 the normal stack adjustment. */
14686 if (frame_size + crtl->outgoing_args_size == 0)
14690 /* If it is safe to use r3, then do so. This sometimes
14691 generates better code on Thumb-2 by avoiding the need to
14692 use 32-bit push/pop instructions. */
14693 if (!crtl->tail_call_emit
14694 && arm_size_return_regs () <= 12
14695 && (offsets->saved_regs_mask & (1 << 3)) == 0)
/* Otherwise look for any unsaved low/core register to pad with
   (selection code elided in this extract).  */
14700 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14702 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14711 offsets->saved_regs += 4;
14712 offsets->saved_regs_mask |= (1 << reg);
14717 offsets->locals_base = offsets->soft_frame + frame_size;
14718 offsets->outgoing_args = (offsets->locals_base
14719 + crtl->outgoing_args_size);
14721 if (ARM_DOUBLEWORD_ALIGN)
14723 /* Ensure SP remains doubleword aligned. */
14724 if (offsets->outgoing_args & 7)
14725 offsets->outgoing_args += 4;
14726 gcc_assert (!(offsets->outgoing_args & 7));
14733 /* Calculate the relative offsets for the different stack pointers. Positive
14734 offsets are in the direction of stack growth.
   NOTE(review): the two nested "switch" statement headers and some case
   labels are elided in this extract; the outer switch is on FROM, the
   inner ones on TO.  */
14737 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14739 arm_stack_offsets *offsets;
14741 offsets = arm_get_frame_offsets ();
14743 /* OK, now we have enough information to compute the distances.
14744 There must be an entry in these switch tables for each pair
14745 of registers in ELIMINABLE_REGS, even if some of the entries
14746 seem to be redundant or useless. */
14749 case ARG_POINTER_REGNUM:
14752 case THUMB_HARD_FRAME_POINTER_REGNUM:
14755 case FRAME_POINTER_REGNUM:
14756 /* This is the reverse of the soft frame pointer
14757 to hard frame pointer elimination below. */
14758 return offsets->soft_frame - offsets->saved_args;
14760 case ARM_HARD_FRAME_POINTER_REGNUM:
14761 /* This is only non-zero in the case where the static chain register
14762 is stored above the frame. */
14763 return offsets->frame - offsets->saved_args - 4;
14765 case STACK_POINTER_REGNUM:
14766 /* If nothing has been pushed on the stack at all
14767 then this will return -4. This *is* correct! */
14768 return offsets->outgoing_args - (offsets->saved_args + 4);
14771 gcc_unreachable ();
14773 gcc_unreachable ();
14775 case FRAME_POINTER_REGNUM:
14778 case THUMB_HARD_FRAME_POINTER_REGNUM:
14781 case ARM_HARD_FRAME_POINTER_REGNUM:
14782 /* The hard frame pointer points to the top entry in the
14783 stack frame. The soft frame pointer to the bottom entry
14784 in the stack frame. If there is no stack frame at all,
14785 then they are identical. */
14787 return offsets->frame - offsets->soft_frame;
14789 case STACK_POINTER_REGNUM:
14790 return offsets->outgoing_args - offsets->soft_frame;
14793 gcc_unreachable ();
14795 gcc_unreachable ();
14798 /* You cannot eliminate from the stack pointer.
14799 In theory you could eliminate from the hard frame
14800 pointer to the stack pointer, but this will never
14801 happen, since if a stack frame is not needed the
14802 hard frame pointer will never be used. */
14803 gcc_unreachable ();
14807 /* Given FROM and TO register numbers, say whether this elimination is
14808 allowed. Frame pointer elimination is automatically handled.
14810 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14811 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14812 pointer, we must eliminate FRAME_POINTER_REGNUM into
14813 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14814 ARG_POINTER_REGNUM. */
14817 arm_can_eliminate (const int from, const int to)
/* The chain of conditionals rejects the four illegal pairs; the trailing
   "true" arm of the conditional expression is elided in this extract.  */
14819 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14820 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14821 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14822 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14826 /* Emit RTL to save coprocessor registers on function entry. Returns the
14827 number of bytes pushed. */
14830 arm_save_coproc_regs(void)
14832 int saved_size = 0;
14834 unsigned start_reg;
/* iWMMXt registers: each live call-saved register is pushed with an
   8-byte pre-decrement store (V2SImode).  The saved_size update for this
   loop is elided in this extract.  */
14837 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14838 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14840 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14841 insn = gen_rtx_MEM (V2SImode, insn);
14842 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14843 RTX_FRAME_RELATED_P (insn) = 1;
14847 /* Save any floating point call-saved registers used by this
   function. */
14849 if (TARGET_FPA_EMU2)
/* FPA emulator 2: one 12-byte (XFmode) pre-decrement store per register.  */
14851 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14852 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14854 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14855 insn = gen_rtx_MEM (XFmode, insn);
14856 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14857 RTX_FRAME_RELATED_P (insn) = 1;
/* Otherwise group consecutive live FPA registers into sfm multi-stores
   of at most four registers (emit_sfm).  */
14863 start_reg = LAST_FPA_REGNUM;
14865 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14867 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14869 if (start_reg - reg == 3)
14871 insn = emit_sfm (reg, 4);
14872 RTX_FRAME_RELATED_P (insn) = 1;
14874 start_reg = reg - 1;
14879 if (start_reg != reg)
14881 insn = emit_sfm (reg + 1, start_reg - reg);
14882 RTX_FRAME_RELATED_P (insn) = 1;
14883 saved_size += (start_reg - reg) * 12;
14885 start_reg = reg - 1;
/* Flush any partial group left over after the scan.  */
14889 if (start_reg != reg)
14891 insn = emit_sfm (reg + 1, start_reg - reg);
14892 saved_size += (start_reg - reg) * 12;
14893 RTX_FRAME_RELATED_P (insn) = 1;
14896 if (TARGET_HARD_FLOAT && TARGET_VFP)
/* VFP: scan D-register pairs, emitting an fstmd for each maximal run of
   live call-saved registers.  */
14898 start_reg = FIRST_VFP_REGNUM;
14900 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14902 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14903 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14905 if (start_reg != reg)
14906 saved_size += vfp_emit_fstmd (start_reg,
14907 (reg - start_reg) / 2);
14908 start_reg = reg + 2;
14911 if (start_reg != reg)
14912 saved_size += vfp_emit_fstmd (start_reg,
14913 (reg - start_reg) / 2);
14919 /* Set the Thumb frame pointer from the stack pointer.
   OFFSETS is the frame layout computed by arm_get_frame_offsets; the FP
   is set to SP + (outgoing_args - locals_base).  NOTE(review): the branch
   conditions choosing between the three emission forms are elided in this
   extract -- presumably small-immediate vs. large-amount paths.  */
14922 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14924 HOST_WIDE_INT amount;
14927 amount = offsets->outgoing_args - offsets->locals_base;
/* Direct form: FP = SP + amount.  */
14929 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14930 stack_pointer_rtx, GEN_INT (amount)));
/* Large amount: load the constant into FP first, then add SP.  */
14933 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14934 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14935 expects the first two operands to be the same. */
14938 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14940 hard_frame_pointer_rtx));
14944 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14945 hard_frame_pointer_rtx,
14946 stack_pointer_rtx));
/* Tell the unwinder the net effect: FP = SP + amount.  */
14948 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14949 plus_constant (stack_pointer_rtx, amount));
14950 RTX_FRAME_RELATED_P (dwarf) = 1;
14951 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14954 RTX_FRAME_RELATED_P (insn) = 1;
14957 /* Generate the prologue instructions for entry into an ARM or Thumb-2
   function. */
14960 arm_expand_prologue (void)
14965 unsigned long live_regs_mask;
14966 unsigned long func_type;
14968 int saved_pretend_args = 0;
14969 int saved_regs = 0;
14970 unsigned HOST_WIDE_INT args_to_push;
14971 arm_stack_offsets *offsets;
14973 func_type = arm_current_func_type ();
14975 /* Naked functions don't have prologues. */
14976 if (IS_NAKED (func_type))
14979 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14980 args_to_push = crtl->args.pretend_args_size;
14982 /* Compute which register we will have to save onto the stack. */
14983 offsets = arm_get_frame_offsets ();
14984 live_regs_mask = offsets->saved_regs_mask;
14986 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14988 if (IS_STACKALIGN (func_type))
14993 /* Handle a word-aligned stack pointer. We generate the following:
14998 <save and restore r0 in normal prologue/epilogue>
15002 The unwinder doesn't need to know about the stack realignment.
15003 Just tell it we saved SP in r0. */
15004 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15006 r0 = gen_rtx_REG (SImode, 0);
15007 r1 = gen_rtx_REG (SImode, 1);
15008 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15009 compiler won't choke. */
15010 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15011 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15012 insn = gen_movsi (r0, stack_pointer_rtx);
15013 RTX_FRAME_RELATED_P (insn) = 1;
15014 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
/* Realign: r1 = r0 & ~7; sp = r1.  */
15016 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15017 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15020 /* For APCS frames, if IP register is clobbered
15021 when creating frame, save that register in a special
   buffer. */
15023 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15025 if (IS_INTERRUPT (func_type))
15027 /* Interrupt functions must not corrupt any registers.
15028 Creating a frame pointer however, corrupts the IP
15029 register, so we must push it first. */
15030 insn = emit_multi_reg_push (1 << IP_REGNUM);
15032 /* Do not set RTX_FRAME_RELATED_P on this insn.
15033 The dwarf stack unwinding code only wants to see one
15034 stack decrement per function, and this is not it. If
15035 this instruction is labeled as being part of the frame
15036 creation sequence then dwarf2out_frame_debug_expr will
15037 die when it encounters the assignment of IP to FP
15038 later on, since the use of SP here establishes SP as
15039 the CFA register and not IP.
15041 Anyway this instruction is not really part of the stack
15042 frame creation although it is part of the prologue. */
15044 else if (IS_NESTED (func_type))
15046 /* The Static chain register is the same as the IP register
15047 used as a scratch register during stack frame creation.
15048 To get around this need to find somewhere to store IP
15049 whilst the frame is being created. We try the following
15052 1. The last argument register.
15053 2. A slot on the stack above the frame. (This only
15054 works if the function is not a varargs function).
15055 3. Register r3, after pushing the argument registers
15058 Note - we only need to tell the dwarf2 backend about the SP
15059 adjustment in the second variant; the static chain register
15060 doesn't need to be unwound, as it doesn't contain a value
15061 inherited from the caller. */
15063 if (df_regs_ever_live_p (3) == false)
15064 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15065 else if (args_to_push == 0)
15069 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
/* Variant 2: stash IP in a pre-decrement stack slot above the frame.  */
15072 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15073 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15076 /* Just tell the dwarf backend that we adjusted SP. */
15077 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15078 plus_constant (stack_pointer_rtx,
15080 RTX_FRAME_RELATED_P (insn) = 1;
15081 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
/* Variant 3: push the argument registers now, then IP can live in r3.  */
15085 /* Store the args on the stack. */
15086 if (cfun->machine->uses_anonymous_args)
15087 insn = emit_multi_reg_push
15088 ((0xf0 >> (args_to_push / 4)) & 0xf);
15091 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15092 GEN_INT (- args_to_push)));
15094 RTX_FRAME_RELATED_P (insn) = 1;
15096 saved_pretend_args = 1;
15097 fp_offset = args_to_push;
15100 /* Now reuse r3 to preserve IP. */
15101 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
/* IP becomes the frame-creation scratch: IP = SP + fp_offset.  */
15105 insn = emit_set_insn (ip_rtx,
15106 plus_constant (stack_pointer_rtx, fp_offset));
15107 RTX_FRAME_RELATED_P (insn) = 1;
15112 /* Push the argument registers, or reserve space for them. */
15113 if (cfun->machine->uses_anonymous_args)
15114 insn = emit_multi_reg_push
15115 ((0xf0 >> (args_to_push / 4)) & 0xf);
15118 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15119 GEN_INT (- args_to_push)));
15120 RTX_FRAME_RELATED_P (insn) = 1;
15123 /* If this is an interrupt service routine, and the link register
15124 is going to be pushed, and we're not generating extra
15125 push of IP (needed when frame is needed and frame layout if apcs),
15126 subtracting four from LR now will mean that the function return
15127 can be done with a single instruction. */
15128 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15129 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15130 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15133 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15135 emit_set_insn (lr, plus_constant (lr, -4));
15138 if (live_regs_mask)
15140 saved_regs += bit_count (live_regs_mask) * 4;
15141 if (optimize_size && !frame_pointer_needed
15142 && saved_regs == offsets->saved_regs - offsets->saved_args)
15144 /* If no coprocessor registers are being pushed and we don't have
15145 to worry about a frame pointer then push extra registers to
15146 create the stack frame. This is done is a way that does not
15147 alter the frame layout, so is independent of the epilogue. */
15151 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15153 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15154 if (frame && n * 4 >= frame)
15157 live_regs_mask |= (1 << n) - 1;
15158 saved_regs += frame;
15161 insn = emit_multi_reg_push (live_regs_mask);
15162 RTX_FRAME_RELATED_P (insn) = 1;
15165 if (! IS_VOLATILE (func_type))
15166 saved_regs += arm_save_coproc_regs ();
15168 if (frame_pointer_needed && TARGET_ARM)
15170 /* Create the new frame pointer. */
15171 if (TARGET_APCS_FRAME)
/* APCS: FP = IP - (4 + pushed args + fp_offset).  */
15173 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15174 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15175 RTX_FRAME_RELATED_P (insn) = 1;
15177 if (IS_NESTED (func_type))
15179 /* Recover the static chain register. */
15180 if (!df_regs_ever_live_p (3)
15181 || saved_pretend_args)
15182 insn = gen_rtx_REG (SImode, 3);
15183 else /* if (crtl->args.pretend_args_size == 0) */
15185 insn = plus_constant (hard_frame_pointer_rtx, 4);
15186 insn = gen_frame_mem (SImode, insn);
15188 emit_set_insn (ip_rtx, insn);
15189 /* Add a USE to stop propagate_one_insn() from barfing. */
15190 emit_insn (gen_prologue_use (ip_rtx));
/* Non-APCS: FP = SP + saved_regs - 4.  */
15195 insn = GEN_INT (saved_regs - 4);
15196 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15197 stack_pointer_rtx, insn));
15198 RTX_FRAME_RELATED_P (insn) = 1;
15202 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15204 /* This add can produce multiple insns for a large constant, so we
15205 need to get tricky. */
15206 rtx last = get_last_insn ();
15208 amount = GEN_INT (offsets->saved_args + saved_regs
15209 - offsets->outgoing_args);
15211 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
/* Mark every insn the add expanded to as frame-related.  */
15215 last = last ? NEXT_INSN (last) : get_insns ();
15216 RTX_FRAME_RELATED_P (last) = 1;
15218 while (last != insn);
15220 /* If the frame pointer is needed, emit a special barrier that
15221 will prevent the scheduler from moving stores to the frame
15222 before the stack adjustment. */
15223 if (frame_pointer_needed)
15224 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15225 hard_frame_pointer_rtx));
15229 if (frame_pointer_needed && TARGET_THUMB2)
15230 thumb_set_frame_pointer (offsets);
15232 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15234 unsigned long mask;
15236 mask = live_regs_mask;
15237 mask &= THUMB2_WORK_REGS;
15238 if (!IS_NESTED (func_type))
15239 mask |= (1 << IP_REGNUM);
15240 arm_load_pic_register (mask);
15243 /* If we are profiling, make sure no instructions are scheduled before
15244 the call to mcount. Similarly if the user has requested no
15245 scheduling in the prolog. Similarly if we want non-call exceptions
15246 using the EABI unwinder, to prevent faulting instructions from being
15247 swapped with a stack adjustment. */
15248 if (crtl->profile || !TARGET_SCHED_PROLOG
15249 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
15250 emit_insn (gen_blockage ());
15252 /* If the link register is being kept alive, with the return address in it,
15253 then make sure that it does not get reused by the ce2 pass. */
15254 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15255 cfun->machine->lr_save_eliminated = 1;
15258 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15260 arm_print_condition (FILE *stream)
/* Inside a ccfsm-converted conditional sequence, emit the sequence's
   current condition code; predicated insns are not allowed there.  */
15262 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15264 /* Branch conversion is not implemented for Thumb-2. */
15267 output_operand_lossage ("predicated Thumb instruction");
15270 if (current_insn_predicate != NULL)
15272 output_operand_lossage
15273 ("predicated instruction in conditional sequence");
15277 fputs (arm_condition_codes[arm_current_cc], stream);
/* Otherwise, for an explicitly predicated insn, emit its own condition.  */
15279 else if (current_insn_predicate)
15281 enum arm_cond_code code;
15285 output_operand_lossage ("predicated Thumb instruction");
15289 code = get_arm_condition_code (current_insn_predicate);
15290 fputs (arm_condition_codes[code], stream);
15295 /* If CODE is 'd', then the X is a condition operand and the instruction
15296 should only be executed if the condition is true.
15297 if CODE is 'D', then the X is a condition operand and the instruction
15298 should only be executed if the condition is false: however, if the mode
15299 of the comparison is CCFPEmode, then always execute the instruction -- we
15300 do this because in these circumstances !GE does not necessarily imply LT;
15301 in these cases the instruction pattern will take care to make sure that
15302 an instruction containing %d will follow, thereby undoing the effects of
15303 doing this instruction unconditionally.
15304 If CODE is 'N' then X is a floating point operand that must be negated
15306 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15307 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
/* NOTE(review): implementation of the TARGET_PRINT_OPERAND hook for ARM.
   This view is an extraction with gaps: the return-type line, the outer
   `switch (code)' statement, most `case' labels, braces and several
   intervening lines are missing between the numbered lines below.  Code
   lines are kept byte-identical; only comments are added.  Each commented
   group below handles one %-code of the inline-assembly operand syntax.  */
15309 arm_print_operand (FILE *stream, rtx x, int code)
/* Punctuation codes (no operand): assembler comment start, user label
   prefix, register prefix.  */
15314 fputs (ASM_COMMENT_START, stream);
15318 fputs (user_label_prefix, stream);
15322 fputs (REGISTER_PREFIX, stream);
/* '?': the current ccfsm condition code.  */
15326 arm_print_condition (stream);
15330 /* Nothing in unified syntax, otherwise the current condition code. */
15331 if (!TARGET_UNIFIED_ASM)
15332 arm_print_condition (stream);
15336 /* The current condition code in unified syntax, otherwise nothing. */
15337 if (TARGET_UNIFIED_ASM)
15338 arm_print_condition (stream);
15342 /* The current condition code for a condition code setting instruction.
15343 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15344 if (TARGET_UNIFIED_ASM)
15346 fputc('s', stream);
15347 arm_print_condition (stream);
/* Non-unified syntax: condition first, then the 's' suffix.  */
15351 arm_print_condition (stream);
15352 fputc('s', stream);
15357 /* If the instruction is conditionally executed then print
15358 the current condition code, otherwise print 's'. */
15359 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15360 if (current_insn_predicate)
15361 arm_print_condition (stream);
15363 fputc('s', stream);
15366 /* %# is a "break" sequence. It doesn't output anything, but is used to
15367 separate e.g. operand numbers from following text, if that text consists
15368 of further digits which we don't want to be part of the operand
/* 'N': print the negated value of a floating-point constant.  */
15376 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15377 r = real_value_negate (&r);
15378 fprintf (stream, "%s", fp_const_from_val (&r));
15382 /* An integer or symbol address without a preceding # sign. */
15384 switch (GET_CODE (x))
15387 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15391 output_addr_const (stream, x);
15395 gcc_unreachable ();
/* 'B': bitwise inverse of a CONST_INT (sign-extended to the host wide
   int); for other rtx, emit `~' followed by the expression.  */
15400 if (GET_CODE (x) == CONST_INT)
15403 val = ARM_SIGN_EXTEND (~INTVAL (x));
15404 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15408 putc ('~', stream);
15409 output_addr_const (stream, x);
15414 /* The low 16 bits of an immediate constant. */
15415 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
/* Arithmetic-instruction mnemonic; second argument selects the shifted
   (1) vs. unshifted (0) form below.  */
15419 fprintf (stream, "%s", arithmetic_instr (x, 1));
15422 /* Truncate Cirrus shift counts. */
15424 if (GET_CODE (x) == CONST_INT)
15426 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15429 arm_print_operand (stream, x, 0);
15433 fprintf (stream, "%s", arithmetic_instr (x, 0));
/* 'S': an optional shift applied to a register operand
   (e.g. ", lsl #2" or ", lsl rN").  */
15441 if (!shift_operator (x, SImode))
15443 output_operand_lossage ("invalid shift operand");
15447 shift = shift_op (x, &val);
/* A register-held shift amount prints the register; otherwise the
   immediate shift count is printed.  */
15451 fprintf (stream, ", %s ", shift);
15453 arm_print_operand (stream, XEXP (x, 1), 0);
15455 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15460 /* An explanation of the 'Q', 'R' and 'H' register operands:
15462 In a pair of registers containing a DI or DF value the 'Q'
15463 operand returns the register number of the register containing
15464 the least significant part of the value. The 'R' operand returns
15465 the register number of the register containing the most
15466 significant part of the value.
15468 The 'H' operand returns the higher of the two register numbers.
15469 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15470 same as the 'Q' operand, since the most significant part of the
15471 value is held in the lower number register. The reverse is true
15472 on systems where WORDS_BIG_ENDIAN is false.
15474 The purpose of these operands is to distinguish between cases
15475 where the endian-ness of the values is important (for example
15476 when they are added together), and cases where the endian-ness
15477 is irrelevant, but the order of register operations is important.
15478 For example when loading a value from memory into a register
15479 pair, the endian-ness does not matter. Provided that the value
15480 from the lower memory address is put into the lower numbered
15481 register, and the value from the higher address is put into the
15482 higher numbered register, the load will work regardless of whether
15483 the value being loaded is big-wordian or little-wordian. The
15484 order of the two register loads can matter however, if the address
15485 of the memory location is actually held in one of the registers
15486 being overwritten by the load.
15488 The 'Q' and 'R' constraints are also available for 64-bit
/* 'Q': least-significant word of a register pair or constant.  */
15491 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15493 rtx part = gen_lowpart (SImode, x);
15494 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15498 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15500 output_operand_lossage ("invalid operand for code '%c'", code);
15504 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
/* 'R': most-significant word of a register pair or constant.  */
15508 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15510 enum machine_mode mode = GET_MODE (x);
15513 if (mode == VOIDmode)
15515 part = gen_highpart_mode (SImode, mode, x);
15516 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15520 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15522 output_operand_lossage ("invalid operand for code '%c'", code);
15526 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
/* 'H': the higher-numbered register of a pair, unconditionally.  */
15530 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15532 output_operand_lossage ("invalid operand for code '%c'", code);
15536 asm_fprintf (stream, "%r", REGNO (x) + 1);
/* Words 2 and 3 of a four-register (e.g. TImode/DImode-pair) value;
   endianness selects which physical register holds which word.  */
15540 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15542 output_operand_lossage ("invalid operand for code '%c'", code);
15546 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15550 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15552 output_operand_lossage ("invalid operand for code '%c'", code);
15556 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
/* Print the base register of an address, whether X is a bare REG or a
   compound address rtx whose first operand is the register.  */
15560 asm_fprintf (stream, "%r",
15561 GET_CODE (XEXP (x, 0)) == REG
15562 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
/* 'M': an ldm/stm style register range "{rN-rM}" sized by X's mode.  */
15566 asm_fprintf (stream, "{%r-%r}",
15568 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15571 /* Like 'M', but writing doubleword vector registers, for use by Neon
15575 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15576 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
/* Single D register vs. a range of D registers.  */
15578 asm_fprintf (stream, "{d%d}", regno);
15580 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
/* 'd': print the condition for conditional execution.  */
15585 /* CONST_TRUE_RTX means always -- that's the default. */
15586 if (x == const_true_rtx)
15589 if (!COMPARISON_P (x))
15591 output_operand_lossage ("invalid operand for code '%c'", code);
15595 fputs (arm_condition_codes[get_arm_condition_code (x)],
/* 'D': print the inverse condition.  */
15600 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15601 want to do that. */
15602 if (x == const_true_rtx)
15604 output_operand_lossage ("instruction never executed");
15607 if (!COMPARISON_P (x))
15609 output_operand_lossage ("invalid operand for code '%c'", code);
15613 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15614 (get_arm_condition_code (x))],
15618 /* Cirrus registers can be accessed in a variety of ways:
15619 single floating point (f)
15620 double floating point (d)
15622 64bit integer (dx). */
15623 case 'W': /* Cirrus register in F mode. */
15624 case 'X': /* Cirrus register in D mode. */
15625 case 'Y': /* Cirrus register in FX mode. */
15626 case 'Z': /* Cirrus register in DX mode. */
15627 gcc_assert (GET_CODE (x) == REG
15628 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
/* reg_names[] entries are "mvfN...": skip the 2-char "mv" prefix and
   re-emit it with the mode suffix chosen by CODE.  */
15630 fprintf (stream, "mv%s%s",
15632 : code == 'X' ? "d"
15633 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15637 /* Print cirrus register in the mode specified by the register's mode. */
15640 int mode = GET_MODE (x);
15642 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15644 output_operand_lossage ("invalid operand for code '%c'", code);
15648 fprintf (stream, "mv%s%s",
15649 mode == DFmode ? "d"
15650 : mode == SImode ? "fx"
15651 : mode == DImode ? "dx"
15652 : "f", reg_names[REGNO (x)] + 2);
/* Print an iWMMXt wCG register index (0-based from the first GR reg).  */
15658 if (GET_CODE (x) != REG
15659 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15660 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15661 /* Bad value for wCG register number. */
15663 output_operand_lossage ("invalid operand for code '%c'", code);
15668 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15671 /* Print an iWMMXt control register name. */
/* NOTE(review): a line between 15673 and 15675 is missing from this
   view -- presumably the lower-bound check `|| INTVAL (x) < 0';
   verify against the full source before relying on this range test.  */
15673 if (GET_CODE (x) != CONST_INT
15675 || INTVAL (x) >= 16)
15676 /* Bad value for wC register number. */
15678 output_operand_lossage ("invalid operand for code '%c'", code);
15684 static const char * wc_reg_names [16] =
15686 "wCID", "wCon", "wCSSF", "wCASF",
15687 "wC4", "wC5", "wC6", "wC7",
15688 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15689 "wC12", "wC13", "wC14", "wC15"
/* NOTE(review): non-literal format string passed to fprintf
   (-Wformat-security); the table entries contain no '%', but fputs
   would be the safer idiom -- consider fixing upstream.  */
15692 fprintf (stream, wc_reg_names [INTVAL (x)]);
15696 /* Print the high single-precision register of a VFP double-precision
15700 int mode = GET_MODE (x);
15703 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15705 output_operand_lossage ("invalid operand for code '%c'", code);
15710 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15712 output_operand_lossage ("invalid operand for code '%c'", code);
/* The odd-numbered S register overlapping the top half of the D reg.  */
15716 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15720 /* Print a VFP/Neon double precision or quad precision register name. */
15724 int mode = GET_MODE (x);
15725 int is_quad = (code == 'q');
15728 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15730 output_operand_lossage ("invalid operand for code '%c'", code);
15734 if (GET_CODE (x) != REG
15735 || !IS_VFP_REGNUM (REGNO (x)))
15737 output_operand_lossage ("invalid operand for code '%c'", code);
15742 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15743 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15745 output_operand_lossage ("invalid operand for code '%c'", code);
/* D regs are S-pairs (>>1); Q regs are S-quads (>>2).  */
15749 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15750 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15754 /* These two codes print the low/high doubleword register of a Neon quad
15755 register, respectively. For pair-structure types, can also print
15756 low/high quadword registers. */
15760 int mode = GET_MODE (x);
15763 if ((GET_MODE_SIZE (mode) != 16
15764 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15766 output_operand_lossage ("invalid operand for code '%c'", code);
15771 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15773 output_operand_lossage ("invalid operand for code '%c'", code);
/* 'e' selects the low half, 'f' the high half (the +1 below).  */
15777 if (GET_MODE_SIZE (mode) == 16)
15778 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15779 + (code == 'f' ? 1 : 0));
15781 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15782 + (code == 'f' ? 1 : 0));
15786 /* Print a VFPv3 floating-point constant, represented as an integer
15790 int index = vfp3_const_double_index (x);
15791 gcc_assert (index != -1);
15792 fprintf (stream, "%d", index);
15796 /* Print bits representing opcode features for Neon.
15798 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15799 and polynomials as unsigned.
15801 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15803 Bit 2 is 1 for rounding functions, 0 otherwise. */
15805 /* Identify the type as 's', 'u', 'p' or 'f'. */
15808 HOST_WIDE_INT bits = INTVAL (x);
15809 fputc ("uspf"[bits & 3], stream);
15813 /* Likewise, but signed and unsigned integers are both 'i'. */
15816 HOST_WIDE_INT bits = INTVAL (x);
15817 fputc ("iipf"[bits & 3], stream);
15821 /* As for 'T', but emit 'u' instead of 'p'. */
15824 HOST_WIDE_INT bits = INTVAL (x);
15825 fputc ("usuf"[bits & 3], stream);
15829 /* Bit 2: rounding (vs none). */
15832 HOST_WIDE_INT bits = INTVAL (x);
15833 fputs ((bits & 4) != 0 ? "r" : "", stream);
15837 /* Memory operand for vld1/vst1 instruction. */
15841 bool postinc = FALSE;
15842 gcc_assert (GET_CODE (x) == MEM);
15843 addr = XEXP (x, 0);
15844 if (GET_CODE (addr) == POST_INC)
15847 addr = XEXP (addr, 0);
/* "[rN]" with a trailing "!" for post-increment addressing.  */
15849 asm_fprintf (stream, "[%r]", REGNO (addr));
15851 fputs("!", stream);
15855 /* Translate an S register number into a D register number and element index. */
15858 int mode = GET_MODE (x);
15861 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15863 output_operand_lossage ("invalid operand for code '%c'", code);
15868 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15870 output_operand_lossage ("invalid operand for code '%c'", code);
/* Each D register holds two S registers: sN maps to d(N/2)[N%2].  */
15874 regno = regno - FIRST_VFP_REGNUM;
15875 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15879 /* Register specifier for vld1.16/vst1.16. Translate the S register
15880 number into a D register number and element index. */
15883 int mode = GET_MODE (x);
15886 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15888 output_operand_lossage ("invalid operand for code '%c'", code);
15893 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15895 output_operand_lossage ("invalid operand for code '%c'", code);
/* 16-bit elements: the odd S register starts at lane 2 of the D reg.  */
15899 regno = regno - FIRST_VFP_REGNUM;
15900 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
/* Default case: no %-code (or code 0) -- print the operand plainly
   according to its rtx kind.  */
15907 output_operand_lossage ("missing operand");
15911 switch (GET_CODE (x))
15914 asm_fprintf (stream, "%r", REGNO (x));
/* MEM: record the access mode for arm_print_operand_address, which
   reads output_memory_reference_mode for auto-inc/dec offsets.  */
15918 output_memory_reference_mode = GET_MODE (x);
15919 output_address (XEXP (x, 0));
15926 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15927 sizeof (fpstr), 0, 1);
15928 fprintf (stream, "#%s", fpstr);
15931 fprintf (stream, "#%s", fp_immediate_constant (x));
/* Anything else: an immediate -- "#" then the constant expression;
   HIGH gets the movw-style ":lower16:" decoration.  */
15935 gcc_assert (GET_CODE (x) != NEG);
15936 fputc ('#', stream);
15937 if (GET_CODE (x) == HIGH)
15939 fputs (":lower16:", stream);
15943 output_addr_const (stream, x);
15949 /* Target hook for printing a memory address. */
/* NOTE(review): extraction gaps -- the return-type line, braces, and the
   TARGET_32BIT/TARGET_THUMB branch structure lines are missing from this
   view.  The first half below is the 32-bit (ARM/Thumb-2) path; the lines
   from 16050 on are the Thumb-1 path.  Code kept byte-identical.  */
15951 arm_print_operand_address (FILE *stream, rtx x)
15955 int is_minus = GET_CODE (x) == MINUS;
/* Bare register: emit an explicit zero offset.  */
15957 if (GET_CODE (x) == REG)
15958 asm_fprintf (stream, "[%r, #0]", REGNO (x));
15959 else if (GET_CODE (x) == PLUS || is_minus)
15961 rtx base = XEXP (x, 0);
15962 rtx index = XEXP (x, 1);
15963 HOST_WIDE_INT offset = 0;
15964 if (GET_CODE (base) != REG
15965 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
15967 /* Ensure that BASE is a register. */
15968 /* (one of them must be). */
15969 /* Also ensure the SP is not used as an index register. */
/* Dispatch on the index kind: immediate offset, register index, or a
   shifted register index.  */
15974 switch (GET_CODE (index))
15977 offset = INTVAL (index);
15980 asm_fprintf (stream, "[%r, #%wd]",
15981 REGNO (base), offset);
15985 asm_fprintf (stream, "[%r, %s%r]",
15986 REGNO (base), is_minus ? "-" : "",
/* Shifted index: print "[base, {-}reg" then let arm_print_operand's
   'S' code append the shift before closing the bracket.  */
15996 asm_fprintf (stream, "[%r, %s%r",
15997 REGNO (base), is_minus ? "-" : "",
15998 REGNO (XEXP (index, 0)));
15999 arm_print_operand (stream, index, 'S');
16000 fputs ("]", stream);
16005 gcc_unreachable ();
/* Auto-increment/decrement: the offset is implied by the size of the
   access, recorded earlier in output_memory_reference_mode.  */
16008 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16009 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16011 extern enum machine_mode output_memory_reference_mode;
16013 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16015 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16016 asm_fprintf (stream, "[%r, #%s%d]!",
16017 REGNO (XEXP (x, 0)),
16018 GET_CODE (x) == PRE_DEC ? "-" : "",
16019 GET_MODE_SIZE (output_memory_reference_mode));
16021 asm_fprintf (stream, "[%r], #%s%d",
16022 REGNO (XEXP (x, 0)),
16023 GET_CODE (x) == POST_DEC ? "-" : "",
16024 GET_MODE_SIZE (output_memory_reference_mode));
/* Writeback with an explicit modification: "[rN, off]!" / "[rN], off".  */
16026 else if (GET_CODE (x) == PRE_MODIFY)
16028 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16029 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16030 asm_fprintf (stream, "#%wd]!",
16031 INTVAL (XEXP (XEXP (x, 1), 1)));
16033 asm_fprintf (stream, "%r]!",
16034 REGNO (XEXP (XEXP (x, 1), 1)));
16036 else if (GET_CODE (x) == POST_MODIFY)
16038 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16039 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16040 asm_fprintf (stream, "#%wd",
16041 INTVAL (XEXP (XEXP (x, 1), 1)));
16043 asm_fprintf (stream, "%r",
16044 REGNO (XEXP (XEXP (x, 1), 1)));
16046 else output_addr_const (stream, x);
/* Thumb-1 addressing: only REG, POST_INC and REG+offset/REG+REG forms.  */
16050 if (GET_CODE (x) == REG)
16051 asm_fprintf (stream, "[%r]", REGNO (x));
16052 else if (GET_CODE (x) == POST_INC)
16053 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16054 else if (GET_CODE (x) == PLUS)
16056 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16057 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16058 asm_fprintf (stream, "[%r, #%wd]",
16059 REGNO (XEXP (x, 0)),
16060 INTVAL (XEXP (x, 1)));
16062 asm_fprintf (stream, "[%r, %r]",
16063 REGNO (XEXP (x, 0)),
16064 REGNO (XEXP (x, 1)));
16067 output_addr_const (stream, x);
16071 /* Target hook for indicating whether a punctuation character for
16072 TARGET_PRINT_OPERAND is valid. */
/* '@', '|', '.', '(', ')' and '#' are valid everywhere; '?' only on
   32-bit (ARM/Thumb-2), '!' only on Thumb-2, '_' only on Thumb.
   (NOTE(review): the `static bool' line is missing from this view.)  */
16074 arm_print_operand_punct_valid_p (unsigned char code)
16076 return (code == '@' || code == '|' || code == '.'
16077 || code == '(' || code == ')' || code == '#'
16078 || (TARGET_32BIT && (code == '?'))
16079 || (TARGET_THUMB2 && (code == '!'))
16080 || (TARGET_THUMB && (code == '_')));
16083 /* Target hook for assembling integer objects. The ARM version needs to
16084 handle word-sized values specially. */
/* NOTE(review): extraction gaps -- the return-type line, braces, local
   declarations (e.g. the loop counters) and `return true;' lines are
   missing between the numbered lines below.  Code kept byte-identical.  */
16086 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16088 enum machine_mode mode;
/* Aligned word-sized values get an explicit .word directive, plus a
   GOT/GOTOFF relocation suffix when emitting PIC constant-table refs.  */
16090 if (size == UNITS_PER_WORD && aligned_p)
16092 fputs ("\t.word\t", asm_out_file);
16093 output_addr_const (asm_out_file, x);
16095 /* Mark symbols as position independent. We only do this in the
16096 .text segment, not in the .data segment. */
16097 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16098 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16100 /* See legitimize_pic_address for an explanation of the
16101 TARGET_VXWORKS_RTP check. */
16102 if (TARGET_VXWORKS_RTP
16103 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16104 fputs ("(GOT)", asm_out_file);
16106 fputs ("(GOTOFF)", asm_out_file);
16108 fputc ('\n', asm_out_file);
/* Vector constants are emitted element by element; the first element
   carries the full alignment, subsequent ones only element alignment.  */
16112 mode = GET_MODE (x);
16114 if (arm_vector_mode_supported_p (mode))
16118 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16120 units = CONST_VECTOR_NUNITS (x);
16121 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16123 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16124 for (i = 0; i < units; i++)
16126 rtx elt = CONST_VECTOR_ELT (x, i);
16128 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
/* Floating-point vector elements go through the REAL_VALUE path.  */
16131 for (i = 0; i < units; i++)
16133 rtx elt = CONST_VECTOR_ELT (x, i);
16134 REAL_VALUE_TYPE rval;
16136 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16139 (rval, GET_MODE_INNER (mode),
16140 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
/* Everything else falls back to the generic implementation.  */
16146 return default_assemble_integer (x, size, aligned_p);
/* Emit SYMBOL into the static constructor (IS_CTOR) or destructor list.
   AAPCS targets use .init_array/.fini_array with a (target1) relocation;
   others fall back to the default named-section mechanism.
   (NOTE(review): the `static void' line, braces and the local `buf'/`s'
   declarations are missing from this extraction view.)  */
16150 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16154 if (!TARGET_AAPCS_BASED)
16157 default_named_section_asm_out_constructor
16158 : default_named_section_asm_out_destructor) (symbol, priority);
16162 /* Put these in the .init_array section, using a special relocation. */
/* Non-default priorities get their own ".init_array.NNNNN" section so
   the linker sorts them; zero-padded to 5 digits for correct ordering.  */
16163 if (priority != DEFAULT_INIT_PRIORITY)
16166 sprintf (buf, "%s.%.5u",
16167 is_ctor ? ".init_array" : ".fini_array",
16169 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16176 switch_to_section (s);
16177 assemble_align (POINTER_SIZE);
16178 fputs ("\t.word\t", asm_out_file);
16179 output_addr_const (asm_out_file, symbol);
/* R_ARM_TARGET1 relocation -- resolved as REL32 or ABS32 per platform.  */
16180 fputs ("(target1)\n", asm_out_file);
16183 /* Add a function to the list of static constructors. */
/* Thin wrapper over arm_elf_asm_cdtor with is_ctor == true.  */
16186 arm_elf_asm_constructor (rtx symbol, int priority)
16188 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16191 /* Add a function to the list of static destructors. */
/* Thin wrapper over arm_elf_asm_cdtor with is_ctor == false.  */
16194 arm_elf_asm_destructor (rtx symbol, int priority)
16196 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16199 /* A finite state machine takes care of noticing whether or not instructions
16200 can be conditionally executed, and thus decrease execution time and code
16201 size by deleting branch instructions. The fsm is controlled by
16202 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16204 /* The states of the fsm controlling condition codes are:
16205 0: normal, do nothing special
16206 1: make ASM_OUTPUT_OPCODE not output this instruction
16207 2: make ASM_OUTPUT_OPCODE not output this instruction
16208 3: make instructions conditional
16209 4: make instructions conditional
16211 State transitions (state->state by whom under condition):
16212 0 -> 1 final_prescan_insn if the `target' is a label
16213 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16214 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16215 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16216 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16217 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16218 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16219 (the target insn is arm_target_insn).
16221 If the jump clobbers the conditions then we use states 2 and 4.
16223 A similar thing can be done with conditional return insns.
16225 XXX In case the `target' is an unconditional branch, this conditionalising
16226 of the instructions always reduces code size, but not always execution
16227 time. But then, I want to reduce the code size to somewhere near what
16228 /bin/cc produces. */
16230 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16231 instructions. When a COND_EXEC instruction is seen the subsequent
16232 instructions are scanned so that multiple conditional instructions can be
16233 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16234 specify the length and true/false mask for the IT block. These will be
16235 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
16237 /* Returns the index of the ARM condition code string in
16238 `arm_condition_codes'. COMPARISON should be an rtx like
16239 `(eq (...) (...))'. */
/* NOTE(review): extraction gaps -- the `switch (mode)' statement, most
   `case CC_*mode:' labels, the `dominance:' label and braces are missing
   between the numbered lines below.  Code kept byte-identical.  */
16240 static enum arm_cond_code
16241 get_arm_condition_code (rtx comparison)
16243 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16244 enum arm_cond_code code;
16245 enum rtx_code comp_code = GET_CODE (comparison);
/* If the operand has no CC mode, derive the mode the comparison would
   have been compiled with.  */
16247 if (GET_MODE_CLASS (mode) != MODE_CC)
16248 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16249 XEXP (comparison, 1));
/* Dominated-comparison modes: the stored code is the dominant
   condition; EQ tests against its inverse.  */
16253 case CC_DNEmode: code = ARM_NE; goto dominance;
16254 case CC_DEQmode: code = ARM_EQ; goto dominance;
16255 case CC_DGEmode: code = ARM_GE; goto dominance;
16256 case CC_DGTmode: code = ARM_GT; goto dominance;
16257 case CC_DLEmode: code = ARM_LE; goto dominance;
16258 case CC_DLTmode: code = ARM_LT; goto dominance;
16259 case CC_DGEUmode: code = ARM_CS; goto dominance;
16260 case CC_DGTUmode: code = ARM_HI; goto dominance;
16261 case CC_DLEUmode: code = ARM_LS; goto dominance;
16262 case CC_DLTUmode: code = ARM_CC;
/* dominance: only EQ/NE comparisons are valid against a dominated CC.  */
16265 gcc_assert (comp_code == EQ || comp_code == NE);
16267 if (comp_code == EQ)
16268 return ARM_INVERSE_CONDITION_CODE (code);
/* CC_NOOVmode: sign flag usable, overflow not -- GE/LT map to PL/MI.  */
16274 case NE: return ARM_NE;
16275 case EQ: return ARM_EQ;
16276 case GE: return ARM_PL;
16277 case LT: return ARM_MI;
16278 default: gcc_unreachable ();
/* CC_Zmode: only the Z flag is valid.  */
16284 case NE: return ARM_NE;
16285 case EQ: return ARM_EQ;
16286 default: gcc_unreachable ();
/* CC_Nmode: only the N flag is valid; NE/EQ map to MI/PL.  */
16292 case NE: return ARM_MI;
16293 case EQ: return ARM_PL;
16294 default: gcc_unreachable ();
16299 /* These encodings assume that AC=1 in the FPA system control
16300 byte. This allows us to handle all cases except UNEQ and
16304 case GE: return ARM_GE;
16305 case GT: return ARM_GT;
16306 case LE: return ARM_LS;
16307 case LT: return ARM_MI;
16308 case NE: return ARM_NE;
16309 case EQ: return ARM_EQ;
16310 case ORDERED: return ARM_VC;
16311 case UNORDERED: return ARM_VS;
16312 case UNLT: return ARM_LT;
16313 case UNLE: return ARM_LE;
16314 case UNGT: return ARM_HI;
16315 case UNGE: return ARM_PL;
16316 /* UNEQ and LTGT do not have a representation. */
16317 case UNEQ: /* Fall through. */
16318 case LTGT: /* Fall through. */
16319 default: gcc_unreachable ();
/* Swapped-operand CC mode: every condition is mirrored.  */
16325 case NE: return ARM_NE;
16326 case EQ: return ARM_EQ;
16327 case GE: return ARM_LE;
16328 case GT: return ARM_LT;
16329 case LE: return ARM_GE;
16330 case LT: return ARM_GT;
16331 case GEU: return ARM_LS;
16332 case GTU: return ARM_CC;
16333 case LEU: return ARM_CS;
16334 case LTU: return ARM_HI;
16335 default: gcc_unreachable ();
/* CC_Cmode: only the carry flag is valid.  */
16341 case LTU: return ARM_CS;
16342 case GEU: return ARM_CC;
16343 default: gcc_unreachable ();
/* CC mode where only Z and C are valid (unsigned comparisons).  */
16349 case NE: return ARM_NE;
16350 case EQ: return ARM_EQ;
16351 case GEU: return ARM_CS;
16352 case GTU: return ARM_HI;
16353 case LEU: return ARM_LS;
16354 case LTU: return ARM_CC;
16355 default: gcc_unreachable ();
/* CC mode where only N and C are usable with V.  */
16361 case GE: return ARM_GE;
16362 case LT: return ARM_LT;
16363 case GEU: return ARM_CS;
16364 case LTU: return ARM_CC;
16365 default: gcc_unreachable ();
/* Plain CCmode: the full set of signed and unsigned conditions.  */
16371 case NE: return ARM_NE;
16372 case EQ: return ARM_EQ;
16373 case GE: return ARM_GE;
16374 case GT: return ARM_GT;
16375 case LE: return ARM_LE;
16376 case LT: return ARM_LT;
16377 case GEU: return ARM_CS;
16378 case GTU: return ARM_HI;
16379 case LEU: return ARM_LS;
16380 case LTU: return ARM_CC;
16381 default: gcc_unreachable ();
16384 default: gcc_unreachable ();
16388 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
/* Scans forward from INSN to build a Thumb-2 IT block: sets
   arm_current_cc, arm_condexec_count (insns in the block),
   arm_condexec_mask (then/else bit per insn) and arm_condexec_masklen.
   (NOTE(review): the `static void' line, braces, local declarations for
   `predicate'/`n'/`mask', and loop/return structure lines are missing
   from this extraction view.  Code kept byte-identical.)  */
16391 thumb2_final_prescan_insn (rtx insn)
16393 rtx first_insn = insn;
16394 rtx body = PATTERN (insn);
16396 enum arm_cond_code code;
16400 /* Remove the previous insn from the count of insns to be output. */
16401 if (arm_condexec_count)
16402 arm_condexec_count--;
16404 /* Nothing to do if we are already inside a conditional block. */
16405 if (arm_condexec_count)
/* Only COND_EXEC patterns start a new IT block.  */
16408 if (GET_CODE (body) != COND_EXEC)
16411 /* Conditional jumps are implemented directly. */
16412 if (GET_CODE (insn) == JUMP_INSN)
16415 predicate = COND_EXEC_TEST (body);
16416 arm_current_cc = get_arm_condition_code (predicate);
/* Initialise the block with this insn: ce_count gives how many machine
   insns the pattern expands to; they all take the `then' condition.  */
16418 n = get_attr_ce_count (insn);
16419 arm_condexec_count = 1;
16420 arm_condexec_mask = (1 << n) - 1;
16421 arm_condexec_masklen = n;
16422 /* See if subsequent instructions can be combined into the same block. */
16425 insn = next_nonnote_insn (insn);
16427 /* Jumping into the middle of an IT block is illegal, so a label or
16428 barrier terminates the block. */
16429 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
16432 body = PATTERN (insn);
16433 /* USE and CLOBBER aren't really insns, so just skip them. */
16434 if (GET_CODE (body) == USE
16435 || GET_CODE (body) == CLOBBER)
16438 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16439 if (GET_CODE (body) != COND_EXEC)
16441 /* Allow up to 4 conditionally executed instructions in a block. */
16442 n = get_attr_ce_count (insn);
16443 if (arm_condexec_masklen + n > 4)
/* The next insn joins the block only if its condition is identical
   (then-bits set) or the exact inverse (else-bits clear) of ours.  */
16446 predicate = COND_EXEC_TEST (body);
16447 code = get_arm_condition_code (predicate);
16448 mask = (1 << n) - 1;
16449 if (arm_current_cc == code)
16450 arm_condexec_mask |= (mask << arm_condexec_masklen);
16451 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
16454 arm_condexec_count++;
16455 arm_condexec_masklen += n;
16457 /* A jump must be the last instruction in a conditional block. */
16458 if (GET_CODE(insn) == JUMP_INSN)
16461 /* Restore recog_data (getting the attributes of other insns can
16462 destroy this array, but final.c assumes that it remains intact
16463 across this call). */
16464 extract_constrain_insn_cached (first_insn);
16468 arm_final_prescan_insn (rtx insn)
16470 /* BODY will hold the body of INSN. */
16471 rtx body = PATTERN (insn);
16473 /* This will be 1 if trying to repeat the trick, and things need to be
16474 reversed if it appears to fail. */
16477 /* If we start with a return insn, we only succeed if we find another one. */
16478 int seeking_return = 0;
16480 /* START_INSN will hold the insn from where we start looking. This is the
16481 first insn after the following code_label if REVERSE is true. */
16482 rtx start_insn = insn;
16484 /* If in state 4, check if the target branch is reached, in order to
16485 change back to state 0. */
16486 if (arm_ccfsm_state == 4)
16488 if (insn == arm_target_insn)
16490 arm_target_insn = NULL;
16491 arm_ccfsm_state = 0;
16496 /* If in state 3, it is possible to repeat the trick, if this insn is an
16497 unconditional branch to a label, and immediately following this branch
16498 is the previous target label which is only used once, and the label this
16499 branch jumps to is not too far off. */
16500 if (arm_ccfsm_state == 3)
16502 if (simplejump_p (insn))
16504 start_insn = next_nonnote_insn (start_insn);
16505 if (GET_CODE (start_insn) == BARRIER)
16507 /* XXX Isn't this always a barrier? */
16508 start_insn = next_nonnote_insn (start_insn);
16510 if (GET_CODE (start_insn) == CODE_LABEL
16511 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16512 && LABEL_NUSES (start_insn) == 1)
16517 else if (GET_CODE (body) == RETURN)
16519 start_insn = next_nonnote_insn (start_insn);
16520 if (GET_CODE (start_insn) == BARRIER)
16521 start_insn = next_nonnote_insn (start_insn);
16522 if (GET_CODE (start_insn) == CODE_LABEL
16523 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16524 && LABEL_NUSES (start_insn) == 1)
16527 seeking_return = 1;
16536 gcc_assert (!arm_ccfsm_state || reverse);
16537 if (GET_CODE (insn) != JUMP_INSN)
16540 /* This jump might be paralleled with a clobber of the condition codes
16541 the jump should always come first */
16542 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16543 body = XVECEXP (body, 0, 0);
16546 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16547 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16550 int fail = FALSE, succeed = FALSE;
16551 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16552 int then_not_else = TRUE;
16553 rtx this_insn = start_insn, label = 0;
16555 /* Register the insn jumped to. */
16558 if (!seeking_return)
16559 label = XEXP (SET_SRC (body), 0);
16561 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16562 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16563 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16565 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16566 then_not_else = FALSE;
16568 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16569 seeking_return = 1;
16570 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16572 seeking_return = 1;
16573 then_not_else = FALSE;
16576 gcc_unreachable ();
16578 /* See how many insns this branch skips, and what kind of insns. If all
16579 insns are okay, and the label or unconditional branch to the same
16580 label is not too far away, succeed. */
16581 for (insns_skipped = 0;
16582 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16586 this_insn = next_nonnote_insn (this_insn);
16590 switch (GET_CODE (this_insn))
16593 /* Succeed if it is the target label, otherwise fail since
16594 control falls in from somewhere else. */
16595 if (this_insn == label)
16597 arm_ccfsm_state = 1;
16605 /* Succeed if the following insn is the target label.
16607 If return insns are used then the last insn in a function
16608 will be a barrier. */
16609 this_insn = next_nonnote_insn (this_insn);
16610 if (this_insn && this_insn == label)
16612 arm_ccfsm_state = 1;
16620 /* The AAPCS says that conditional calls should not be
16621 used since they make interworking inefficient (the
16622 linker can't transform BL<cond> into BLX). That's
16623 only a problem if the machine has BLX. */
16630 /* Succeed if the following insn is the target label, or
16631 if the following two insns are a barrier and the
16633 this_insn = next_nonnote_insn (this_insn);
16634 if (this_insn && GET_CODE (this_insn) == BARRIER)
16635 this_insn = next_nonnote_insn (this_insn);
16637 if (this_insn && this_insn == label
16638 && insns_skipped < max_insns_skipped)
16640 arm_ccfsm_state = 1;
16648 /* If this is an unconditional branch to the same label, succeed.
16649 If it is to another label, do nothing. If it is conditional,
16651 /* XXX Probably, the tests for SET and the PC are
16654 scanbody = PATTERN (this_insn);
16655 if (GET_CODE (scanbody) == SET
16656 && GET_CODE (SET_DEST (scanbody)) == PC)
16658 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16659 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16661 arm_ccfsm_state = 2;
16664 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16667 /* Fail if a conditional return is undesirable (e.g. on a
16668 StrongARM), but still allow this if optimizing for size. */
16669 else if (GET_CODE (scanbody) == RETURN
16670 && !use_return_insn (TRUE, NULL)
16673 else if (GET_CODE (scanbody) == RETURN
16676 arm_ccfsm_state = 2;
16679 else if (GET_CODE (scanbody) == PARALLEL)
16681 switch (get_attr_conds (this_insn))
16691 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16696 /* Instructions using or affecting the condition codes make it
16698 scanbody = PATTERN (this_insn);
16699 if (!(GET_CODE (scanbody) == SET
16700 || GET_CODE (scanbody) == PARALLEL)
16701 || get_attr_conds (this_insn) != CONDS_NOCOND)
16704 /* A conditional cirrus instruction must be followed by
16705 a non Cirrus instruction. However, since we
16706 conditionalize instructions in this function and by
16707 the time we get here we can't add instructions
16708 (nops), because shorten_branches() has already been
16709 called, we will disable conditionalizing Cirrus
16710 instructions to be safe. */
16711 if (GET_CODE (scanbody) != USE
16712 && GET_CODE (scanbody) != CLOBBER
16713 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16723 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16724 arm_target_label = CODE_LABEL_NUMBER (label);
16727 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16729 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16731 this_insn = next_nonnote_insn (this_insn);
16732 gcc_assert (!this_insn
16733 || (GET_CODE (this_insn) != BARRIER
16734 && GET_CODE (this_insn) != CODE_LABEL));
16738 /* Oh, dear! we ran off the end.. give up. */
16739 extract_constrain_insn_cached (insn);
16740 arm_ccfsm_state = 0;
16741 arm_target_insn = NULL;
16744 arm_target_insn = this_insn;
16747 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16750 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16752 if (reverse || then_not_else)
16753 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16756 /* Restore recog_data (getting the attributes of other insns can
16757 destroy this array, but final.c assumes that it remains intact
16758 across this call. */
16759 extract_constrain_insn_cached (insn);
16763 /* Output IT instructions. */
/* Emit the Thumb-2 IT (If-Then) prefix for a pending conditional-execution
   block before the next opcode is printed.  NOTE(review): several lines
   (return type, braces, declarations of `buff' and `n') are elided from
   this view of the file — confirm against the full source.  */
16765 thumb2_asm_output_opcode (FILE * stream)
16770 if (arm_condexec_mask)
/* Build the 't'/'e' suffix: bit n of arm_condexec_mask selects whether
   the (n+1)th conditional instruction uses the Then ('t') or Else ('e')
   form of the current condition.  */
16772 for (n = 0; n < arm_condexec_masklen; n++)
16773 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16775 asm_fprintf(stream, "i%s\t%s\n\t", buff,
16776 arm_condition_codes[arm_current_cc]);
/* The mask has been consumed; clear it so the IT prefix is emitted
   only once per conditional block.  */
16777 arm_condexec_mask = 0;
16781 /* Returns true if REGNO is a valid register
16782 for holding a quantity of type MODE. */
/* Implements the HARD_REGNO_MODE_OK target macro for ARM/Thumb.
   NOTE(review): the return type, braces and some guard lines (e.g. the
   TARGET_THUMB1 test presumably preceding line 16792) are elided from
   this view.  */
16784 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
/* Condition-code modes may only live in the CC register, or in the VFP
   status register when hardware VFP floating point is enabled.  */
16786 if (GET_MODE_CLASS (mode) == MODE_CC)
16787 return (regno == CC_REGNUM
16788 || (TARGET_HARD_FLOAT && TARGET_VFP
16789 && regno == VFPCC_REGNUM))
16792 /* For the Thumb we only allow values bigger than SImode in
16793 registers 0 - 6, so that there is always a second low
16794 register available to hold the upper part of the value.
16795 We probably we ought to ensure that the register is the
16796 start of an even numbered register pair. */
16797 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16799 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16800 && IS_CIRRUS_REGNUM (regno))
16801 /* We have outlawed SI values in Cirrus registers because they
16802 reside in the lower 32 bits, but SF values reside in the
16803 upper 32 bits. This causes gcc all sorts of grief. We can't
16804 even split the registers into pairs because Cirrus SI values
16805 get sign extended to 64bits-- aldyh. */
16806 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16808 if (TARGET_HARD_FLOAT && TARGET_VFP
16809 && IS_VFP_REGNUM (regno))
/* Single-precision/SImode quantities fit any S register; DFmode needs
   a register pair valid for doubles.  */
16811 if (mode == SFmode || mode == SImode)
16812 return VFP_REGNO_OK_FOR_SINGLE (regno);
16814 if (mode == DFmode)
16815 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16817 /* VFP registers can hold HFmode values, but there is no point in
16818 putting them there unless we have hardware conversion insns. */
16819 if (mode == HFmode)
16820 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
/* Neon vector modes: D/Q register modes plus the opaque TI/EI/OI/CI/XI
   structure modes, which need 2/3/4/6/8 consecutive D registers
   respectively.  NOTE(review): the TARGET_NEON guard for this return is
   elided from this view.  */
16823 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16824 || (VALID_NEON_QREG_MODE (mode)
16825 && NEON_REGNO_OK_FOR_QUAD (regno))
16826 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16827 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16828 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16829 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16830 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
/* iWMMXt: scalar GR registers take SImode only; the vector registers
   take the iWMMXt vector modes.  */
16835 if (TARGET_REALLY_IWMMXT)
16837 if (IS_IWMMXT_GR_REGNUM (regno))
16838 return mode == SImode;
16840 if (IS_IWMMXT_REGNUM (regno))
16841 return VALID_IWMMXT_REG_MODE (mode);
16844 /* We allow almost any value to be stored in the general registers.
16845 Restrict doubleword quantities to even register pairs so that we can
16846 use ldrd. Do not allow very large Neon structure opaque modes in
16847 general registers; they would use too many. */
16848 if (regno <= LAST_ARM_REGNUM)
16849 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16850 && ARM_NUM_REGS (mode) <= 4;
16852 if (regno == FRAME_POINTER_REGNUM
16853 || regno == ARG_POINTER_REGNUM)
16854 /* We only allow integers in the fake hard registers. */
16855 return GET_MODE_CLASS (mode) == MODE_INT;
16857 /* The only registers left are the FPA registers
16858 which we only allow to hold FP values. */
16859 return (TARGET_HARD_FLOAT && TARGET_FPA
16860 && GET_MODE_CLASS (mode) == MODE_FLOAT
16861 && regno >= FIRST_FPA_REGNUM
16862 && regno <= LAST_FPA_REGNUM);
16865 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16866 not used in arm mode. */
/* Implements REGNO_REG_CLASS: map hard register number REGNO to its
   register class.  NOTE(review): the return type, braces, the
   TARGET_THUMB1 branch bodies and some return lines are elided from
   this view of the file.  */
16869 arm_regno_class (int regno)
16873 if (regno == STACK_POINTER_REGNUM)
16875 if (regno == CC_REGNUM)
/* Thumb-2: r0-r7 are the low registers.  */
16882 if (TARGET_THUMB2 && regno < 8)
/* Remaining core registers (plus the fake frame/arg pointers): high
   registers in Thumb-2, plain general registers in ARM mode.  */
16885 if ( regno <= LAST_ARM_REGNUM
16886 || regno == FRAME_POINTER_REGNUM
16887 || regno == ARG_POINTER_REGNUM)
16888 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16890 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16891 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16893 if (IS_CIRRUS_REGNUM (regno))
16894 return CIRRUS_REGS;
/* VFP registers are split into three classes: d0-d7 (usable by both
   single- and double-precision), the rest of the low bank, and the
   high bank (d16+, VFPv3/Neon only).  */
16896 if (IS_VFP_REGNUM (regno))
16898 if (regno <= D7_VFP_REGNUM)
16899 return VFP_D0_D7_REGS;
16900 else if (regno <= LAST_LO_VFP_REGNUM)
16901 return VFP_LO_REGS;
16903 return VFP_HI_REGS;
16906 if (IS_IWMMXT_REGNUM (regno))
16907 return IWMMXT_REGS;
16909 if (IS_IWMMXT_GR_REGNUM (regno))
16910 return IWMMXT_GR_REGS;
16915 /* Handle a special case when computing the offset
16916 of an argument from the frame pointer. */
/* Return the offset of a stacked argument from the frame pointer for
   debug output, or a fallback VALUE when the address cannot be
   resolved.  NOTE(review): the return type, braces and several
   `return' lines (e.g. after the register checks) are elided from this
   view of the file.  */
16918 arm_debugger_arg_offset (int value, rtx addr)
16922 /* We are only interested if dbxout_parms() failed to compute the offset. */
16926 /* We can only cope with the case where the address is held in a register. */
16927 if (GET_CODE (addr) != REG)
16930 /* If we are using the frame pointer to point at the argument, then
16931 an offset of 0 is correct. */
16932 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16935 /* If we are using the stack pointer to point at the
16936 argument, then an offset of 0 is correct. */
16937 /* ??? Check this is consistent with thumb2 frame layout. */
16938 if ((TARGET_THUMB || !frame_pointer_needed)
16939 && REGNO (addr) == SP_REGNUM)
16942 /* Oh dear. The argument is pointed to by a register rather
16943 than being held in a register, or being stored at a known
16944 offset from the frame pointer. Since GDB only understands
16945 those two kinds of argument we must translate the address
16946 held in the register into an offset from the frame pointer.
16947 We do this by searching through the insns for the function
16948 looking to see where this register gets its value. If the
16949 register is initialized from the frame pointer plus an offset
16950 then we are in luck and we can continue, otherwise we give up.
16952 This code is exercised by producing debugging information
16953 for a function with arguments like this:
16955 double func (double a, double b, int c, double d) {return d;}
16957 Without this code the stab for parameter 'd' will be set to
16958 an offset of 0 from the frame pointer, rather than 8. */
16960 /* The if() statement says:
16962 If the insn is a normal instruction
16963 and if the insn is setting the value in a register
16964 and if the register being set is the register holding the address of the argument
16965 and if the address is computing by an addition
16966 that involves adding to a register
16967 which is the frame pointer
/* Scan forward through the whole insn stream looking for the SET that
   initialized ADDR's register from (plus frame-pointer const).  */
16972 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16974 if ( GET_CODE (insn) == INSN
16975 && GET_CODE (PATTERN (insn)) == SET
16976 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16977 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16978 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16979 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16980 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
/* Found it: the argument lives at this constant offset from the
   frame pointer.  */
16983 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
/* No initializing insn found — warn and fall back to a fixed guess.  */
16992 warning (0, "unable to compute real location of stacked parameter");
16993 value = 8; /* XXX magic hack */
/* Register builtin NAME with signature TYPE and function code CODE, but
   only when the MASK feature bit is present in the selected CPU's
   insn_flags.  NOTE(review): the do/while wrapper lines of this macro
   appear to be elided from this view.  */
16999 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17002 if ((MASK) & insn_flags) \
17003 add_builtin_function ((NAME), (TYPE), (CODE), \
17004 BUILT_IN_MD, NULL, NULL_TREE); \
/* One row of the machine-builtin tables (bdesc_2arg / bdesc_1arg):
   feature mask gating the builtin, the insn pattern to expand to, the
   user-visible name, the ARM_BUILTIN_* code, and comparison/flag data
   for compare-style builtins.  */
17008 struct builtin_description
17010 const unsigned int mask;
17011 const enum insn_code icode;
17012 const char * const name;
17013 const enum arm_builtins code;
17014 const enum rtx_code comparison;
17015 const unsigned int flag;
/* Table of two-operand iWMMXt builtins.  Entries with a NULL name
   (IWMMXT_BUILTIN2) are registered separately with hand-built types in
   arm_init_iwmmxt_builtins.  */
17018 static const struct builtin_description bdesc_2arg[] =
/* Expand "__builtin_arm_<string>" via insn pattern CODE_FOR_<code>,
   gated on FL_IWMMXT.  */
17020 #define IWMMXT_BUILTIN(code, string, builtin) \
17021 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17022 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
/* Vector add/subtract: wrapping, signed-saturating and
   unsigned-saturating variants for 8x8, 4x16 and 2x32 lanes.  */
17024 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17025 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17026 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17027 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17028 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17029 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17030 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17031 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17032 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17033 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17034 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17035 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17036 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17037 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17038 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17039 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17040 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17041 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
/* 16-bit multiplies (low part, signed/unsigned high part).  */
17042 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17043 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17044 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
/* Lane-wise compares producing all-ones/all-zeros masks.  */
17045 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17046 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17047 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17048 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17049 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17050 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17051 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17052 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17053 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
/* Lane-wise min/max, signed and unsigned.  */
17054 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17055 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17056 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17057 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17058 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17059 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17060 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17061 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17062 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17063 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17064 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17065 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
/* 64-bit logicals, averages, interleaves and multiply-add.  */
17066 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17067 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17068 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17069 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17070 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17071 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17072 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17073 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17074 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17075 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17076 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17077 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17078 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17079 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17080 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17081 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
/* Nameless entries: expanded through the generic 2-arg loop but given
   bespoke signatures later, hence the NULL name here.  */
17083 #define IWMMXT_BUILTIN2(code, builtin) \
17084 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17086 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17087 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17088 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17089 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17090 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17091 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
/* Shift and rotate variants: _di forms take a 64-bit shift count,
   plain/_iwmmxt forms take an immediate.  */
17092 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17093 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17094 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17095 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17096 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17097 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17098 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17099 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17100 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17101 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17102 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17103 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17104 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17105 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17106 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17107 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17108 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17109 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17110 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17111 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17112 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17113 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17114 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17115 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17116 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17117 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
/* Table of one-operand iWMMXt builtins: move-mask, accumulate and
   vector unpack/extend operations.  Reuses the IWMMXT_BUILTIN macro
   defined above for bdesc_2arg.  */
17120 static const struct builtin_description bdesc_1arg[] =
17122 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17123 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17124 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17125 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17126 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17127 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
/* Unpack high/low halves, zero- (u) or sign- (s) extending each lane.  */
17128 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17129 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17130 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17131 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17132 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17133 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17134 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17135 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17136 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17137 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17138 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17139 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17142 /* Set up all the iWMMXt builtins. This is
17143 not called if TARGET_IWMMXT is zero. */
/* Build the tree function types for every iWMMXt builtin signature,
   register the generic 2-arg table entries, then register the builtins
   that need bespoke signatures.  NOTE(review): the return type, braces
   and some declarations (e.g. `i', `type', `int_ftype_int') are elided
   from this view of the file.  */
17146 arm_init_iwmmxt_builtins (void)
17148 const struct builtin_description * d;
17150 tree endlink = void_list_node;
/* Vector type nodes for the three iWMMXt element layouts.  */
17152 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17153 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17154 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
/* Function-type nodes; each is TYPE (ARGS...) built from tree_cons
   chains terminated by endlink.  */
17157 = build_function_type (integer_type_node,
17158 tree_cons (NULL_TREE, integer_type_node, endlink));
17159 tree v8qi_ftype_v8qi_v8qi_int
17160 = build_function_type (V8QI_type_node,
17161 tree_cons (NULL_TREE, V8QI_type_node,
17162 tree_cons (NULL_TREE, V8QI_type_node,
17163 tree_cons (NULL_TREE,
17166 tree v4hi_ftype_v4hi_int
17167 = build_function_type (V4HI_type_node,
17168 tree_cons (NULL_TREE, V4HI_type_node,
17169 tree_cons (NULL_TREE, integer_type_node,
17171 tree v2si_ftype_v2si_int
17172 = build_function_type (V2SI_type_node,
17173 tree_cons (NULL_TREE, V2SI_type_node,
17174 tree_cons (NULL_TREE, integer_type_node,
17176 tree v2si_ftype_di_di
17177 = build_function_type (V2SI_type_node,
17178 tree_cons (NULL_TREE, long_long_integer_type_node,
17179 tree_cons (NULL_TREE, long_long_integer_type_node,
17181 tree di_ftype_di_int
17182 = build_function_type (long_long_integer_type_node,
17183 tree_cons (NULL_TREE, long_long_integer_type_node,
17184 tree_cons (NULL_TREE, integer_type_node,
17186 tree di_ftype_di_int_int
17187 = build_function_type (long_long_integer_type_node,
17188 tree_cons (NULL_TREE, long_long_integer_type_node,
17189 tree_cons (NULL_TREE, integer_type_node,
17190 tree_cons (NULL_TREE,
17193 tree int_ftype_v8qi
17194 = build_function_type (integer_type_node,
17195 tree_cons (NULL_TREE, V8QI_type_node,
17197 tree int_ftype_v4hi
17198 = build_function_type (integer_type_node,
17199 tree_cons (NULL_TREE, V4HI_type_node,
17201 tree int_ftype_v2si
17202 = build_function_type (integer_type_node,
17203 tree_cons (NULL_TREE, V2SI_type_node,
17205 tree int_ftype_v8qi_int
17206 = build_function_type (integer_type_node,
17207 tree_cons (NULL_TREE, V8QI_type_node,
17208 tree_cons (NULL_TREE, integer_type_node,
17210 tree int_ftype_v4hi_int
17211 = build_function_type (integer_type_node,
17212 tree_cons (NULL_TREE, V4HI_type_node,
17213 tree_cons (NULL_TREE, integer_type_node,
17215 tree int_ftype_v2si_int
17216 = build_function_type (integer_type_node,
17217 tree_cons (NULL_TREE, V2SI_type_node,
17218 tree_cons (NULL_TREE, integer_type_node,
17220 tree v8qi_ftype_v8qi_int_int
17221 = build_function_type (V8QI_type_node,
17222 tree_cons (NULL_TREE, V8QI_type_node,
17223 tree_cons (NULL_TREE, integer_type_node,
17224 tree_cons (NULL_TREE,
17227 tree v4hi_ftype_v4hi_int_int
17228 = build_function_type (V4HI_type_node,
17229 tree_cons (NULL_TREE, V4HI_type_node,
17230 tree_cons (NULL_TREE, integer_type_node,
17231 tree_cons (NULL_TREE,
17234 tree v2si_ftype_v2si_int_int
17235 = build_function_type (V2SI_type_node,
17236 tree_cons (NULL_TREE, V2SI_type_node,
17237 tree_cons (NULL_TREE, integer_type_node,
17238 tree_cons (NULL_TREE,
17241 /* Miscellaneous. */
17242 tree v8qi_ftype_v4hi_v4hi
17243 = build_function_type (V8QI_type_node,
17244 tree_cons (NULL_TREE, V4HI_type_node,
17245 tree_cons (NULL_TREE, V4HI_type_node,
17247 tree v4hi_ftype_v2si_v2si
17248 = build_function_type (V4HI_type_node,
17249 tree_cons (NULL_TREE, V2SI_type_node,
17250 tree_cons (NULL_TREE, V2SI_type_node,
17252 tree v2si_ftype_v4hi_v4hi
17253 = build_function_type (V2SI_type_node,
17254 tree_cons (NULL_TREE, V4HI_type_node,
17255 tree_cons (NULL_TREE, V4HI_type_node,
17257 tree v2si_ftype_v8qi_v8qi
17258 = build_function_type (V2SI_type_node,
17259 tree_cons (NULL_TREE, V8QI_type_node,
17260 tree_cons (NULL_TREE, V8QI_type_node,
17262 tree v4hi_ftype_v4hi_di
17263 = build_function_type (V4HI_type_node,
17264 tree_cons (NULL_TREE, V4HI_type_node,
17265 tree_cons (NULL_TREE,
17266 long_long_integer_type_node,
17268 tree v2si_ftype_v2si_di
17269 = build_function_type (V2SI_type_node,
17270 tree_cons (NULL_TREE, V2SI_type_node,
17271 tree_cons (NULL_TREE,
17272 long_long_integer_type_node,
17274 tree void_ftype_int_int
17275 = build_function_type (void_type_node,
17276 tree_cons (NULL_TREE, integer_type_node,
17277 tree_cons (NULL_TREE, integer_type_node,
17280 = build_function_type (long_long_unsigned_type_node, endlink);
17282 = build_function_type (long_long_integer_type_node,
17283 tree_cons (NULL_TREE, V8QI_type_node,
17286 = build_function_type (long_long_integer_type_node,
17287 tree_cons (NULL_TREE, V4HI_type_node,
17290 = build_function_type (long_long_integer_type_node,
17291 tree_cons (NULL_TREE, V2SI_type_node,
17293 tree v2si_ftype_v4hi
17294 = build_function_type (V2SI_type_node,
17295 tree_cons (NULL_TREE, V4HI_type_node,
17297 tree v4hi_ftype_v8qi
17298 = build_function_type (V4HI_type_node,
17299 tree_cons (NULL_TREE, V8QI_type_node,
17302 tree di_ftype_di_v4hi_v4hi
17303 = build_function_type (long_long_unsigned_type_node,
17304 tree_cons (NULL_TREE,
17305 long_long_unsigned_type_node,
17306 tree_cons (NULL_TREE, V4HI_type_node,
17307 tree_cons (NULL_TREE,
17311 tree di_ftype_v4hi_v4hi
17312 = build_function_type (long_long_unsigned_type_node,
17313 tree_cons (NULL_TREE, V4HI_type_node,
17314 tree_cons (NULL_TREE, V4HI_type_node,
17317 /* Normal vector binops. */
17318 tree v8qi_ftype_v8qi_v8qi
17319 = build_function_type (V8QI_type_node,
17320 tree_cons (NULL_TREE, V8QI_type_node,
17321 tree_cons (NULL_TREE, V8QI_type_node,
17323 tree v4hi_ftype_v4hi_v4hi
17324 = build_function_type (V4HI_type_node,
17325 tree_cons (NULL_TREE, V4HI_type_node,
17326 tree_cons (NULL_TREE, V4HI_type_node,
17328 tree v2si_ftype_v2si_v2si
17329 = build_function_type (V2SI_type_node,
17330 tree_cons (NULL_TREE, V2SI_type_node,
17331 tree_cons (NULL_TREE, V2SI_type_node,
17333 tree di_ftype_di_di
17334 = build_function_type (long_long_unsigned_type_node,
17335 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17336 tree_cons (NULL_TREE,
17337 long_long_unsigned_type_node,
17340 /* Add all builtins that are more or less simple operations on two
/* Pick the function type from the insn pattern's operand mode; every
   bdesc_2arg entry maps onto one of the four "normal binop" types.  */
17342 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17344 /* Use one of the operands; the target can have a different mode for
17345 mask-generating compares. */
17346 enum machine_mode mode;
17352 mode = insn_data[d->icode].operand[1].mode;
17357 type = v8qi_ftype_v8qi_v8qi;
17360 type = v4hi_ftype_v4hi_v4hi;
17363 type = v2si_ftype_v2si_v2si;
17366 type = di_ftype_di_di;
17370 gcc_unreachable ();
17373 def_mbuiltin (d->mask, d->name, type, d->code);
17376 /* Add the remaining MMX insns with somewhat more complicated types. */
17377 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
17378 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
17379 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
/* Shifts/rotates: register-count (…_di) and immediate-count forms.  */
17381 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
17382 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
17383 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
17384 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
17385 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
17386 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
17388 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
17389 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
17390 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
17391 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
17392 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
17393 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
17395 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
17396 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
17397 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
17398 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
17399 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
17400 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
17402 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
17403 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
17404 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
17405 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
17406 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
17407 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
17409 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
/* Sum-of-absolute-differences (accumulating and zeroing forms).  */
17411 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
17412 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
17413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
17414 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
/* Lane extract (textrm) and insert (tinsr).  */
17416 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
17417 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
17418 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17419 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17420 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17421 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17422 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17424 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17426 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17428 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17430 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17431 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
/* Pack with saturation (narrowing), then unpack/extend (widening).  */
17434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17435 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17438 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
/* Multiply-accumulate and the TMIA family.  */
17454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
/* Register __builtin_thread_pointer: returns the TLS thread pointer,
   marked nothrow and readonly so calls can be CSEd.  NOTE(review): the
   return type, braces and the declarations of `ftype'/`decl' (and the
   final add_builtin_function argument on line 17476) are elided from
   this view.  */
17469 arm_init_tls_builtins (void)
17473 ftype = build_function_type (ptr_type_node, void_list_node);
17474 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17475 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17477 TREE_NOTHROW (decl) = 1;
17478 TREE_READONLY (decl) = 1;
/* Bit flags identifying which vector "key" types a Neon builtin variant
   supports; the <mode>_UP macros map a mode name to its flag so the
   VARn macros below can OR them together.  NOTE(review): the enumerator
   lines of this enum (T_V8QI .. T_MAX) are elided from this view.  */
17481 enum neon_builtin_type_bits {
17497 #define v8qi_UP T_V8QI
17498 #define v4hi_UP T_V4HI
17499 #define v2si_UP T_V2SI
17500 #define v2sf_UP T_V2SF
17502 #define v16qi_UP T_V16QI
17503 #define v8hi_UP T_V8HI
17504 #define v4si_UP T_V4SI
17505 #define v4sf_UP T_V4SF
17506 #define v2di_UP T_V2DI
/* Map a mode token to its T_* flag via the <mode>_UP defines above.  */
17511 #define UP(X) X##_UP
/* NOTE(review): the surrounding enum neon_itype definition is mostly
   elided from this view; only two enumerators are visible.  */
17546 NEON_LOADSTRUCTLANE,
17548 NEON_STORESTRUCTLANE,
/* One Neon builtin: its itype, the insn codes for each type variant,
   how many variants, and the base function code assigned at init time.
   NOTE(review): the struct header and `name'/`bits' fields are elided
   from this view.  */
17557 const neon_itype itype;
17559 const enum insn_code codes[T_MAX];
17560 const unsigned int num_vars;
17561 unsigned int base_fcode;
17562 } neon_builtin_datum;
/* Insn code for Neon pattern N with mode suffix X.  */
17564 #define CF(N,X) CODE_FOR_neon_##N##X
/* VARn(T, N, ...): initializer for a neon_builtin_datum supporting n
   type variants — name string, itype, OR of type bits, insn-code
   array, variant count, and a zero base code filled in later.  */
17566 #define VAR1(T, N, A) \
17567 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17568 #define VAR2(T, N, A, B) \
17569 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17570 #define VAR3(T, N, A, B, C) \
17571 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17572 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17573 #define VAR4(T, N, A, B, C, D) \
17574 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17575 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17576 #define VAR5(T, N, A, B, C, D, E) \
17577 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17578 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17579 #define VAR6(T, N, A, B, C, D, E, F) \
17580 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17581 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17582 #define VAR7(T, N, A, B, C, D, E, F, G) \
17583 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17584 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17586 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17587 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17589 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17590 CF (N, G), CF (N, H) }, 8, 0
17591 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17592 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17593 | UP (H) | UP (I), \
17594 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17595 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17596 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17597 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17598 | UP (H) | UP (I) | UP (J), \
17599 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17600 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
17602 /* The mode entries in the following table correspond to the "key" type of the
17603 instruction variant, i.e. equivalent to that which would be specified after
17604 the assembler mnemonic, which usually refers to the last vector operand.
17605 (Signed/unsigned/polynomial types are not differentiated between though, and
17606 are all mapped onto the same mode for a given element size.) The modes
17607 listed per instruction should be the same as those defined for that
17608 instruction's pattern in neon.md.
17609 WARNING: Variants should be listed in the same increasing order as
17610 neon_builtin_type_bits. */
/* One neon_builtin_datum per overloaded NEON builtin; consumed by
   arm_init_neon_builtins (which assigns function codes in table order)
   and searched by locate_neon_builtin_icode via bsearch, so the table
   order also determines the function-code ordering.  */
17612 static neon_builtin_datum neon_builtin_data[] =
17614 { VAR10 (BINOP, vadd,
17615 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17616 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17617 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17618 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17619 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17620 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17621 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17622 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17623 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17624 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17625 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17626 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17627 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17628 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17629 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17630 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17631 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17632 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17633 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17634 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17635 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17636 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17637 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17638 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17639 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17640 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17641 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17642 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17643 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17644 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17645 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17646 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17647 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17648 { VAR10 (BINOP, vsub,
17649 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17650 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17651 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17652 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17653 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17654 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17655 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17656 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17657 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17658 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17659 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17660 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17661 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17662 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17663 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17664 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17665 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17666 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17667 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17668 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17669 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17670 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17671 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17672 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17673 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17674 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17675 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17676 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17677 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17678 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17679 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17680 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17681 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17682 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17683 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17684 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17685 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17686 /* FIXME: vget_lane supports more variants than this! */
17687 { VAR10 (GETLANE, vget_lane,
17688 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17689 { VAR10 (SETLANE, vset_lane,
17690 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17691 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17692 { VAR10 (DUP, vdup_n,
17693 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17694 { VAR10 (DUPLANE, vdup_lane,
17695 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17696 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17697 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17698 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17699 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17700 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17701 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17702 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17703 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17704 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17705 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17706 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17707 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17708 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17709 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17710 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17711 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17712 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17713 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17714 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17715 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17716 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17717 { VAR10 (BINOP, vext,
17718 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17719 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17720 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17721 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17722 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17723 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17724 { VAR10 (SELECT, vbsl,
17725 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17726 { VAR1 (VTBL, vtbl1, v8qi) },
17727 { VAR1 (VTBL, vtbl2, v8qi) },
17728 { VAR1 (VTBL, vtbl3, v8qi) },
17729 { VAR1 (VTBL, vtbl4, v8qi) },
17730 { VAR1 (VTBX, vtbx1, v8qi) },
17731 { VAR1 (VTBX, vtbx2, v8qi) },
17732 { VAR1 (VTBX, vtbx3, v8qi) },
17733 { VAR1 (VTBX, vtbx4, v8qi) },
17734 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17735 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17736 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17737 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17738 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17739 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17740 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17741 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17742 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17743 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17744 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17745 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17746 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17747 { VAR10 (LOAD1, vld1,
17748 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17749 { VAR10 (LOAD1LANE, vld1_lane,
17750 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17751 { VAR10 (LOAD1, vld1_dup,
17752 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17753 { VAR10 (STORE1, vst1,
17754 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17755 { VAR10 (STORE1LANE, vst1_lane,
17756 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17757 { VAR9 (LOADSTRUCT,
17758 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17759 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17760 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17761 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17762 { VAR9 (STORESTRUCT, vst2,
17763 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17764 { VAR7 (STORESTRUCTLANE, vst2_lane,
17765 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17766 { VAR9 (LOADSTRUCT,
17767 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17768 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17769 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17770 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17771 { VAR9 (STORESTRUCT, vst3,
17772 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17773 { VAR7 (STORESTRUCTLANE, vst3_lane,
17774 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17775 { VAR9 (LOADSTRUCT, vld4,
17776 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17777 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17778 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17779 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17780 { VAR9 (STORESTRUCT, vst4,
17781 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17782 { VAR7 (STORESTRUCTLANE, vst4_lane,
17783 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17784 { VAR10 (LOGICBINOP, vand,
17785 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17786 { VAR10 (LOGICBINOP, vorr,
17787 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17788 { VAR10 (BINOP, veor,
17789 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17790 { VAR10 (LOGICBINOP, vbic,
17791 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17792 { VAR10 (LOGICBINOP, vorn,
17793 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
/* Register all NEON builtin functions: create the NEON element, vector
   and pointer type nodes, register them with the language hooks (names
   must match the table used by arm_mangle_type), then walk
   neon_builtin_data assigning consecutive function codes starting at
   ARM_BUILTIN_NEON_BASE and building a function type for each enabled
   variant from its insn_data.  */
17809 arm_init_neon_builtins (void)
17811 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
/* Distinct element type nodes (kept separate from the generic C types
   so they can be recognized later, e.g. for mangling).  */
17813 tree neon_intQI_type_node;
17814 tree neon_intHI_type_node;
17815 tree neon_polyQI_type_node;
17816 tree neon_polyHI_type_node;
17817 tree neon_intSI_type_node;
17818 tree neon_intDI_type_node;
17819 tree neon_float_type_node;
17821 tree intQI_pointer_node;
17822 tree intHI_pointer_node;
17823 tree intSI_pointer_node;
17824 tree intDI_pointer_node;
17825 tree float_pointer_node;
17827 tree const_intQI_node;
17828 tree const_intHI_node;
17829 tree const_intSI_node;
17830 tree const_intDI_node;
17831 tree const_float_node;
17833 tree const_intQI_pointer_node;
17834 tree const_intHI_pointer_node;
17835 tree const_intSI_pointer_node;
17836 tree const_intDI_pointer_node;
17837 tree const_float_pointer_node;
17839 tree V8QI_type_node;
17840 tree V4HI_type_node;
17841 tree V2SI_type_node;
17842 tree V2SF_type_node;
17843 tree V16QI_type_node;
17844 tree V8HI_type_node;
17845 tree V4SI_type_node;
17846 tree V4SF_type_node;
17847 tree V2DI_type_node;
17849 tree intUQI_type_node;
17850 tree intUHI_type_node;
17851 tree intUSI_type_node;
17852 tree intUDI_type_node;
17854 tree intEI_type_node;
17855 tree intOI_type_node;
17856 tree intCI_type_node;
17857 tree intXI_type_node;
17859 tree V8QI_pointer_node;
17860 tree V4HI_pointer_node;
17861 tree V2SI_pointer_node;
17862 tree V2SF_pointer_node;
17863 tree V16QI_pointer_node;
17864 tree V8HI_pointer_node;
17865 tree V4SI_pointer_node;
17866 tree V4SF_pointer_node;
17867 tree V2DI_pointer_node;
17869 tree void_ftype_pv8qi_v8qi_v8qi;
17870 tree void_ftype_pv4hi_v4hi_v4hi;
17871 tree void_ftype_pv2si_v2si_v2si;
17872 tree void_ftype_pv2sf_v2sf_v2sf;
17873 tree void_ftype_pdi_di_di;
17874 tree void_ftype_pv16qi_v16qi_v16qi;
17875 tree void_ftype_pv8hi_v8hi_v8hi;
17876 tree void_ftype_pv4si_v4si_v4si;
17877 tree void_ftype_pv4sf_v4sf_v4sf;
17878 tree void_ftype_pv2di_v2di_v2di;
17880 tree reinterp_ftype_dreg[5][5];
17881 tree reinterp_ftype_qreg[5][5];
17882 tree dreg_types[5], qreg_types[5];
17884 /* Create distinguished type nodes for NEON vector element types,
17885 and pointers to values of such types, so we can detect them later. */
17886 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17887 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17888 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17889 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17890 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17891 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17892 neon_float_type_node = make_node (REAL_TYPE);
17893 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17894 layout_type (neon_float_type_node);
17896 /* Define typedefs which exactly correspond to the modes we are basing vector
17897 types on. If you change these names you'll need to change
17898 the table used by arm_mangle_type too. */
17899 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17900 "__builtin_neon_qi");
17901 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17902 "__builtin_neon_hi");
17903 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17904 "__builtin_neon_si");
17905 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17906 "__builtin_neon_sf");
17907 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17908 "__builtin_neon_di");
17909 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17910 "__builtin_neon_poly8");
17911 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17912 "__builtin_neon_poly16");
17914 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17915 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17916 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17917 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17918 float_pointer_node = build_pointer_type (neon_float_type_node);
17920 /* Next create constant-qualified versions of the above types. */
17921 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17923 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17925 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17927 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17929 const_float_node = build_qualified_type (neon_float_type_node,
17932 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17933 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17934 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17935 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17936 const_float_pointer_node = build_pointer_type (const_float_node);
17938 /* Now create vector types based on our NEON element types. */
17939 /* 64-bit vectors. */
17941 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17943 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17945 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17947 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17948 /* 128-bit vectors. */
17950 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17952 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17954 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17956 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17958 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17960 /* Unsigned integer types for various mode sizes. */
17961 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17962 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17963 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17964 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17966 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17967 "__builtin_neon_uqi");
17968 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17969 "__builtin_neon_uhi");
17970 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17971 "__builtin_neon_usi");
17972 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17973 "__builtin_neon_udi");
17975 /* Opaque integer types for structures of vectors. */
17976 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17977 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17978 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17979 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17981 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17982 "__builtin_neon_ti");
17983 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17984 "__builtin_neon_ei");
17985 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17986 "__builtin_neon_oi");
17987 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17988 "__builtin_neon_ci");
17989 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17990 "__builtin_neon_xi");
17992 /* Pointers to vector types. */
17993 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17994 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17995 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17996 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17997 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17998 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17999 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18000 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18001 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18003 /* Operations which return results as pairs. */
18004 void_ftype_pv8qi_v8qi_v8qi =
18005 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18006 V8QI_type_node, NULL);
18007 void_ftype_pv4hi_v4hi_v4hi =
18008 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18009 V4HI_type_node, NULL);
18010 void_ftype_pv2si_v2si_v2si =
18011 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18012 V2SI_type_node, NULL);
18013 void_ftype_pv2sf_v2sf_v2sf =
18014 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18015 V2SF_type_node, NULL);
18016 void_ftype_pdi_di_di =
18017 build_function_type_list (void_type_node, intDI_pointer_node,
18018 neon_intDI_type_node, neon_intDI_type_node, NULL);
18019 void_ftype_pv16qi_v16qi_v16qi =
18020 build_function_type_list (void_type_node, V16QI_pointer_node,
18021 V16QI_type_node, V16QI_type_node, NULL);
18022 void_ftype_pv8hi_v8hi_v8hi =
18023 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18024 V8HI_type_node, NULL);
18025 void_ftype_pv4si_v4si_v4si =
18026 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18027 V4SI_type_node, NULL);
18028 void_ftype_pv4sf_v4sf_v4sf =
18029 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18030 V4SF_type_node, NULL);
18031 void_ftype_pv2di_v2di_v2di =
18032 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18033 V2DI_type_node, NULL);
/* Precompute the 5x5 reinterpret-cast function types for both
   doubleword and quadword register classes.  */
18035 dreg_types[0] = V8QI_type_node;
18036 dreg_types[1] = V4HI_type_node;
18037 dreg_types[2] = V2SI_type_node;
18038 dreg_types[3] = V2SF_type_node;
18039 dreg_types[4] = neon_intDI_type_node;
18041 qreg_types[0] = V16QI_type_node;
18042 qreg_types[1] = V8HI_type_node;
18043 qreg_types[2] = V4SI_type_node;
18044 qreg_types[3] = V4SF_type_node;
18045 qreg_types[4] = V2DI_type_node;
18047 for (i = 0; i < 5; i++)
18050 for (j = 0; j < 5; j++)
18052 reinterp_ftype_dreg[i][j]
18053 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18054 reinterp_ftype_qreg[i][j]
18055 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
/* Main loop: for each table entry, record its base function code and
   register one builtin per enabled type bit, deriving the function
   type either directly from the insn operands or from one of the
   precomputed special-case types.  */
18059 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18061 neon_builtin_datum *d = &neon_builtin_data[i];
18062 unsigned int j, codeidx = 0;
18064 d->base_fcode = fcode;
18066 for (j = 0; j < T_MAX; j++)
18068 const char* const modenames[] = {
18069 "v8qi", "v4hi", "v2si", "v2sf", "di",
18070 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18074 enum insn_code icode;
18075 int is_load = 0, is_store = 0;
/* Skip type bits this builtin does not provide.  */
18077 if ((d->bits & (1 << j)) == 0)
18080 icode = d->codes[codeidx++];
18085 case NEON_LOAD1LANE:
18086 case NEON_LOADSTRUCT:
18087 case NEON_LOADSTRUCTLANE:
18089 /* Fall through. */
18091 case NEON_STORE1LANE:
18092 case NEON_STORESTRUCT:
18093 case NEON_STORESTRUCTLANE:
18096 /* Fall through. */
18099 case NEON_LOGICBINOP:
18100 case NEON_SHIFTINSERT:
18107 case NEON_SHIFTIMM:
18108 case NEON_SHIFTACC:
18114 case NEON_LANEMULL:
18115 case NEON_LANEMULH:
18117 case NEON_SCALARMUL:
18118 case NEON_SCALARMULL:
18119 case NEON_SCALARMULH:
18120 case NEON_SCALARMAC:
18126 tree return_type = void_type_node, args = void_list_node;
18128 /* Build a function type directly from the insn_data for this
18129 builtin. The build_function_type() function takes care of
18130 removing duplicates for us. */
18131 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18135 if (is_load && k == 1)
18137 /* Neon load patterns always have the memory operand
18138 (a SImode pointer) in the operand 1 position. We
18139 want a const pointer to the element type in that
18141 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18147 eltype = const_intQI_pointer_node;
18152 eltype = const_intHI_pointer_node;
18157 eltype = const_intSI_pointer_node;
18162 eltype = const_float_pointer_node;
18167 eltype = const_intDI_pointer_node;
18170 default: gcc_unreachable ();
18173 else if (is_store && k == 0)
18175 /* Similarly, Neon store patterns use operand 0 as
18176 the memory location to store to (a SImode pointer).
18177 Use a pointer to the element type of the store in
18179 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18185 eltype = intQI_pointer_node;
18190 eltype = intHI_pointer_node;
18195 eltype = intSI_pointer_node;
18200 eltype = float_pointer_node;
18205 eltype = intDI_pointer_node;
18208 default: gcc_unreachable ();
/* Ordinary operand: map its machine mode to the matching tree
   type node.  */
18213 switch (insn_data[icode].operand[k].mode)
18215 case VOIDmode: eltype = void_type_node; break;
18217 case QImode: eltype = neon_intQI_type_node; break;
18218 case HImode: eltype = neon_intHI_type_node; break;
18219 case SImode: eltype = neon_intSI_type_node; break;
18220 case SFmode: eltype = neon_float_type_node; break;
18221 case DImode: eltype = neon_intDI_type_node; break;
18222 case TImode: eltype = intTI_type_node; break;
18223 case EImode: eltype = intEI_type_node; break;
18224 case OImode: eltype = intOI_type_node; break;
18225 case CImode: eltype = intCI_type_node; break;
18226 case XImode: eltype = intXI_type_node; break;
18227 /* 64-bit vectors. */
18228 case V8QImode: eltype = V8QI_type_node; break;
18229 case V4HImode: eltype = V4HI_type_node; break;
18230 case V2SImode: eltype = V2SI_type_node; break;
18231 case V2SFmode: eltype = V2SF_type_node; break;
18232 /* 128-bit vectors. */
18233 case V16QImode: eltype = V16QI_type_node; break;
18234 case V8HImode: eltype = V8HI_type_node; break;
18235 case V4SImode: eltype = V4SI_type_node; break;
18236 case V4SFmode: eltype = V4SF_type_node; break;
18237 case V2DImode: eltype = V2DI_type_node; break;
18238 default: gcc_unreachable ();
/* Operand 0 is the result unless this is a store builtin.  */
18242 if (k == 0 && !is_store)
18243 return_type = eltype;
18245 args = tree_cons (NULL_TREE, eltype, args);
18248 ftype = build_function_type (return_type, args);
/* Result-pair builtins use one of the precomputed
   void (T *, T, T) types keyed on the input operand mode.  */
18252 case NEON_RESULTPAIR:
18254 switch (insn_data[icode].operand[1].mode)
18256 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18257 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18258 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18259 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18260 case DImode: ftype = void_ftype_pdi_di_di; break;
18261 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18262 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18263 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18264 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18265 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18266 default: gcc_unreachable ();
18271 case NEON_REINTERP:
18273 /* We iterate over 5 doubleword types, then 5 quadword
18276 switch (insn_data[icode].operand[0].mode)
18278 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18279 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18280 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18281 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18282 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18283 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18284 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18285 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18286 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18287 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18288 default: gcc_unreachable ();
18294 gcc_unreachable ();
18297 gcc_assert (ftype != NULL);
/* Builtin names are "__builtin_neon_<name><keymode>".  */
18299 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
18301 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
/* Create the half-precision floating-point type node and register it
   with the front end under the name "__fp16".  */
18308 arm_init_fp16_builtins (void)
18310 tree fp16_type = make_node (REAL_TYPE);
18311 TYPE_PRECISION (fp16_type) = 16;
18312 layout_type (fp16_type);
18313 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
/* Top-level builtin initialization (TARGET_INIT_BUILTINS): TLS builtins
   always; iWMMXt, NEON and __fp16 builtins only when the corresponding
   target feature/format is enabled.  */
18317 arm_init_builtins (void)
18319 arm_init_tls_builtins ();
18321 if (TARGET_REALLY_IWMMXT)
18322 arm_init_iwmmxt_builtins ();
18325 arm_init_neon_builtins ();
18327 if (arm_fp16_format)
18328 arm_init_fp16_builtins ();
18331 /* Implement TARGET_INVALID_PARAMETER_TYPE.
   Reject __fp16 (16-bit scalar float) as a function parameter type;
   the returned string is a translatable diagnostic.  */
18333 static const char *
18334 arm_invalid_parameter_type (const_tree t)
18336 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18337 return N_("function parameters cannot have __fp16 type");
18341 /* Implement TARGET_INVALID_RETURN_TYPE.
   (Comment previously said TARGET_INVALID_PARAMETER_TYPE -- copy-paste
   error; this function handles return types.)  Reject __fp16 as a
   function return type.  */
18343 static const char *
18344 arm_invalid_return_type (const_tree t)
18346 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18347 return N_("functions cannot return __fp16 type")
18351 /* Implement TARGET_PROMOTED_TYPE.
   __fp16 values promote to float in expressions, mirroring the usual
   C promotion of float arithmetic.  */
18354 arm_promoted_type (const_tree t)
18356 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18357 return float_type_node;
18361 /* Implement TARGET_CONVERT_TO_TYPE.
18362 Specifically, this hook implements the peculiarity of the ARM
18363 half-precision floating-point C semantics that requires conversions between
18364 __fp16 to or from double to do an intermediate conversion to float. */
18367 arm_convert_to_type (tree type, tree expr)
18369 tree fromtype = TREE_TYPE (expr);
/* Only scalar-float <-> scalar-float conversions are special-cased.  */
18370 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
/* __fp16 <-> (double or wider): route through float.  */
18372 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
18373 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
18374 return convert (type, convert (float_type_node, expr));
18378 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
18379 This simply adds HFmode as a supported mode; even though we don't
18380 implement arithmetic on this type directly, it's supported by
18381 optabs conversions, much the way the double-word arithmetic is
18382 special-cased in the default hook. */
18385 arm_scalar_mode_supported_p (enum machine_mode mode)
/* HFmode is only supported when an __fp16 format has been selected.  */
18387 if (mode == HFmode)
18388 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
18390 return default_scalar_mode_supported_p (mode);
18393 /* Errors in the source file can cause expand_expr to return const0_rtx
18394 where we expect a vector. To avoid crashing, use one of the vector
18395 clear instructions. */
18398 safe_vector_operand (rtx x, enum machine_mode mode)
18400 if (x != const0_rtx)
/* X was const0_rtx: materialize a cleared register of MODE instead.  */
18402 x = gen_reg_rtx (mode);
18404 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
18405 : gen_rtx_SUBREG (DImode, x, 0)));
18409 /* Subroutine of arm_expand_builtin to take care of binop insns.
   Expands the two call arguments, coerces them into registers that
   satisfy ICODE's operand predicates, emits the insn, and returns the
   result rtx (TARGET, or a fresh register if TARGET is unsuitable).  */
18412 arm_expand_binop_builtin (enum insn_code icode,
18413 tree exp, rtx target)
18416 tree arg0 = CALL_EXPR_ARG (exp, 0);
18417 tree arg1 = CALL_EXPR_ARG (exp, 1);
18418 rtx op0 = expand_normal (arg0);
18419 rtx op1 = expand_normal (arg1);
18420 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18421 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18422 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against erroneous const0_rtx operands (see safe_vector_operand).  */
18424 if (VECTOR_MODE_P (mode0))
18425 op0 = safe_vector_operand (op0, mode0)
18426 if (VECTOR_MODE_P (mode1))
18427 op1 = safe_vector_operand (op1, mode1);
/* Ensure TARGET is a register of the right mode accepted by the insn.  */
18430 || GET_MODE (target) != tmode
18431 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18432 target = gen_reg_rtx (tmode);
18434 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18436 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18437 op0 = copy_to_mode_reg (mode0, op0);
18438 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18439 op1 = copy_to_mode_reg (mode1, op1);
18441 pat = GEN_FCN (icode) (target, op0, op1);
18448 /* Subroutine of arm_expand_builtin to take care of unop insns.
   Like arm_expand_binop_builtin but with one operand; if DO_LOAD is
   nonzero the argument is treated as an address and dereferenced.  */
18451 arm_expand_unop_builtin (enum insn_code icode,
18452 tree exp, rtx target, int do_load)
18455 tree arg0 = CALL_EXPR_ARG (exp, 0);
18456 rtx op0 = expand_normal (arg0);
18457 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18458 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Ensure TARGET is a register of the right mode accepted by the insn.  */
18461 || GET_MODE (target) != tmode
18462 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18463 target = gen_reg_rtx (tmode);
/* DO_LOAD case: wrap the pointer operand in a MEM of MODE0.  */
18465 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18468 if (VECTOR_MODE_P (mode0))
18469 op0 = safe_vector_operand (op0, mode0);
18471 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18472 op0 = copy_to_mode_reg (mode0, op0);
18475 pat = GEN_FCN (icode) (target, op0);
/* bsearch comparator for neon_builtin_data: A is a key holding the
   sought function code in base_fcode, B is a table entry; an entry
   matches when the key's code falls within [base_fcode,
   base_fcode + num_vars).  */
18483 neon_builtin_compare (const void *a, const void *b)
18485 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18486 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18487 unsigned int soughtcode = key->base_fcode;
18489 if (soughtcode >= memb->base_fcode
18490 && soughtcode < memb->base_fcode + memb->num_vars)
18492 else if (soughtcode < memb->base_fcode)
/* Map function code FCODE back to its insn code by binary-searching
   neon_builtin_data (which is ordered by base_fcode); if ITYPE is
   non-null also report the builtin's itype.  Asserts the code is
   found and in range.  */
18498 static enum insn_code
18499 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18501 neon_builtin_datum key, *found;
18504 key.base_fcode = fcode;
18505 found = (neon_builtin_datum *)
18506 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18507 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18508 gcc_assert (found);
/* Offset within the entry selects the per-mode variant.  */
18509 idx = fcode - (int) found->base_fcode;
18510 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18513 *itype = found->itype;
18515 return found->codes[idx];
/* Kinds of argument descriptor consumed by arm_expand_neon_args;
   NEON_ARG_STOP (declared nearby) terminates the variadic list.
   NOTE(review): the enum's surrounding lines are elided from this
   sampled excerpt.  */
18519 NEON_ARG_COPY_TO_REG,
/* Upper bound on the number of operands a Neon builtin pattern takes.  */
18524 #define NEON_MAX_BUILTIN_ARGS 5
18526 /* Expand a Neon builtin. */
/* NOTE(review): sampled listing; the signature's return type, braces and
   several control-flow lines (switch heads, breaks) are elided.  */
18528 arm_expand_neon_args (rtx target, int icode, int have_retval,
18533 tree arg[NEON_MAX_BUILTIN_ARGS];
18534 rtx op[NEON_MAX_BUILTIN_ARGS];
18535 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18536 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
/* When the builtin returns a value, make sure TARGET is acceptable as
   operand 0 of the pattern.  */
18541 || GET_MODE (target) != tmode
18542 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18543 target = gen_reg_rtx (tmode);
/* Walk the variadic builtin_arg descriptors until NEON_ARG_STOP.  */
18545 va_start (ap, exp);
18549 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18551 if (thisarg == NEON_ARG_STOP)
18555 arg[argc] = CALL_EXPR_ARG (exp, argc);
18556 op[argc] = expand_normal (arg[argc]);
/* With a return value, call argument N is pattern operand N + 1.  */
18557 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18561 case NEON_ARG_COPY_TO_REG:
18562 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18563 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18564 (op[argc], mode[argc]))
18565 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18568 case NEON_ARG_CONSTANT:
18569 /* FIXME: This error message is somewhat unhelpful. */
18570 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18571 (op[argc], mode[argc]))
18572 error ("argument must be a constant");
18575 case NEON_ARG_STOP:
18576 gcc_unreachable ();
/* Generate the insn: with a return value TARGET is operand 0, the
   remaining operands follow in order (dispatch on argc is elided).  */
18589 pat = GEN_FCN (icode) (target, op[0]);
18593 pat = GEN_FCN (icode) (target, op[0], op[1]);
18597 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18601 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18605 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18609 gcc_unreachable ();
/* No return value: every collected operand is an input.  */
18615 pat = GEN_FCN (icode) (op[0]);
18619 pat = GEN_FCN (icode) (op[0], op[1]);
18623 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18627 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18631 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18635 gcc_unreachable ();
18646 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18647 constants defined per-instruction or per instruction-variant. Instead, the
18648 required info is looked up in the table neon_builtin_data. */
/* NOTE(review): sampled listing; the switch head and several case labels
   are elided from this excerpt.  */
18650 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18653 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
/* Each itype maps to a fixed argument-descriptor list which
   arm_expand_neon_args consumes; third argument 1/0 = has return value.  */
18660 return arm_expand_neon_args (target, icode, 1, exp,
18661 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18665 case NEON_SCALARMUL:
18666 case NEON_SCALARMULL:
18667 case NEON_SCALARMULH:
18668 case NEON_SHIFTINSERT:
18669 case NEON_LOGICBINOP:
18670 return arm_expand_neon_args (target, icode, 1, exp,
18671 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18675 return arm_expand_neon_args (target, icode, 1, exp,
18676 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18677 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18681 case NEON_SHIFTIMM:
18682 return arm_expand_neon_args (target, icode, 1, exp,
18683 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18687 return arm_expand_neon_args (target, icode, 1, exp,
18688 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18692 case NEON_REINTERP:
18693 return arm_expand_neon_args (target, icode, 1, exp,
18694 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18698 return arm_expand_neon_args (target, icode, 1, exp,
18699 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
/* RESULTPAIR writes through a destination pointer, so no retval.  */
18701 case NEON_RESULTPAIR:
18702 return arm_expand_neon_args (target, icode, 0, exp,
18703 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18707 case NEON_LANEMULL:
18708 case NEON_LANEMULH:
18709 return arm_expand_neon_args (target, icode, 1, exp,
18710 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18711 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18714 return arm_expand_neon_args (target, icode, 1, exp,
18715 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18716 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18718 case NEON_SHIFTACC:
18719 return arm_expand_neon_args (target, icode, 1, exp,
18720 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18721 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18723 case NEON_SCALARMAC:
18724 return arm_expand_neon_args (target, icode, 1, exp,
18725 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18726 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18730 return arm_expand_neon_args (target, icode, 1, exp,
18731 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18735 case NEON_LOADSTRUCT:
18736 return arm_expand_neon_args (target, icode, 1, exp,
18737 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18739 case NEON_LOAD1LANE:
18740 case NEON_LOADSTRUCTLANE:
18741 return arm_expand_neon_args (target, icode, 1, exp,
18742 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
/* Stores produce no value: have_retval == 0.  */
18746 case NEON_STORESTRUCT:
18747 return arm_expand_neon_args (target, icode, 0, exp,
18748 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18750 case NEON_STORE1LANE:
18751 case NEON_STORESTRUCTLANE:
18752 return arm_expand_neon_args (target, icode, 0, exp,
18753 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18757 gcc_unreachable ();
18760 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18762 neon_reinterpret (rtx dest, rtx src)
/* A lowpart of a same-sized value changes only the mode, not the bits.  */
18764 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18767 /* Emit code to place a Neon pair result in memory locations (with equal
/* NOTE(review): the rest of this comment and some body lines are elided
   from this sampled excerpt.  */
18770 neon_emit_pair_result_insn (enum machine_mode mode,
18771 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18774 rtx mem = gen_rtx_MEM (mode, destaddr);
18775 rtx tmp1 = gen_reg_rtx (mode);
18776 rtx tmp2 = gen_reg_rtx (mode);
/* The intrinsic deposits the two result halves in TMP1 and TMP2...  */
18778 emit_insn (intfn (tmp1, op1, tmp2, op2));
/* ...which are then stored to consecutive MODE-sized memory slots.  */
18780 emit_move_insn (mem, tmp1);
18781 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18782 emit_move_insn (mem, tmp2);
18785 /* Set up operands for a register copy from src to dest, taking care not to
18786 clobber registers in the process.
18787 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18788 be called with a large N, so that should be OK. */
/* NOTE(review): sampled listing; braces and some loop-body lines are
   elided.  COPIED is a bitmask of dest slots already emitted.  */
18791 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18793 unsigned int copied = 0, opctr = 0;
18794 unsigned int done = (1 << count) - 1;
/* Repeat until every one of the COUNT copies has been scheduled.  */
18797 while (copied != done)
18799 for (i = 0; i < count; i++)
/* A copy into dest[i] is safe only if no not-yet-copied source still
   overlaps that destination register.  */
18803 for (j = 0; good && j < count; j++)
18804 if (i != j && (copied & (1 << j)) == 0
18805 && reg_overlap_mentioned_p (src[j], dest[i]))
18810 operands[opctr++] = dest[i];
18811 operands[opctr++] = src[i];
/* Exactly one (dest, src) operand pair per requested copy.  */
18817 gcc_assert (opctr == count * 2);
18820 /* Expand an expression EXP that calls a built-in function,
18821 with result going to TARGET if that's convenient
18822 (and in mode MODE if that's convenient).
18823 SUBTARGET may be used as the target for computing one of EXP's operands.
18824 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): sampled listing; the return type, braces, several local
   declarations and some statements are elided from this excerpt.  */
18827 arm_expand_builtin (tree exp,
18829 rtx subtarget ATTRIBUTE_UNUSED,
18830 enum machine_mode mode ATTRIBUTE_UNUSED,
18831 int ignore ATTRIBUTE_UNUSED)
18833 const struct builtin_description * d;
18834 enum insn_code icode;
18835 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18843 int fcode = DECL_FUNCTION_CODE (fndecl);
18845 enum machine_mode tmode;
18846 enum machine_mode mode0;
18847 enum machine_mode mode1;
18848 enum machine_mode mode2;
/* Neon builtins occupy a dedicated range of function codes and are
   dispatched through the table-driven expander.  */
18850 if (fcode >= ARM_BUILTIN_NEON_BASE)
18851 return arm_expand_neon_builtin (fcode, exp, target);
/* iWMMXt TEXTRM*: extract a (sign/zero-extended) element; the second
   argument is an immediate selector.  */
18855 case ARM_BUILTIN_TEXTRMSB:
18856 case ARM_BUILTIN_TEXTRMUB:
18857 case ARM_BUILTIN_TEXTRMSH:
18858 case ARM_BUILTIN_TEXTRMUH:
18859 case ARM_BUILTIN_TEXTRMSW:
18860 case ARM_BUILTIN_TEXTRMUW:
18861 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18862 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18863 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18864 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18865 : CODE_FOR_iwmmxt_textrmw);
18867 arg0 = CALL_EXPR_ARG (exp, 0);
18868 arg1 = CALL_EXPR_ARG (exp, 1);
18869 op0 = expand_normal (arg0);
18870 op1 = expand_normal (arg1);
18871 tmode = insn_data[icode].operand[0].mode;
18872 mode0 = insn_data[icode].operand[1].mode;
18873 mode1 = insn_data[icode].operand[2].mode;
18875 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18876 op0 = copy_to_mode_reg (mode0, op0);
18877 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18879 /* @@@ better error message */
18880 error ("selector must be an immediate");
/* Error recovery: return a dummy register of the right mode.  */
18881 return gen_reg_rtx (tmode);
18884 || GET_MODE (target) != tmode
18885 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18886 target = gen_reg_rtx (tmode);
18887 pat = GEN_FCN (icode) (target, op0, op1);
/* iWMMXt TINSR*: insert a scalar into a vector lane selected by an
   immediate third argument.  */
18893 case ARM_BUILTIN_TINSRB:
18894 case ARM_BUILTIN_TINSRH:
18895 case ARM_BUILTIN_TINSRW:
18896 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18897 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18898 : CODE_FOR_iwmmxt_tinsrw);
18899 arg0 = CALL_EXPR_ARG (exp, 0);
18900 arg1 = CALL_EXPR_ARG (exp, 1);
18901 arg2 = CALL_EXPR_ARG (exp, 2);
18902 op0 = expand_normal (arg0);
18903 op1 = expand_normal (arg1);
18904 op2 = expand_normal (arg2);
18905 tmode = insn_data[icode].operand[0].mode;
18906 mode0 = insn_data[icode].operand[1].mode;
18907 mode1 = insn_data[icode].operand[2].mode;
18908 mode2 = insn_data[icode].operand[3].mode;
18910 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18911 op0 = copy_to_mode_reg (mode0, op0);
18912 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18913 op1 = copy_to_mode_reg (mode1, op1);
18914 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18916 /* @@@ better error message */
18917 error ("selector must be an immediate");
18921 || GET_MODE (target) != tmode
18922 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18923 target = gen_reg_rtx (tmode);
18924 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* TMCR: move a value to an iWMMXt control register (no result).  */
18930 case ARM_BUILTIN_SETWCX:
18931 arg0 = CALL_EXPR_ARG (exp, 0);
18932 arg1 = CALL_EXPR_ARG (exp, 1);
18933 op0 = force_reg (SImode, expand_normal (arg0));
18934 op1 = expand_normal (arg1);
18935 emit_insn (gen_iwmmxt_tmcr (op1, op0));
/* TMRC: read an iWMMXt control register into a fresh SImode pseudo.  */
18938 case ARM_BUILTIN_GETWCX:
18939 arg0 = CALL_EXPR_ARG (exp, 0);
18940 op0 = expand_normal (arg0);
18941 target = gen_reg_rtx (SImode);
18942 emit_insn (gen_iwmmxt_tmrc (target, op0));
/* WSHUFH: halfword shuffle; second argument is an immediate mask.  */
18945 case ARM_BUILTIN_WSHUFH:
18946 icode = CODE_FOR_iwmmxt_wshufh;
18947 arg0 = CALL_EXPR_ARG (exp, 0);
18948 arg1 = CALL_EXPR_ARG (exp, 1);
18949 op0 = expand_normal (arg0);
18950 op1 = expand_normal (arg1);
18951 tmode = insn_data[icode].operand[0].mode;
18952 mode1 = insn_data[icode].operand[1].mode;
18953 mode2 = insn_data[icode].operand[2].mode;
18955 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18956 op0 = copy_to_mode_reg (mode1, op0);
18957 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18959 /* @@@ better error message */
18960 error ("mask must be an immediate");
18964 || GET_MODE (target) != tmode
18965 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18966 target = gen_reg_rtx (tmode);
18967 pat = GEN_FCN (icode) (target, op0, op1);
/* WSAD variants are ordinary two-operand builtins.  */
18973 case ARM_BUILTIN_WSADB:
18974 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18975 case ARM_BUILTIN_WSADH:
18976 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18977 case ARM_BUILTIN_WSADBZ:
18978 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18979 case ARM_BUILTIN_WSADHZ:
18980 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18982 /* Several three-argument builtins. */
18983 case ARM_BUILTIN_WMACS:
18984 case ARM_BUILTIN_WMACU:
18985 case ARM_BUILTIN_WALIGN:
18986 case ARM_BUILTIN_TMIA:
18987 case ARM_BUILTIN_TMIAPH:
18988 case ARM_BUILTIN_TMIATT:
18989 case ARM_BUILTIN_TMIATB:
18990 case ARM_BUILTIN_TMIABT:
18991 case ARM_BUILTIN_TMIABB:
18992 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18993 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18994 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18995 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18996 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18997 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18998 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18999 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19000 : CODE_FOR_iwmmxt_walign);
19001 arg0 = CALL_EXPR_ARG (exp, 0);
19002 arg1 = CALL_EXPR_ARG (exp, 1);
19003 arg2 = CALL_EXPR_ARG (exp, 2);
19004 op0 = expand_normal (arg0);
19005 op1 = expand_normal (arg1);
19006 op2 = expand_normal (arg2);
19007 tmode = insn_data[icode].operand[0].mode;
19008 mode0 = insn_data[icode].operand[1].mode;
19009 mode1 = insn_data[icode].operand[2].mode;
19010 mode2 = insn_data[icode].operand[3].mode;
19012 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19013 op0 = copy_to_mode_reg (mode0, op0);
19014 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19015 op1 = copy_to_mode_reg (mode1, op1);
19016 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19017 op2 = copy_to_mode_reg (mode2, op2);
19019 || GET_MODE (target) != tmode
19020 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19021 target = gen_reg_rtx (tmode);
19022 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* WZERO: clear a whole 64-bit iWMMXt register.  */
19028 case ARM_BUILTIN_WZERO:
19029 target = gen_reg_rtx (DImode);
19030 emit_insn (gen_iwmmxt_clrdi (target));
19033 case ARM_BUILTIN_THREAD_POINTER:
19034 return arm_load_tp (target);
/* Fall back to the generic two-operand / one-operand tables.  */
19040 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19041 if (d->code == (const enum arm_builtins) fcode)
19042 return arm_expand_binop_builtin (d->icode, exp, target);
19044 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19045 if (d->code == (const enum arm_builtins) fcode)
19046 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19048 /* @@@ Should really do something sensible here. */
19052 /* Return the number (counting from 0) of
19053 the least significant set bit in MASK. */
/* NOTE(review): sampled listing; return type, braces and the loop header
   around the scan below are elided.  */
19056 number_of_first_bit_set (unsigned mask)
19061 (mask & (1 << bit)) == 0;
19068 /* Emit code to push or pop registers to or from the stack. F is the
19069 assembly file. MASK is the registers to push or pop. PUSH is
19070 nonzero if we should push, and zero if we should pop. For debugging
19071 output, if pushing, adjust CFA_OFFSET by the amount of space added
19072 to the stack. REAL_REGS should have the same number of bits set as
19073 MASK, and will be used instead (in the same order) to describe which
19074 registers were saved - this is used to mark the save slots when we
19075 push high registers after moving them to low registers. */
/* NOTE(review): sampled listing; braces and some statements are elided
   from this excerpt.  */
19077 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19078 unsigned long real_regs)
19081 int lo_mask = mask & 0xFF;
19082 int pushed_words = 0;
/* Popping only the PC with no low registers: delegate to thumb_exit,
   which knows how to return via BX when needed.  */
19086 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19088 /* Special case. Do not generate a POP PC statement here, do it in
19090 thumb_exit (f, -1);
/* Emit the EABI .save unwind directive listing the real saved regs.  */
19094 if (ARM_EABI_UNWIND_TABLES && push)
19096 fprintf (f, "\t.save\t{");
19097 for (regno = 0; regno < 15; regno++)
19099 if (real_regs & (1 << regno))
19101 if (real_regs & ((1 << regno) -1))
19103 asm_fprintf (f, "%r", regno);
19106 fprintf (f, "}\n");
19109 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19111 /* Look at the low registers first. */
19112 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19116 asm_fprintf (f, "%r", regno);
/* Another low register follows, so emit a separator.  */
19118 if ((lo_mask & ~1) != 0)
19125 if (push && (mask & (1 << LR_REGNUM)))
19127 /* Catch pushing the LR. */
19131 asm_fprintf (f, "%r", LR_REGNUM);
19135 else if (!push && (mask & (1 << PC_REGNUM)))
19137 /* Catch popping the PC. */
19138 if (TARGET_INTERWORK || TARGET_BACKTRACE
19139 || crtl->calls_eh_return)
19141 /* The PC is never popped directly, instead
19142 it is popped into r3 and then BX is used. */
19143 fprintf (f, "}\n");
19145 thumb_exit (f, -1);
19154 asm_fprintf (f, "%r", PC_REGNUM);
19158 fprintf (f, "}\n");
/* Record the CFA adjustment and per-register save slots for dwarf2
   frame output.  */
19160 if (push && pushed_words && dwarf2out_do_frame ())
19162 char *l = dwarf2out_cfi_label (false);
19163 int pushed_mask = real_regs;
19165 *cfa_offset += pushed_words * 4;
19166 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19169 pushed_mask = real_regs;
19170 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19172 if (pushed_mask & 1)
19173 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19178 /* Generate code to return from a thumb function.
19179 If 'reg_containing_return_addr' is -1, then the return address is
19180 actually on the stack, at the stack pointer. */
/* NOTE(review): sampled listing; return type, braces, several local
   declarations and some statements are elided from this excerpt.  */
19182 thumb_exit (FILE *f, int reg_containing_return_addr)
19184 unsigned regs_available_for_popping;
19185 unsigned regs_to_pop;
19187 unsigned available;
19191 int restore_a4 = FALSE;
19193 /* Compute the registers we need to pop. */
19197 if (reg_containing_return_addr == -1)
19199 regs_to_pop |= 1 << LR_REGNUM;
19203 if (TARGET_BACKTRACE)
19205 /* Restore the (ARM) frame pointer and stack pointer. */
19206 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19210 /* If there is nothing to pop then just emit the BX instruction and
19212 if (pops_needed == 0)
19214 if (crtl->calls_eh_return)
19215 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19217 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19220 /* Otherwise if we are not supporting interworking and we have not created
19221 a backtrace structure and the function was not entered in ARM mode then
19222 just pop the return address straight into the PC. */
19223 else if (!TARGET_INTERWORK
19224 && !TARGET_BACKTRACE
19225 && !is_called_in_ARM_mode (current_function_decl)
19226 && !crtl->calls_eh_return)
19228 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19232 /* Find out how many of the (return) argument registers we can corrupt. */
19233 regs_available_for_popping = 0;
19235 /* If returning via __builtin_eh_return, the bottom three registers
19236 all contain information needed for the return. */
19237 if (crtl->calls_eh_return)
19241 /* If we can deduce the registers used from the function's
19242 return value. This is more reliable than examining
19243 df_regs_ever_live_p () because that will be set if the register is
19244 ever used in the function, not just if the register is used
19245 to hold a return value. */
19247 if (crtl->return_rtx != 0)
19248 mode = GET_MODE (crtl->return_rtx);
19250 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19252 size = GET_MODE_SIZE (mode);
19256 /* In a void function we can use any argument register.
19257 In a function that returns a structure on the stack
19258 we can use the second and third argument registers. */
19259 if (mode == VOIDmode)
19260 regs_available_for_popping =
19261 (1 << ARG_REGISTER (1))
19262 | (1 << ARG_REGISTER (2))
19263 | (1 << ARG_REGISTER (3));
19265 regs_available_for_popping =
19266 (1 << ARG_REGISTER (2))
19267 | (1 << ARG_REGISTER (3));
19269 else if (size <= 4)
19270 regs_available_for_popping =
19271 (1 << ARG_REGISTER (2))
19272 | (1 << ARG_REGISTER (3));
19273 else if (size <= 8)
19274 regs_available_for_popping =
19275 (1 << ARG_REGISTER (3));
19278 /* Match registers to be popped with registers into which we pop them. */
19279 for (available = regs_available_for_popping,
19280 required = regs_to_pop;
19281 required != 0 && available != 0;
19282 available &= ~(available & - available),
19283 required &= ~(required & - required))
19286 /* If we have any popping registers left over, remove them. */
19288 regs_available_for_popping &= ~available;
19290 /* Otherwise if we need another popping register we can use
19291 the fourth argument register. */
19292 else if (pops_needed)
19294 /* If we have not found any free argument registers and
19295 reg a4 contains the return address, we must move it. */
19296 if (regs_available_for_popping == 0
19297 && reg_containing_return_addr == LAST_ARG_REGNUM)
19299 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19300 reg_containing_return_addr = LR_REGNUM;
19302 else if (size > 12)
19304 /* Register a4 is being used to hold part of the return value,
19305 but we have dire need of a free, low register. */
19308 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
19311 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19313 /* The fourth argument register is available. */
19314 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19320 /* Pop as many registers as we can. */
19321 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19322 regs_available_for_popping);
19324 /* Process the registers we popped. */
19325 if (reg_containing_return_addr == -1)
19327 /* The return address was popped into the lowest numbered register. */
19328 regs_to_pop &= ~(1 << LR_REGNUM);
19330 reg_containing_return_addr =
19331 number_of_first_bit_set (regs_available_for_popping);
19333 /* Remove this register from the mask of available registers, so that
19334 the return address will not be corrupted by further pops. */
19335 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19338 /* If we popped other registers then handle them here. */
19339 if (regs_available_for_popping)
19343 /* Work out which register currently contains the frame pointer. */
19344 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
19346 /* Move it into the correct place. */
19347 asm_fprintf (f, "\tmov\t%r, %r\n",
19348 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
19350 /* (Temporarily) remove it from the mask of popped registers. */
19351 regs_available_for_popping &= ~(1 << frame_pointer);
19352 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
19354 if (regs_available_for_popping)
19358 /* We popped the stack pointer as well,
19359 find the register that contains it. */
19360 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
19362 /* Move it into the stack register. */
19363 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
19365 /* At this point we have popped all necessary registers, so
19366 do not worry about restoring regs_available_for_popping
19367 to its correct value:
19369 assert (pops_needed == 0)
19370 assert (regs_available_for_popping == (1 << frame_pointer))
19371 assert (regs_to_pop == (1 << STACK_POINTER)) */
19375 /* Since we have just moved the popped value into the frame
19376 pointer, the popping register is available for reuse, and
19377 we know that we still have the stack pointer left to pop. */
19378 regs_available_for_popping |= (1 << frame_pointer);
19382 /* If we still have registers left on the stack, but we no longer have
19383 any registers into which we can pop them, then we must move the return
19384 address into the link register and make available the register that
19386 if (regs_available_for_popping == 0 && pops_needed > 0)
19388 regs_available_for_popping |= 1 << reg_containing_return_addr;
19390 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
19391 reg_containing_return_addr);
19393 reg_containing_return_addr = LR_REGNUM;
19396 /* If we have registers left on the stack then pop some more.
19397 We know that at most we will want to pop FP and SP. */
19398 if (pops_needed > 0)
19403 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19404 regs_available_for_popping);
19406 /* We have popped either FP or SP.
19407 Move whichever one it is into the correct register. */
19408 popped_into = number_of_first_bit_set (regs_available_for_popping);
19409 move_to = number_of_first_bit_set (regs_to_pop);
19411 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
19413 regs_to_pop &= ~(1 << move_to);
19418 /* If we still have not popped everything then we must have only
19419 had one register available to us and we are now popping the SP. */
19420 if (pops_needed > 0)
19424 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19425 regs_available_for_popping);
19427 popped_into = number_of_first_bit_set (regs_available_for_popping);
19429 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19431 assert (regs_to_pop == (1 << STACK_POINTER))
19432 assert (pops_needed == 1)
19436 /* If necessary restore the a4 register. */
19439 if (reg_containing_return_addr != LR_REGNUM)
19441 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19442 reg_containing_return_addr = LR_REGNUM;
/* a4 was stashed in IP earlier (size > 12 path); restore it now.  */
19445 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19448 if (crtl->calls_eh_return)
19449 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19451 /* Return to caller. */
19452 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
/* Final-prescan hook for Thumb-1: optionally annotate the assembly
   output with each insn's address when -dp/-fprint-asm-name is on.
   NOTE(review): sampled listing; return type and braces are elided.  */
19457 thumb1_final_prescan_insn (rtx insn)
19459 if (flag_print_asm_name)
19460 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19461 INSN_ADDRESSES (INSN_UID (insn)));
/* Return nonzero if VAL is an 8-bit constant shifted left by some
   amount (i.e. loadable with a Thumb MOV + LSL sequence).
   NOTE(review): sampled listing; return statements are elided.  */
19465 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19467 unsigned HOST_WIDE_INT mask = 0xff;
/* Only the low 32 bits are significant on ARM.  */
19470 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19471 if (val == 0) /* XXX */
/* Try every position of the 8-bit window within a 32-bit word.  */
19474 for (i = 0; i < 25; i++)
19475 if ((val & (mask << i)) == val)
19481 /* Returns nonzero if the current function contains,
19482 or might contain a far jump. */
/* NOTE(review): sampled listing; return type, braces and return
   statements are elided from this excerpt.  */
19484 thumb_far_jump_used_p (void)
19488 /* This test is only important for leaf functions. */
19489 /* assert (!leaf_function_p ()); */
19491 /* If we have already decided that far jumps may be used,
19492 do not bother checking again, and always return true even if
19493 it turns out that they are not being used. Once we have made
19494 the decision that far jumps are present (and that hence the link
19495 register will be pushed onto the stack) we cannot go back on it. */
19496 if (cfun->machine->far_jump_used)
19499 /* If this function is not being called from the prologue/epilogue
19500 generation code then it must be being called from the
19501 INITIAL_ELIMINATION_OFFSET macro. */
19502 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19504 /* In this case we know that we are being asked about the elimination
19505 of the arg pointer register. If that register is not being used,
19506 then there are no arguments on the stack, and we do not have to
19507 worry that a far jump might force the prologue to push the link
19508 register, changing the stack offsets. In this case we can just
19509 return false, since the presence of far jumps in the function will
19510 not affect stack offsets.
19512 If the arg pointer is live (or if it was live, but has now been
19513 eliminated and so set to dead) then we do have to test to see if
19514 the function might contain a far jump. This test can lead to some
19515 false negatives, since before reload is completed, the length of
19516 branch instructions is not known, so gcc defaults to returning their
19517 longest length, which in turn sets the far jump attribute to true.
19519 A false negative will not result in bad code being generated, but it
19520 will result in a needless push and pop of the link register. We
19521 hope that this does not occur too often.
19523 If we need doubleword stack alignment this could affect the other
19524 elimination offsets so we can't risk getting it wrong. */
19525 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19526 cfun->machine->arg_pointer_live = 1;
19527 else if (!cfun->machine->arg_pointer_live)
19531 /* Check to see if the function contains a branch
19532 insn with the far jump attribute set. */
19533 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19535 if (GET_CODE (insn) == JUMP_INSN
19536 /* Ignore tablejump patterns. */
19537 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19538 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19539 && get_attr_far_jump (insn) == FAR_JUMP_YES
19542 /* Record the fact that we have decided that
19543 the function does use far jumps. */
19544 cfun->machine->far_jump_used = 1;
19552 /* Return nonzero if FUNC must be entered in ARM mode. */
/* NOTE(review): sampled listing; return type, braces and a return
   statement are elided from this excerpt.  */
19554 is_called_in_ARM_mode (tree func)
19556 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19558 /* Ignore the problem about functions whose address is taken. */
19559 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
/* Otherwise only functions explicitly marked "interfacearm" need an
   ARM-mode entry sequence.  */
19563 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19569 /* Given the stack offsets and register mask in OFFSETS, decide how
19570 many additional registers to push instead of subtracting a constant
19571 from SP. For epilogues the principle is the same except we use pop.
19572 FOR_PROLOGUE indicates which we're generating. */
/* NOTE(review): sampled listing; return type, braces and several return
   statements are elided from this excerpt.  */
19574 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
19576 HOST_WIDE_INT amount;
19577 unsigned long live_regs_mask = offsets->saved_regs_mask;
19578 /* Extract a mask of the ones we can give to the Thumb's push/pop
19580 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
19581 /* Then count how many other high registers will need to be pushed. */
19582 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19583 int n_free, reg_base;
19585 if (!for_prologue && frame_pointer_needed)
19586 amount = offsets->locals_base - offsets->saved_regs;
19588 amount = offsets->outgoing_args - offsets->saved_regs;
19590 /* If the stack frame size is 512 exactly, we can save one load
19591 instruction, which should make this a win even when optimizing
19593 if (!optimize_size && amount != 512)
19596 /* Can't do this if there are high registers to push. */
19597 if (high_regs_pushed != 0)
19600 /* Shouldn't do it in the prologue if no registers would normally
19601 be pushed at all. In the epilogue, also allow it if we'll have
19602 a pop insn for the PC. */
19605 || TARGET_BACKTRACE
19606 || (live_regs_mask & 1 << LR_REGNUM) == 0
19607 || TARGET_INTERWORK
19608 || crtl->args.pretend_args_size != 0))
19611 /* Don't do this if thumb_expand_prologue wants to emit instructions
19612 between the push and the stack frame allocation. */
19614 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
19615 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
/* Count how many consecutive low registers above the return-value
   regs are free to be used as scratch push/pop slots.  */
19622 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
19623 live_regs_mask >>= reg_base;
19626 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
19627 && (for_prologue || call_used_regs[reg_base + n_free]))
19629 live_regs_mask >>= 1;
/* Stack adjustments are always a whole number of words.  */
19635 gcc_assert (amount / 4 * 4 == amount);
19637 if (amount >= 512 && (amount - n_free * 4) < 512)
19638 return (amount - 508) / 4;
19639 if (amount <= n_free * 4)
19644 /* The bits which aren't usefully expanded as rtl. */
/* Emit, as literal assembly text, the parts of the Thumb-1 epilogue
   that are not represented in RTL: restoring high registers (r8-r12)
   through low-register scratch copies, popping the saved low
   registers, and returning.
   NOTE(review): this listing is elided -- intermediate source lines
   are missing -- so the comments below describe only the visible
   code.  */
19646 thumb_unexpanded_epilogue (void)
19648 arm_stack_offsets *offsets;
19650 unsigned long live_regs_mask = 0;
19651 int high_regs_pushed = 0;
19653 int had_to_push_lr;
/* Bail out early: the epilogue was already emitted via a return insn,
   or the function is naked and must emit no epilogue at all.  */
19656 if (cfun->machine->return_used_this_function != 0)
19659 if (IS_NAKED (arm_current_func_type ()))
19662 offsets = arm_get_frame_offsets ();
19663 live_regs_mask = offsets->saved_regs_mask;
/* 0x0f00 selects the high registers r8-r11 in the saved-regs mask.  */
19664 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19666 /* If we can deduce the registers used from the function's return value.
19667 This is more reliable than examining df_regs_ever_live_p () because that
19668 will be set if the register is ever used in the function, not just if
19669 the register is used to hold a return value. */
19670 size = arm_size_return_regs ();
19672 extra_pop = thumb1_extra_regs_pushed (offsets, false);
19675 unsigned long extra_mask = (1 << extra_pop) - 1;
19676 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
19679 /* The prolog may have pushed some high registers to use as
19680 work registers. e.g. the testsuite file:
19681 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19682 compiles to produce:
19683 push {r4, r5, r6, r7, lr}
19687 as part of the prolog. We have to undo that pushing here. */
19689 if (high_regs_pushed)
19691 unsigned long mask = live_regs_mask & 0xff;
19694 /* The available low registers depend on the size of the value we are
19702 /* Oh dear! We have no low registers into which we can pop
19705 ("no low registers available for popping high registers");
/* Find the first saved high register (r8-r12) to restore.  */
19707 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19708 if (live_regs_mask & (1 << next_hi_reg))
19711 while (high_regs_pushed)
19713 /* Find lo register(s) into which the high register(s) can
19715 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19717 if (mask & (1 << regno))
19718 high_regs_pushed--;
19719 if (high_regs_pushed == 0)
19723 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19725 /* Pop the values into the low register(s). */
19726 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19728 /* Move the value(s) into the high registers. */
19729 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19731 if (mask & (1 << regno))
19733 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
/* Advance to the next saved high register, if any remain.  */
19736 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19737 if (live_regs_mask & (1 << next_hi_reg))
/* High registers are now fully restored; drop them from the mask.  */
19742 live_regs_mask &= ~0x0f00;
19745 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19746 live_regs_mask &= 0xff;
19748 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19750 /* Pop the return address into the PC. */
19751 if (had_to_push_lr)
19752 live_regs_mask |= 1 << PC_REGNUM;
19754 /* Either no argument registers were pushed or a backtrace
19755 structure was created which includes an adjusted stack
19756 pointer, so just pop everything. */
19757 if (live_regs_mask)
19758 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19761 /* We have either just popped the return address into the
19762 PC or it was kept in LR for the entire function.
19763 Note that thumb_pushpop has already called thumb_exit if the
19764 PC was in the list. */
19765 if (!had_to_push_lr)
19766 thumb_exit (asm_out_file, LR_REGNUM);
19770 /* Pop everything but the return address. */
19771 if (live_regs_mask)
19772 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19775 if (had_to_push_lr)
19779 /* We have no free low regs, so save one. */
19780 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19784 /* Get the return address into a temporary register. */
19785 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19786 1 << LAST_ARG_REGNUM);
19790 /* Move the return address to lr. */
19791 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19793 /* Restore the low register. */
19794 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19799 regno = LAST_ARG_REGNUM;
19804 /* Remove the argument registers that were pushed onto the stack. */
19805 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19806 SP_REGNUM, SP_REGNUM,
19807 crtl->args.pretend_args_size);
/* Return through the register holding the return address.  */
19809 thumb_exit (asm_out_file, regno);
19815 /* Functions to save and restore machine-specific function data. */
/* Allocate and zero a fresh per-function machine_function record.
   Installed as init_machine_status by arm_init_expanders.  */
19816 static struct machine_function *
19817 arm_init_machine_status (void)
19819 struct machine_function *machine;
19820 machine = ggc_alloc_cleared_machine_function ();
/* Only needed when ARM_FT_UNKNOWN is nonzero: ggc_alloc_cleared_*
   already zero-fills the record.  */
19822 #if ARM_FT_UNKNOWN != 0
19823 machine->func_type = ARM_FT_UNKNOWN;
19828 /* Return an RTX indicating where the return address to the
19829 calling function can be found. */
/* COUNT and FRAME follow the RETURN_ADDR_RTX contract; the visible
   path returns the pseudo holding LR's value on entry.
   NOTE(review): listing elided -- handling for COUNT != 0 is not
   visible here.  */
19831 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19836 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19839 /* Do anything needed before RTL is emitted for each function. */
19841 arm_init_expanders (void)
19843 /* Arrange to initialize and mark the machine per-function status. */
19844 init_machine_status = arm_init_machine_status;
19846 /* This is to stop the combine pass optimizing away the alignment
19847 adjustment of va_arg. */
19848 /* ??? It is claimed that this should not be necessary. */
/* Marking arg_pointer_rtx as a pointer with PARM_BOUNDARY alignment
   preserves the va_arg alignment adjustment through combine.  */
19850 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19854 /* Like arm_compute_initial_elimination offset. Simpler because there
19855 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19856 to point at the base of the local variables after static stack
19857 space for a function has been allocated. */
/* Return the byte offset between elimination registers FROM and TO,
   expressed as differences of the cached arm_stack_offsets fields.
   NOTE(review): listing elided -- the outer switch on FROM is not
   visible; the two visible case groups are the ARG_POINTER and
   FRAME_POINTER sub-switches on TO.  */
19860 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19862 arm_stack_offsets *offsets;
19864 offsets = arm_get_frame_offsets ();
/* Eliminating from ARG_POINTER_REGNUM: offsets are relative to
   saved_args.  */
19868 case ARG_POINTER_REGNUM:
19871 case STACK_POINTER_REGNUM:
19872 return offsets->outgoing_args - offsets->saved_args;
19874 case FRAME_POINTER_REGNUM:
19875 return offsets->soft_frame - offsets->saved_args;
19877 case ARM_HARD_FRAME_POINTER_REGNUM:
19878 return offsets->saved_regs - offsets->saved_args;
19880 case THUMB_HARD_FRAME_POINTER_REGNUM:
19881 return offsets->locals_base - offsets->saved_args;
19884 gcc_unreachable ();
/* Eliminating from FRAME_POINTER_REGNUM: offsets are relative to
   soft_frame.  */
19888 case FRAME_POINTER_REGNUM:
19891 case STACK_POINTER_REGNUM:
19892 return offsets->outgoing_args - offsets->soft_frame;
19894 case ARM_HARD_FRAME_POINTER_REGNUM:
19895 return offsets->saved_regs - offsets->soft_frame;
19897 case THUMB_HARD_FRAME_POINTER_REGNUM:
19898 return offsets->locals_base - offsets->soft_frame;
19901 gcc_unreachable ();
19906 gcc_unreachable ();
19910 /* Generate the rest of a function's prologue. */
/* Emit the RTL portion of the Thumb-1 prologue: PIC register load,
   interworking slot setup, the stack decrement, frame pointer setup,
   and a scheduling barrier where required.
   NOTE(review): listing elided -- intermediate lines (including the
   small-decrement/large-decrement branch structure) are missing.  */
19912 thumb1_expand_prologue (void)
19916 HOST_WIDE_INT amount;
19917 arm_stack_offsets *offsets;
19918 unsigned long func_type;
19920 unsigned long live_regs_mask;
19922 func_type = arm_current_func_type ();
19924 /* Naked functions don't have prologues. */
19925 if (IS_NAKED (func_type))
19928 if (IS_INTERRUPT (func_type))
19930 error ("interrupt Service Routines cannot be coded in Thumb mode");
19934 offsets = arm_get_frame_offsets ();
19935 live_regs_mask = offsets->saved_regs_mask;
19936 /* Load the pic register before setting the frame pointer,
19937 so we can use r7 as a temporary work register. */
19938 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19939 arm_load_pic_register (live_regs_mask);
19941 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19942 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19943 stack_pointer_rtx);
/* Stack to allocate beyond the registers already pushed; registers
   popped "for free" by thumb1_extra_regs_pushed reduce it.  */
19945 amount = offsets->outgoing_args - offsets->saved_regs;
19946 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
/* Small decrement: a single immediate subtract suffices.  */
19951 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19952 GEN_INT (- amount)));
19953 RTX_FRAME_RELATED_P (insn) = 1;
19959 /* The stack decrement is too big for an immediate value in a single
19960 insn. In theory we could issue multiple subtracts, but after
19961 three of them it becomes more space efficient to place the full
19962 value in the constant pool and load into a register. (Also the
19963 ARM debugger really likes to see only one stack decrement per
19964 function). So instead we look for a scratch register into which
19965 we can load the decrement, and then we subtract this from the
19966 stack pointer. Unfortunately on the thumb the only available
19967 scratch registers are the argument registers, and we cannot use
19968 these as they may hold arguments to the function. Instead we
19969 attempt to locate a call preserved register which is used by this
19970 function. If we can find one, then we know that it will have
19971 been pushed at the start of the prologue and so we can corrupt
19973 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19974 if (live_regs_mask & (1 << regno))
19977 gcc_assert(regno <= LAST_LO_REGNUM);
19979 reg = gen_rtx_REG (SImode, regno);
19981 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19983 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19984 stack_pointer_rtx, reg));
19985 RTX_FRAME_RELATED_P (insn) = 1;
/* The register-based subtract is opaque to dwarf2out; attach an
   explicit SP = SP - amount note so unwind info stays correct.  */
19986 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19987 plus_constant (stack_pointer_rtx,
19989 RTX_FRAME_RELATED_P (dwarf) = 1;
19990 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19994 if (frame_pointer_needed)
19995 thumb_set_frame_pointer (offsets);
19997 /* If we are profiling, make sure no instructions are scheduled before
19998 the call to mcount. Similarly if the user has requested no
19999 scheduling in the prolog. Similarly if we want non-call exceptions
20000 using the EABI unwinder, to prevent faulting instructions from being
20001 swapped with a stack adjustment. */
20002 if (crtl->profile || !TARGET_SCHED_PROLOG
20003 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
20004 emit_insn (gen_blockage ());
/* LR need not be saved unless thumb_force_lr_save says so, but any
   pushed low register forces the save anyway.  */
20006 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20007 if (live_regs_mask & 0xff)
20008 cfun->machine->lr_save_eliminated = 0;
/* Emit the RTL portion of the Thumb-1 epilogue: deallocate the local
   stack frame (directly or via the frame pointer) and emit the USE /
   CLOBBER markers that keep dataflow honest for the text-level
   epilogue emitted later by thumb_unexpanded_epilogue.
   NOTE(review): listing elided -- intermediate lines are missing.  */
20013 thumb1_expand_epilogue (void)
20015 HOST_WIDE_INT amount;
20016 arm_stack_offsets *offsets;
20019 /* Naked functions don't have prologues. */
20020 if (IS_NAKED (arm_current_func_type ()))
20023 offsets = arm_get_frame_offsets ();
20024 amount = offsets->outgoing_args - offsets->saved_regs;
20026 if (frame_pointer_needed)
/* With a frame pointer, first restore SP from it, then only the
   locals above the saved registers remain to be popped.  */
20028 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20029 amount = offsets->locals_base - offsets->saved_regs;
20031 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20033 gcc_assert (amount >= 0);
/* Small increment fits in one immediate add...  */
20037 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20038 GEN_INT (amount)));
20041 /* r3 is always free in the epilogue. */
/* ...otherwise load the amount into r3 (LAST_ARG_REGNUM) first.  */
20042 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20044 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20045 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20049 /* Emit a USE (stack_pointer_rtx), so that
20050 the stack adjustment will not be deleted. */
20051 emit_insn (gen_prologue_use (stack_pointer_rtx));
20053 if (crtl->profile || !TARGET_SCHED_PROLOG)
20054 emit_insn (gen_blockage ());
20056 /* Emit a clobber for each insn that will be restored in the epilogue,
20057 so that flow2 will get register lifetimes correct. */
20058 for (regno = 0; regno < 13; regno++)
20059 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20060 emit_clobber (gen_rtx_REG (SImode, regno));
/* Keep LR live to the end if it was never otherwise used, so the
   return path remains valid.  */
20062 if (! df_regs_ever_live_p (LR_REGNUM))
20063 emit_use (gen_rtx_REG (SImode, LR_REGNUM))
/* Write the text-level part of the Thumb-1 prologue: ARM->Thumb
   entry shim, anonymous-arg pushes, the backtrace structure when
   TARGET_BACKTRACE, the low-register push, and high-register saves
   via low-register copies.  Also emits .pad unwind directives and
   dwarf CFA notes for the stack adjustments it performs.
   NOTE(review): listing elided -- intermediate lines are missing.  */
20067 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20069 arm_stack_offsets *offsets;
20070 unsigned long live_regs_mask = 0;
20071 unsigned long l_mask;
20072 unsigned high_regs_pushed = 0;
20073 int cfa_offset = 0;
20076 if (IS_NAKED (arm_current_func_type ()))
/* Functions that may be entered in ARM state need a mode-switch
   stub before the Thumb body.  */
20079 if (is_called_in_ARM_mode (current_function_decl))
20083 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20084 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20086 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20088 /* Generate code sequence to switch us into Thumb mode. */
20089 /* The .code 32 directive has already been emitted by
20090 ASM_DECLARE_FUNCTION_NAME. */
/* orr ip, pc, #1 sets the Thumb bit; bx ip performs the switch.  */
20091 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20092 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20094 /* Generate a label, so that the debugger will notice the
20095 change in instruction sets. This label is also used by
20096 the assembler to bypass the ARM code when this function
20097 is called from a Thumb encoded function elsewhere in the
20098 same file. Hence the definition of STUB_NAME here must
20099 agree with the definition in gas/config/tc-arm.c. */
20101 #define STUB_NAME ".real_start_of"
20103 fprintf (f, "\t.code\t16\n");
20105 if (arm_dllexport_name_p (name))
20106 name = arm_strip_name_encoding (name);
20108 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20109 fprintf (f, "\t.thumb_func\n");
20110 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20113 if (crtl->args.pretend_args_size)
20115 /* Output unwind directive for the stack adjustment. */
20116 if (ARM_EABI_UNWIND_TABLES)
20117 fprintf (f, "\t.pad #%d\n",
20118 crtl->args.pretend_args_size);
20120 if (cfun->machine->uses_anonymous_args)
/* Varargs: push the tail of the argument registers so the anonymous
   arguments sit contiguously on the stack.  */
20124 fprintf (f, "\tpush\t{");
20126 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20128 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20129 regno <= LAST_ARG_REGNUM;
20131 asm_fprintf (f, "%r%s", regno,
20132 regno == LAST_ARG_REGNUM ? "" : ", ");
20134 fprintf (f, "}\n");
/* Otherwise just reserve the pretend-args space.  */
20137 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20138 SP_REGNUM, SP_REGNUM,
20139 crtl->args.pretend_args_size);
20141 /* We don't need to record the stores for unwinding (would it
20142 help the debugger any if we did?), but record the change in
20143 the stack pointer. */
20144 if (dwarf2out_do_frame ())
20146 char *l = dwarf2out_cfi_label (false);
20148 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20149 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20153 /* Get the registers we are going to push. */
20154 offsets = arm_get_frame_offsets ();
20155 live_regs_mask = offsets->saved_regs_mask;
20156 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
/* 0x40ff = low registers r0-r7 plus LR (bit 14).  */
20157 l_mask = live_regs_mask & 0x40ff;
20158 /* Then count how many other high registers will need to be pushed. */
20159 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20161 if (TARGET_BACKTRACE)
20164 unsigned work_register;
20166 /* We have been asked to create a stack backtrace structure.
20167 The code looks like this:
20171 0 sub SP, #16 Reserve space for 4 registers.
20172 2 push {R7} Push low registers.
20173 4 add R7, SP, #20 Get the stack pointer before the push.
20174 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20175 8 mov R7, PC Get hold of the start of this code plus 12.
20176 10 str R7, [SP, #16] Store it.
20177 12 mov R7, FP Get hold of the current frame pointer.
20178 14 str R7, [SP, #4] Store it.
20179 16 mov R7, LR Get hold of the current return address.
20180 18 str R7, [SP, #12] Store it.
20181 20 add R7, SP, #16 Point at the start of the backtrace structure.
20182 22 mov FP, R7 Put this value into the frame pointer. */
20184 work_register = thumb_find_work_register (live_regs_mask);
20186 if (ARM_EABI_UNWIND_TABLES)
20187 asm_fprintf (f, "\t.pad #16\n");
20190 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20191 SP_REGNUM, SP_REGNUM);
20193 if (dwarf2out_do_frame ())
20195 char *l = dwarf2out_cfi_label (false);
20197 cfa_offset = cfa_offset + 16;
20198 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20203 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20204 offset = bit_count (l_mask) * UNITS_PER_WORD;
/* Record the original SP (before the 16-byte reservation and the
   register push) into the backtrace structure.  */
20209 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20210 offset + 16 + crtl->args.pretend_args_size);
20212 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20215 /* Make sure that the instruction fetching the PC is in the right place
20216 to calculate "start of backtrace creation code + 12". */
/* Two orderings below: which runs first depends on an elided
   condition, so that the mov from PC lands at offset 8.  */
20219 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20220 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20222 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20223 ARM_HARD_FRAME_POINTER_REGNUM);
20224 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20229 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20230 ARM_HARD_FRAME_POINTER_REGNUM);
20231 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20233 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20234 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20238 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20239 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20241 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20243 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20244 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20246 /* Optimization: If we are not pushing any low registers but we are going
20247 to push some high registers then delay our first push. This will just
20248 be a push of LR and we can combine it with the push of the first high
20250 else if ((l_mask & 0xff) != 0
20251 || (high_regs_pushed == 0 && l_mask))
20253 unsigned long mask = l_mask;
20254 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20255 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20258 if (high_regs_pushed)
20260 unsigned pushable_regs;
20261 unsigned next_hi_reg;
/* Find the highest saved high register; they are copied down into
   low registers and pushed in batches.  */
20263 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20264 if (live_regs_mask & (1 << next_hi_reg))
20267 pushable_regs = l_mask & 0xff;
20269 if (pushable_regs == 0)
20270 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20272 while (high_regs_pushed > 0)
20274 unsigned long real_regs_mask = 0;
20276 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20278 if (pushable_regs & (1 << regno))
/* Stage the high register's value in a pushable low register;
   real_regs_mask remembers which high register it really is, for
   correct unwind annotations.  */
20280 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20282 high_regs_pushed --;
20283 real_regs_mask |= (1 << next_hi_reg);
20285 if (high_regs_pushed)
20287 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20289 if (live_regs_mask & (1 << next_hi_reg))
20294 pushable_regs &= ~((1 << regno) - 1);
20300 /* If we had to find a work register and we have not yet
20301 saved the LR then add it to the list of regs to push. */
20302 if (l_mask == (1 << LR_REGNUM))
20304 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20306 real_regs_mask | (1 << LR_REGNUM));
20310 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
20315 /* Handle the case of a double word load into a low register from
20316 a computed memory address. The computed address may involve a
20317 register which is overwritten by the load. */
/* Output-template helper: emits the two ldr instructions for a
   64-bit load, choosing the load order so the address register is
   not clobbered before its second use.  operands[0] is the low half
   of the destination pair; %H0 names the high half.
   NOTE(review): listing elided -- the switch case labels themselves
   (REG, PRE/POST modify, PLUS, LABEL_REF, ...) are not visible.  */
20319 thumb_load_double_from_address (rtx *operands)
20327 gcc_assert (GET_CODE (operands[0]) == REG);
20328 gcc_assert (GET_CODE (operands[1]) == MEM);
20330 /* Get the memory address. */
20331 addr = XEXP (operands[1], 0);
20333 /* Work out how the memory address is computed. */
20334 switch (GET_CODE (addr))
20337 operands[2] = adjust_address (operands[1], SImode, 4);
/* If the destination's low word is the address register, load the
   high word first so the address survives.  */
20339 if (REGNO (operands[0]) == REGNO (addr))
20341 output_asm_insn ("ldr\t%H0, %2", operands);
20342 output_asm_insn ("ldr\t%0, %1", operands);
20346 output_asm_insn ("ldr\t%0, %1", operands);
20347 output_asm_insn ("ldr\t%H0, %2", operands);
20352 /* Compute <address> + 4 for the high order load. */
20353 operands[2] = adjust_address (operands[1], SImode, 4);
20355 output_asm_insn ("ldr\t%0, %1", operands);
20356 output_asm_insn ("ldr\t%H0, %2", operands);
20360 arg1 = XEXP (addr, 0);
20361 arg2 = XEXP (addr, 1);
20363 if (CONSTANT_P (arg1))
20364 base = arg2, offset = arg1;
20366 base = arg1, offset = arg2;
20368 gcc_assert (GET_CODE (base) == REG);
20370 /* Catch the case of <address> = <reg> + <reg> */
20371 if (GET_CODE (offset) == REG)
20373 int reg_offset = REGNO (offset);
20374 int reg_base = REGNO (base);
20375 int reg_dest = REGNO (operands[0]);
20377 /* Add the base and offset registers together into the
20378 higher destination register. */
20379 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20380 reg_dest + 1, reg_base, reg_offset);
20382 /* Load the lower destination register from the address in
20383 the higher destination register. */
20384 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20385 reg_dest, reg_dest + 1);
20387 /* Load the higher destination register from its own address
20389 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
20390 reg_dest + 1, reg_dest + 1);
20394 /* Compute <address> + 4 for the high order load. */
20395 operands[2] = adjust_address (operands[1], SImode, 4);
20397 /* If the computed address is held in the low order register
20398 then load the high order register first, otherwise always
20399 load the low order register first. */
20400 if (REGNO (operands[0]) == REGNO (base))
20402 output_asm_insn ("ldr\t%H0, %2", operands);
20403 output_asm_insn ("ldr\t%0, %1", operands);
20407 output_asm_insn ("ldr\t%0, %1", operands);
20408 output_asm_insn ("ldr\t%H0, %2", operands);
20414 /* With no registers to worry about we can just load the value
20416 operands[2] = adjust_address (operands[1], SImode, 4);
20418 output_asm_insn ("ldr\t%H0, %2", operands);
20419 output_asm_insn ("ldr\t%0, %1", operands);
20423 gcc_unreachable ();
/* Output a Thumb ldmia/stmia pair copying N words (N = 2 or 3,
   judging by the visible cases) from the address in operands[1] to
   operands[0], using operands[4..] as scratch registers.  The
   registers are sorted into ascending order first, as the ldm/stm
   register lists require.
   NOTE(review): listing elided -- the swap temporaries and the
   switch framing are not visible.  */
20430 thumb_output_move_mem_multiple (int n, rtx *operands)
20437 if (REGNO (operands[4]) > REGNO (operands[5]))
20440 operands[4] = operands[5];
20443 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
20444 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
/* Three-register case: a small bubble sort of operands[4..6].  */
20448 if (REGNO (operands[4]) > REGNO (operands[5]))
20451 operands[4] = operands[5];
20454 if (REGNO (operands[5]) > REGNO (operands[6]))
20457 operands[5] = operands[6];
20460 if (REGNO (operands[4]) > REGNO (operands[5]))
20463 operands[4] = operands[5];
20467 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
20468 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
20472 gcc_unreachable ();
20478 /* Output a call-via instruction for thumb state. */
/* Emit "bl <label>" to a shared per-register trampoline that does
   "bx <reg>"; the trampoline label is created lazily, either once
   per compilation unit (normal text section) or once per function
   (function sections).  */
20480 thumb_call_via_reg (rtx reg)
20482 int regno = REGNO (reg);
20485 gcc_assert (regno < LR_REGNUM);
20487 /* If we are in the normal text section we can use a single instance
20488 per compilation unit. If we are doing function sections, then we need
20489 an entry per section, since we can't rely on reachability. */
20490 if (in_section == text_section)
/* Flag arm_file_end to emit the unit-wide trampolines.  */
20492 thumb_call_reg_needed = 1;
20494 if (thumb_call_via_label[regno] == NULL)
20495 thumb_call_via_label[regno] = gen_label_rtx ();
20496 labelp = thumb_call_via_label + regno;
20500 if (cfun->machine->call_via[regno] == NULL)
20501 cfun->machine->call_via[regno] = gen_label_rtx ();
20502 labelp = cfun->machine->call_via + regno;
20505 output_asm_insn ("bl\t%a0", labelp);
20509 /* Routines for generating rtl. */
/* Expand a memory-to-memory copy of INTVAL (operands[2]) bytes:
   12- and 8-byte block-move patterns while enough bytes remain,
   then word, halfword, and byte tail copies.
   NOTE(review): listing elided -- the loop/condition framing around
   the visible emits is missing.  */
20511 thumb_expand_movmemqi (rtx *operands)
20513 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20514 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20515 HOST_WIDE_INT len = INTVAL (operands[2]);
20516 HOST_WIDE_INT offset = 0;
/* Bulk copies: the movmem patterns post-increment both pointers.  */
20520 emit_insn (gen_movmem12b (out, in, out, in));
20526 emit_insn (gen_movmem8b (out, in, out, in));
/* Word-sized tail.  */
20532 rtx reg = gen_reg_rtx (SImode);
20533 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20534 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
/* Halfword tail.  */
20541 rtx reg = gen_reg_rtx (HImode);
20542 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20543 plus_constant (in, offset))));
20544 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
/* Final odd byte.  */
20552 rtx reg = gen_reg_rtx (QImode);
20553 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20554 plus_constant (in, offset))));
20555 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
/* Reload helper: store a half-word using the movhi_clobber pattern,
   which is allowed to trash the scratch in operands[2].  */
20561 thumb_reload_out_hi (rtx *operands)
20563 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20566 /* Handle reading a half-word from memory during reload. */
/* Never expected to be needed; abort if reload asks for it.  */
20568 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20570 gcc_unreachable ();
20573 /* Return the length of a function name prefix
20574 that starts with the character 'c'. */
/* Driven by the ARM_NAME_ENCODING_LENGTHS macro, which expands to
   the per-prefix cases; returns 0 for characters that do not begin
   a recognized encoding prefix.  */
20576 arm_get_strip_length (int c)
20580 ARM_NAME_ENCODING_LENGTHS
20585 /* Return a pointer to a function's name with any
20586 and all prefix encodings stripped from it. */
/* Repeatedly skips prefixes recognized by arm_get_strip_length;
   no copy is made -- the returned pointer aliases NAME's tail.  */
20588 arm_strip_name_encoding (const char *name)
20592 while ((skip = arm_get_strip_length (* name)))
20598 /* If there is a '*' anywhere in the name's prefix, then
20599 emit the stripped name verbatim, otherwise prepend an
20600 underscore if leading underscores are being used. */
20602 arm_asm_output_labelref (FILE *stream, const char *name)
/* Scan and strip encoding prefixes, remembering whether any of
   them contained the verbatim marker '*'.  */
20607 while ((skip = arm_get_strip_length (* name)))
20609 verbatim |= (*name == '*');
20614 fputs (name, stream);
/* %U expands to the user-label prefix (e.g. '_') when in use.  */
20616 asm_fprintf (stream, "%U%s", name);
/* TARGET_ASM_FILE_START hook: emit the assembly-file preamble --
   .syntax/.arch/.cpu/.fpu directives and the EABI build attributes
   describing the FP model, alignment, enum size and optimization
   goals, then chain to the language hook and default_file_start.
   NOTE(review): listing elided -- some conditional framing (e.g. the
   EABI guard around the attribute block) is not visible.  */
20620 arm_file_start (void)
20624 if (TARGET_UNIFIED_ASM)
20625 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20629 const char *fpu_name;
20630 if (arm_selected_arch)
20631 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
20633 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
20635 if (TARGET_SOFT_FLOAT)
/* Pure soft-float vs. FPA-format soft-float use different names.  */
20638 fpu_name = "softvfp";
20640 fpu_name = "softfpa";
20644 fpu_name = arm_fpu_desc->name;
20645 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
/* Tag_ABI_HardFP_use / Tag_ABI_VFP_args.  */
20647 if (TARGET_HARD_FLOAT)
20648 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20649 if (TARGET_HARD_FLOAT_ABI)
20650 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20653 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20655 /* Some of these attributes only apply when the corresponding features
20656 are used. However we don't have any easy way of figuring this out.
20657 Conservatively record the setting that would have been used. */
20659 /* Tag_ABI_FP_rounding. */
20660 if (flag_rounding_math)
20661 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20662 if (!flag_unsafe_math_optimizations)
20664 /* Tag_ABI_FP_denormal. */
20665 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20666 /* Tag_ABI_FP_exceptions. */
20667 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20669 /* Tag_ABI_FP_user_exceptions. */
20670 if (flag_signaling_nans)
20671 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20672 /* Tag_ABI_FP_number_model. */
20673 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20674 flag_finite_math_only ? 1 : 3);
20676 /* Tag_ABI_align8_needed. */
20677 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20678 /* Tag_ABI_align8_preserved. */
20679 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20680 /* Tag_ABI_enum_size. */
20681 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20682 flag_short_enums ? 1 : 2);
20684 /* Tag_ABI_optimization_goals. */
20687 else if (optimize >= 2)
20693 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20695 /* Tag_ABI_FP_16bit_format. */
20696 if (arm_fp16_format)
20697 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20698 (int)arm_fp16_format);
/* Let the front end (e.g. C/C++) add its own attributes.  */
20700 if (arm_lang_output_object_attributes_hook)
20701 arm_lang_output_object_attributes_hook();
20703 default_file_start();
/* TARGET_ASM_FILE_END hook: emit the .note.GNU-stack marker when
   required, and the unit-wide Thumb call-via trampolines (one
   "bx <reg>" per register label created by thumb_call_via_reg).  */
20707 arm_file_end (void)
20711 if (NEED_INDICATE_EXEC_STACK)
20712 /* Add .note.GNU-stack. */
20713 file_end_indicate_exec_stack ();
/* Nothing to do unless some call went through a via-register stub.  */
20715 if (! thumb_call_reg_needed)
20718 switch_to_section (text_section);
20719 asm_fprintf (asm_out_file, "\t.code 16\n");
20720 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20722 for (regno = 0; regno < LR_REGNUM; regno++)
20724 rtx label = thumb_call_via_label[regno];
20728 targetm.asm_out.internal_label (asm_out_file, "L",
20729 CODE_LABEL_NUMBER (label));
20730 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20736 /* Symbols in the text segment can be accessed without indirecting via the
20737 constant pool; it may take an extra binary operation, but this is still
20738 faster than indirecting via memory. Don't do this when not optimizing,
20739 since we won't be calculating all of the offsets necessary to do this
/* TARGET_ENCODE_SECTION_INFO (non-PE variant): mark constant decls'
   SYMBOL_REFs so later code knows they live in the text segment.  */
20743 arm_encode_section_info (tree decl, rtx rtl, int first)
20745 if (optimize > 0 && TREE_CONSTANT (decl))
20746 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20748 default_encode_section_info (decl, rtl, first);
20750 #endif /* !ARM_PE */
/* TARGET_ASM_INTERNAL_LABEL hook: reset the conditional-execution
   state machine when the label it is tracking is emitted, then emit
   the label normally.  */
20753 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20755 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20756 && !strcmp (prefix, "L"))
20758 arm_ccfsm_state = 0;
20759 arm_target_insn = NULL;
20761 default_internal_label (stream, prefix, labelno);
20764 /* Output code to add DELTA to the first argument, and then jump
20765 to FUNCTION. Used for C++ multiple inheritance. */
/* TARGET_ASM_OUTPUT_MI_THUNK: writes the thunk body as literal
   assembly.  Thumb targets route the tail jump through r12 (loading
   the target address from a constant-pool word emitted after the
   code); ARM targets use a direct branch.  On Thumb-1-only cores r3
   doubles as a scratch and is saved/restored around the thunk.
   NOTE(review): listing elided -- several conditionals' framing is
   missing.  */
20767 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20768 HOST_WIDE_INT delta,
20769 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20772 static int thunk_label = 0;
20775 int mi_delta = delta;
20776 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
/* 'this' is in r0, or r1 when the return value is passed by
   hidden reference.  */
20778 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20781 mi_delta = - mi_delta;
20785 int labelno = thunk_label++;
20786 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20787 /* Thunks are entered in arm mode when available. */
20788 if (TARGET_THUMB1_ONLY)
20790 /* push r3 so we can use it as a temporary. */
20791 /* TODO: Omit this save if r3 is not used. */
20792 fputs ("\tpush {r3}\n", file);
20793 fputs ("\tldr\tr3, ", file);
20797 fputs ("\tldr\tr12, ", file);
20799 assemble_name (file, label);
20800 fputc ('\n', file);
20803 /* If we are generating PIC, the ldr instruction below loads
20804 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20805 the address of the add + 8, so we have:
20807 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20810 Note that we have "+ 1" because some versions of GNU ld
20811 don't set the low bit of the result for R_ARM_REL32
20812 relocations against thumb function symbols.
20813 On ARMv6M this is +4, not +8. */
20814 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20815 assemble_name (file, labelpc);
20816 fputs (":\n", file);
20817 if (TARGET_THUMB1_ONLY)
20819 /* This is 2 insns after the start of the thunk, so we know it
20820 is 4-byte aligned. */
20821 fputs ("\tadd\tr3, pc, r3\n", file);
20822 fputs ("\tmov r12, r3\n", file);
20825 fputs ("\tadd\tr12, pc, r12\n", file);
20827 else if (TARGET_THUMB1_ONLY)
20828 fputs ("\tmov r12, r3\n", file);
20830 if (TARGET_THUMB1_ONLY)
20832 if (mi_delta > 255)
/* Delta too large for a Thumb-1 immediate: load it from the extra
   constant-pool word at label+4.  */
20834 fputs ("\tldr\tr3, ", file);
20835 assemble_name (file, label);
20836 fputs ("+4\n", file);
20837 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20838 mi_op, this_regno, this_regno);
20840 else if (mi_delta != 0)
20842 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20843 mi_op, this_regno, this_regno,
20849 /* TODO: Use movw/movt for large constants when available. */
/* ARM/Thumb-2 path: apply the delta one 8-bit-rotated chunk at a
   time, as ARM data-processing immediates allow.  */
20850 while (mi_delta != 0)
20852 if ((mi_delta & (3 << shift)) == 0)
20856 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20857 mi_op, this_regno, this_regno,
20858 mi_delta & (0xff << shift));
20859 mi_delta &= ~(0xff << shift);
20866 if (TARGET_THUMB1_ONLY)
20867 fputs ("\tpop\t{r3}\n", file);
20869 fprintf (file, "\tbx\tr12\n");
/* Emit the literal pool the ldr above refers to.  */
20870 ASM_OUTPUT_ALIGN (file, 2);
20871 assemble_name (file, label);
20872 fputs (":\n", file);
20875 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20876 rtx tem = XEXP (DECL_RTL (function), 0);
20877 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20878 tem = gen_rtx_MINUS (GET_MODE (tem),
20880 gen_rtx_SYMBOL_REF (Pmode,
20881 ggc_strdup (labelpc)));
20882 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20885 /* Output ".word .LTHUNKn". */
20886 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20888 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20889 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
/* Non-Thumb-indirect path: a plain tail branch to FUNCTION.  */
20893 fputs ("\tb\t", file);
20894 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20895 if (NEED_PLT_RELOC)
20896 fputs ("(PLT)", file);
20897 fputc ('\n', file);
/* Print a CONST_VECTOR of integers to FILE as a single hexadecimal
   literal ("0x..."), most-significant element first, with per-element
   field width chosen from the vector mode.  */
20902 arm_emit_vector_const (FILE *file, rtx x)
20905 const char * pattern;
20907 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20909 switch (GET_MODE (x))
20911 case V2SImode: pattern = "%08x"; break;
20912 case V4HImode: pattern = "%04x"; break;
20913 case V8QImode: pattern = "%02x"; break;
20914 default: gcc_unreachable ();
20917 fprintf (file, "0x");
/* Iterate from the highest-numbered element down.  */
20918 for (i = CONST_VECTOR_NUNITS (x); i--;)
20922 element = CONST_VECTOR_ELT (x, i);
20923 fprintf (file, pattern, INTVAL (element));
20929 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20930 HFmode constant pool entries are actually loaded with ldr. */
/* Converts the CONST_DOUBLE to its 16-bit target representation and
   pads with two zero bytes on the side determined by endianness so
   the value lands in the half of the word that ldr will read.  */
20932 arm_emit_fp16_const (rtx c)
20937 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20938 bits = real_to_target (NULL, &r, HFmode);
20939 if (WORDS_BIG_ENDIAN)
20940 assemble_zeros (2);
20941 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20942 if (!WORDS_BIG_ENDIAN)
20943 assemble_zeros (2);
/* iWMMXt: output template for loading a GR (wCGR) register.  When
   the address is in range a single wldrw suffices; otherwise spill a
   core register, ldr the value through it, move it into the wCGR
   with tmcr, and restore the spilled register.  */
20947 arm_output_load_gr (rtx *operands)
/* In-range (or non-matching) addresses take the simple path.  */
20954 if (GET_CODE (operands [1]) != MEM
20955 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20956 || GET_CODE (reg = XEXP (sum, 0)) != REG
20957 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20958 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20959 return "wldrw%?\t%0, %1";
20961 /* Fix up an out-of-range load of a GR register. */
20962 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20963 wcgr = operands[0];
20965 output_asm_insn ("ldr%?\t%0, %1", operands);
20967 operands[0] = wcgr;
20969 output_asm_insn ("tmcr%?\t%0, %1", operands);
20970 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20975 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20977 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20978 named arg and all anonymous args onto the stack.
20979 XXX I know the prologue shouldn't be pushing registers, but it is faster
20983 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20984 enum machine_mode mode,
20987 int second_time ATTRIBUTE_UNUSED)
20991 cfun->machine->uses_anonymous_args = 1;
/* Work out how many argument registers the named arguments consumed;
   the remaining ones must be spilled so va_arg can find them.  */
20992 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20994 nregs = pcum->aapcs_ncrn;
/* Skip an odd register so a doubleword-aligned argument stays aligned.  */
20995 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20999 nregs = pcum->nregs;
21001 if (nregs < NUM_ARG_REGS)
21002 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21005 /* Return nonzero if the CONSUMER instruction (a store) does not need
21006 PRODUCER's value to calculate the address. */
21009 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21011 rtx value = PATTERN (producer);
21012 rtx addr = PATTERN (consumer);
/* Strip any COND_EXEC / PARALLEL wrapper to reach the underlying SET
   of each insn pattern.  */
21014 if (GET_CODE (value) == COND_EXEC)
21015 value = COND_EXEC_CODE (value);
21016 if (GET_CODE (value) == PARALLEL)
21017 value = XVECEXP (value, 0, 0);
/* The destination of the producer's SET.  */
21018 value = XEXP (value, 0);
21019 if (GET_CODE (addr) == COND_EXEC)
21020 addr = COND_EXEC_CODE (addr);
21021 if (GET_CODE (addr) == PARALLEL)
21022 addr = XVECEXP (addr, 0, 0);
/* The destination (a MEM) of the consumer's SET; its address must not
   mention the register the producer writes.  */
21023 addr = XEXP (addr, 0);
21025 return !reg_overlap_mentioned_p (value, addr);
21028 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21029 have an early register shift value or amount dependency on the
21030 result of PRODUCER. */
21033 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21035 rtx value = PATTERN (producer);
21036 rtx op = PATTERN (consumer);
/* Unwrap COND_EXEC / PARALLEL to reach the bare SETs.  */
21039 if (GET_CODE (value) == COND_EXEC)
21040 value = COND_EXEC_CODE (value);
21041 if (GET_CODE (value) == PARALLEL)
21042 value = XVECEXP (value, 0, 0);
/* Destination register of the producer.  */
21043 value = XEXP (value, 0);
21044 if (GET_CODE (op) == COND_EXEC)
21045 op = COND_EXEC_CODE (op);
21046 if (GET_CODE (op) == PARALLEL)
21047 op = XVECEXP (op, 0, 0);
/* First operand of the consumer's source expression.  */
21050 early_op = XEXP (op, 0);
21051 /* This is either an actual independent shift, or a shift applied to
21052 the first operand of another operation. We want the whole shift
21054 if (GET_CODE (early_op) == REG)
21057 return !reg_overlap_mentioned_p (value, early_op);
21060 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21061 have an early register shift value dependency on the result of
21065 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21067 rtx value = PATTERN (producer);
21068 rtx op = PATTERN (consumer);
/* Unwrap COND_EXEC / PARALLEL to reach the bare SETs.  */
21071 if (GET_CODE (value) == COND_EXEC)
21072 value = COND_EXEC_CODE (value);
21073 if (GET_CODE (value) == PARALLEL)
21074 value = XVECEXP (value, 0, 0);
/* Destination register of the producer.  */
21075 value = XEXP (value, 0);
21076 if (GET_CODE (op) == COND_EXEC)
21077 op = COND_EXEC_CODE (op);
21078 if (GET_CODE (op) == PARALLEL)
21079 op = XVECEXP (op, 0, 0);
/* First operand of the consumer's source expression.  */
21082 early_op = XEXP (op, 0);
21084 /* This is either an actual independent shift, or a shift applied to
21085 the first operand of another operation. We want the value being
21086 shifted, in either case. */
21087 if (GET_CODE (early_op) != REG)
21088 early_op = XEXP (early_op, 0);
21090 return !reg_overlap_mentioned_p (value, early_op);
21093 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21094 have an early register mult dependency on the result of
21098 arm_no_early_mul_dep (rtx producer, rtx consumer)
21100 rtx value = PATTERN (producer);
21101 rtx op = PATTERN (consumer)
/* Unwrap COND_EXEC / PARALLEL to reach the bare SETs.  */;
21103 if (GET_CODE (value) == COND_EXEC)
21104 value = COND_EXEC_CODE (value);
21105 if (GET_CODE (value) == PARALLEL)
21106 value = XVECEXP (value, 0, 0);
/* Destination register of the producer.  */
21107 value = XEXP (value, 0);
21108 if (GET_CODE (op) == COND_EXEC)
21109 op = COND_EXEC_CODE (op);
21110 if (GET_CODE (op) == PARALLEL)
21111 op = XVECEXP (op, 0, 0);
/* For a mac (plus/minus with an embedded mult), only the multiply
   operand matters for the early dependency.  */
21114 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21116 if (GET_CODE (XEXP (op, 0)) == MULT)
21117 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21119 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21125 /* We can't rely on the caller doing the proper promotion when
21126 using APCS or ATPCS.  So promote prototypes for everything except
   AAPCS-based targets.  */
21129 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21131 return !TARGET_AAPCS_BASED;
/* Implement TARGET_PROMOTE_FUNCTION_MODE: sub-word integer arguments
   and return values are widened (promotion result is outside this
   excerpt).  */
21134 static enum machine_mode
21135 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21136 enum machine_mode mode,
21137 int *punsignedp ATTRIBUTE_UNUSED,
21138 const_tree fntype ATTRIBUTE_UNUSED,
21139 int for_return ATTRIBUTE_UNUSED)
/* Only integer modes narrower than a 4-byte word are promoted.  */
21141 if (GET_MODE_CLASS (mode) == MODE_INT
21142 && GET_MODE_SIZE (mode) < 4)
21148 /* AAPCS based ABIs use short enums by default.  The Linux AAPCS
   variant is the exception.  */
21151 arm_default_short_enums (void)
21153 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21157 /* AAPCS requires that anonymous bitfields affect structure alignment. */
/* Target hook worker; nonzero only for AAPCS-based ABIs.  */
21160 arm_align_anon_bitfield (void)
21162 return TARGET_AAPCS_BASED;
21166 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
/* Return the tree type used for one-time-construction guard variables.  */
21169 arm_cxx_guard_type (void)
21171 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21174 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21175 has an accumulator dependency on the result of the producer (a
21176 multiplication instruction) and no other dependency on that result. */
21178 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21180 rtx mul = PATTERN (producer);
21181 rtx mac = PATTERN (consumer);
21183 rtx mac_op0, mac_op1, mac_acc;
/* Strip conditional execution wrappers.  */
21185 if (GET_CODE (mul) == COND_EXEC)
21186 mul = COND_EXEC_CODE (mul);
21187 if (GET_CODE (mac) == COND_EXEC)
21188 mac = COND_EXEC_CODE (mac);
21190 /* Check that mul is of the form (set (...) (mult ...))
21191 and mla is of the form (set (...) (plus (mult ...) (...))). */
21192 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21193 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21194 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
/* Pull out the mul destination and the mac's multiply operands and
   accumulator operand.  */
21197 mul_result = XEXP (mul, 0);
21198 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21199 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21200 mac_acc = XEXP (XEXP (mac, 1), 1);
/* The multiply result must feed the accumulator and nothing else.  */
21202 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21203 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21204 && !reg_overlap_mentioned_p (mul_result, mac_op1));
21208 /* The EABI says test the least significant bit of a guard variable. */
/* Target hook worker; nonzero only for AAPCS-based ABIs.  */
21211 arm_cxx_guard_mask_bit (void)
21213 return TARGET_AAPCS_BASED;
21217 /* The EABI specifies that all array cookies are 8 bytes long. */
21220 arm_get_cookie_size (tree type)
/* Non-EABI targets use the generic C++ ABI cookie size.  */
21224 if (!TARGET_AAPCS_BASED)
21225 return default_cxx_get_cookie_size (type);
21227 size = build_int_cst (sizetype, 8);
21232 /* The EABI says that array cookies should also contain the element size. */
/* Target hook worker; nonzero only for AAPCS-based ABIs.  */
21235 arm_cookie_has_size (void)
21237 return TARGET_AAPCS_BASED;
21241 /* The EABI says constructors and destructors should return a pointer to
21242 the object constructed/destroyed. */
/* Target hook worker; nonzero only for AAPCS-based ABIs.  */
21245 arm_cxx_cdtor_returns_this (void)
21247 return TARGET_AAPCS_BASED;
21250 /* The EABI says that an inline function may never be the key
/* Target hook worker; inline key methods are allowed only off-EABI.  */
21254 arm_cxx_key_method_may_be_inline (void)
21256 return !TARGET_AAPCS_BASED;
/* Decide the ELF visibility of C++ class data DECL (vtables, RTTI,
   etc.) for EABI targets with dllimport-style attributes.  */
21260 arm_cxx_determine_class_data_visibility (tree decl)
21262 if (!TARGET_AAPCS_BASED
21263 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21266 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21267 is exported. However, on systems without dynamic vague linkage,
21268 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21269 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21270 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21272 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
/* Mark the visibility as explicit so later passes don't override it.  */
21273 DECL_VISIBILITY_SPECIFIED (decl) = 1;
/* Target hook worker: whether class data must always be COMDAT.  */
21277 arm_cxx_class_data_always_comdat (void)
21279 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21280 vague linkage if the class has no key function. */
21281 return !TARGET_AAPCS_BASED;
21285 /* The EABI says __aeabi_atexit should be used to register static
/* Target hook worker; nonzero only for AAPCS-based ABIs.  */
21289 arm_cxx_use_aeabi_atexit (void)
21291 return TARGET_AAPCS_BASED;
/* Store SOURCE as the function's return address, using SCRATCH as a
   temporary if an out-of-range stack offset must be formed.  If LR was
   not saved on the stack, just write it to the LR register.  */
21296 arm_set_return_address (rtx source, rtx scratch)
21298 arm_stack_offsets *offsets;
21299 HOST_WIDE_INT delta;
21301 unsigned long saved_regs;
21303 offsets = arm_get_frame_offsets ();
21304 saved_regs = offsets->saved_regs_mask;
/* LR not on the stack: set the register directly.  */
21306 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21307 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21310 if (frame_pointer_needed)
21311 addr = plus_constant(hard_frame_pointer_rtx, -4);
21314 /* LR will be the first saved register. */
21315 delta = offsets->outgoing_args - (offsets->frame + 4);
/* For large offsets, build the high part in SCRATCH first so the
   remaining displacement fits an addressing mode.  */
21320 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
21321 GEN_INT (delta & ~4095)));
21326 addr = stack_pointer_rtx;
21328 addr = plus_constant (addr, delta);
21330 emit_move_insn (gen_frame_mem (Pmode, addr), source);
/* Thumb counterpart of arm_set_return_address: store SOURCE as the
   return address, via the stack slot where LR was saved if any,
   otherwise directly into LR.  SCRATCH helps form large offsets.  */
21336 thumb_set_return_address (rtx source, rtx scratch)
21338 arm_stack_offsets *offsets;
21339 HOST_WIDE_INT delta;
21340 HOST_WIDE_INT limit;
21343 unsigned long mask;
21347 offsets = arm_get_frame_offsets ();
21348 mask = offsets->saved_regs_mask;
21349 if (mask & (1 << LR_REGNUM))
21352 /* Find the saved regs. */
21353 if (frame_pointer_needed)
21355 delta = offsets->soft_frame - offsets->saved_args;
21356 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
21362 delta = offsets->outgoing_args - offsets->saved_args;
21365 /* Allow for the stack frame. */
21366 if (TARGET_THUMB1 && TARGET_BACKTRACE)
21368 /* The link register is always the first saved register. */
21371 /* Construct the address. */
21372 addr = gen_rtx_REG (SImode, reg);
/* Offset too large for a Thumb addressing mode: compute the address
   into SCRATCH instead.  */
21375 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
21376 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
21380 addr = plus_constant (addr, delta);
21382 emit_move_insn (gen_frame_mem (Pmode, addr), source);
/* LR was not saved: write it directly.  */
21385 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21388 /* Implements target hook vector_mode_supported_p. */
21390 arm_vector_mode_supported_p (enum machine_mode mode)
21392 /* Neon also supports V2SImode, etc. listed in the clause below. */
21393 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
21394 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
/* 64-bit vector modes shared by NEON and iWMMXt.  */
21397 if ((TARGET_NEON || TARGET_IWMMXT)
21398 && ((mode == V2SImode)
21399 || (mode == V4HImode)
21400 || (mode == V8QImode)))
21406 /* Implements target hook small_register_classes_for_mode_p.  Thumb-1
   has only eight easily-accessible core registers.  */
21408 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
21410 return TARGET_THUMB1;
21413 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
21414 ARM insns and therefore guarantee that the shift count is modulo 256.
21415 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
21416 guarantee no particular behavior for out-of-range counts. */
21418 static unsigned HOST_WIDE_INT
21419 arm_shift_truncation_mask (enum machine_mode mode)
21421 return mode == SImode ? 255 : 0;
21425 /* Map internal gcc register numbers to DWARF2 register numbers. */
21428 arm_dbx_register_number (unsigned int regno)
21433 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
21434 compatibility. The EABI defines them as registers 96-103. */
21435 if (IS_FPA_REGNUM (regno))
21436 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
21438 if (IS_VFP_REGNUM (regno))
21440 /* See comment in arm_dwarf_register_span. */
21441 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21442 return 64 + regno - FIRST_VFP_REGNUM;
/* D16-D31: use the VFPv3 D-register numbering starting at 256.  */
21444 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
21447 if (IS_IWMMXT_GR_REGNUM (regno))
21448 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
21450 if (IS_IWMMXT_REGNUM (regno))
21451 return 112 + regno - FIRST_IWMMXT_REGNUM;
/* Any register not matched above has no DWARF number here.  */
21453 gcc_unreachable ();
21456 /* Dwarf models VFPv3 registers as 32 64-bit registers.
21457 GCC models them as 64 32-bit registers, so we need to describe this to
21458 the DWARF generation code. Other registers can use the default. */
21460 arm_dwarf_register_span (rtx rtl)
21467 regno = REGNO (rtl);
21468 if (!IS_VFP_REGNUM (regno))
21471 /* XXX FIXME: The EABI defines two VFP register ranges:
21472 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
21474 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
21475 corresponding D register. Until GDB supports this, we shall use the
21476 legacy encodings. We also use these encodings for D0-D15 for
21477 compatibility with older debuggers. */
21478 if (VFP_REGNO_OK_FOR_SINGLE (regno))
/* Build a PARALLEL of DImode D-registers covering the value.  */
21481 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
21482 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
21483 regno = (regno - FIRST_VFP_REGNUM) / 2;
21484 for (i = 0; i < nregs; i++)
21485 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
21490 #ifdef TARGET_UNWIND_INFO
21491 /* Emit unwind directives for a store-multiple instruction or stack pointer
21492 push during alignment.
21493 These should only ever be generated by the function prologue code, so
21494 expect them to have a particular form. */
21497 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21500 HOST_WIDE_INT offset;
21501 HOST_WIDE_INT nregs;
21507 e = XVECEXP (p, 0, 0);
21508 if (GET_CODE (e) != SET)
21511 /* First insn will adjust the stack pointer. */
21512 if (GET_CODE (e) != SET
21513 || GET_CODE (XEXP (e, 0)) != REG
21514 || REGNO (XEXP (e, 0)) != SP_REGNUM
21515 || GET_CODE (XEXP (e, 1)) != PLUS)
/* Total stack decrement and number of register stores that follow.  */
21518 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21519 nregs = XVECLEN (p, 0) - 1;
/* First register being stored; determines the directive to emit.  */
21521 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
21524 /* The function prologue may also push pc, but not annotate it as it is
21525 never restored. We turn this into a stack pointer adjustment. */
21526 if (nregs * 4 == offset - 4)
21528 fprintf (asm_out_file, "\t.pad #4\n");
21532 fprintf (asm_out_file, "\t.save {");
21534 else if (IS_VFP_REGNUM (reg))
21537 fprintf (asm_out_file, "\t.vsave {");
21539 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21541 /* FPA registers are done differently. */
21542 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
21546 /* Unknown register type. */
21549 /* If the stack increment doesn't match the size of the saved registers,
21550 something has gone horribly wrong. */
21551 if (offset != nregs * reg_size)
21556 /* The remaining insns will describe the stores. */
21557 for (i = 1; i <= nregs; i++)
21559 /* Expect (set (mem <addr>) (reg)).
21560 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21561 e = XVECEXP (p, 0, i);
21562 if (GET_CODE (e) != SET
21563 || GET_CODE (XEXP (e, 0)) != MEM
21564 || GET_CODE (XEXP (e, 1)) != REG)
21567 reg = REGNO (XEXP (e, 1));
21572 fprintf (asm_out_file, ", ");
21573 /* We can't use %r for vfp because we need to use the
21574 double precision register names. */
21575 if (IS_VFP_REGNUM (reg))
21576 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
21578 asm_fprintf (asm_out_file, "%r", reg);
21580 #ifdef ENABLE_CHECKING
21581 /* Check that the addresses are consecutive. */
21582 e = XEXP (XEXP (e, 0), 0);
21583 if (GET_CODE (e) == PLUS)
21585 offset += reg_size;
21586 if (GET_CODE (XEXP (e, 0)) != REG
21587 || REGNO (XEXP (e, 0)) != SP_REGNUM
21588 || GET_CODE (XEXP (e, 1)) != CONST_INT
21589 || offset != INTVAL (XEXP (e, 1)))
21593 || GET_CODE (e) != REG
21594 || REGNO (e) != SP_REGNUM)
/* Close the .save/.vsave register list.  */
21598 fprintf (asm_out_file, "}\n");
21601 /* Emit unwind directives for a SET.  Dispatch on the destination of
   the SET: a push (PRE_DEC mem), a stack adjustment, a frame-pointer
   setup, or a stack-pointer copy.  */
21604 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
21612 switch (GET_CODE (e0))
21615 /* Pushing a single register. */
21616 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21617 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21618 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM
21621 asm_fprintf (asm_out_file, "\t.save ")
/* VFP pushes need the double-precision register name.  */;
21622 if (IS_VFP_REGNUM (REGNO (e1)))
21623 asm_fprintf(asm_out_file, "{d%d}\n",
21624 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21626 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21630 if (REGNO (e0) == SP_REGNUM)
21632 /* A stack increment. */
21633 if (GET_CODE (e1) != PLUS
21634 || GET_CODE (XEXP (e1, 0)) != REG
21635 || REGNO (XEXP (e1, 0)) != SP_REGNUM
21636 || GET_CODE (XEXP (e1, 1)) != CONST_INT
21639 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21640 -INTVAL (XEXP (e1, 1)));
21642 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21644 HOST_WIDE_INT offset;
21646 if (GET_CODE (e1) == PLUS)
21648 if (GET_CODE (XEXP (e1, 0)) != REG
21649 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21651 reg = REGNO (XEXP (e1, 0));
21652 offset = INTVAL (XEXP (e1, 1));
21653 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21654 HARD_FRAME_POINTER_REGNUM, reg,
21657 else if (GET_CODE (e1) == REG)
21660 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21661 HARD_FRAME_POINTER_REGNUM, reg);
21666 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21668 /* Move from sp to reg. */
21669 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21671 else if (GET_CODE (e1) == PLUS
21672 && GET_CODE (XEXP (e1, 0)) == REG
21673 && REGNO (XEXP (e1, 0)) == SP_REGNUM
21674 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21676 /* Set reg to offset from sp. */
21677 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21678 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21680 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21682 /* Stack pointer save before alignment. */
21684 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21697 /* Emit unwind directives for the given insn. */
21700 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21704 if (!ARM_EABI_UNWIND_TABLES)
/* Nothing to do for functions that will never be unwound (same
   condition as the .cantunwind test in arm_output_fn_unwind).  */
21707 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21708 && (TREE_NOTHROW (current_function_decl)
21709 || crtl->all_throwers_are_sibcalls))
21712 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
/* Prefer the REG_FRAME_RELATED_EXPR note over the raw pattern.  */
21715 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21717 pat = XEXP (pat, 0);
21719 pat = PATTERN (insn);
21721 switch (GET_CODE (pat))
21724 arm_unwind_emit_set (asm_out_file, pat);
21728 /* Store multiple. */
21729 arm_unwind_emit_sequence (asm_out_file, pat);
21738 /* Output a reference from a function exception table to the type_info
21739 object X. The EABI specifies that the symbol should be relocated by
21740 an R_ARM_TARGET2 relocation. */
21743 arm_output_ttype (rtx x)
21745 fputs ("\t.word\t", asm_out_file);
21746 output_addr_const (asm_out_file, x);
21747 /* Use special relocations for symbol references. */
21748 if (GET_CODE (x) != CONST_INT)
21749 fputs ("(TARGET2)", asm_out_file);
21750 fputc ('\n', asm_out_file);
21754 #endif /* TARGET_UNWIND_INFO */
21757 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21758 stack alignment. */
21761 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21763 rtx unspec = SET_SRC (pattern);
21764 gcc_assert (GET_CODE (unspec) == UNSPEC);
21768 case UNSPEC_STACK_ALIGN:
21769 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21770 put anything on the stack, so hopefully it won't matter.
21771 CFA = SP will be correct after alignment. */
21772 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21773 SET_DEST (pattern));
/* Any other UNSPEC here indicates a backend inconsistency.  */
21776 gcc_unreachable ();
21781 /* Output unwind directives for the start/end of a function. */
21784 arm_output_fn_unwind (FILE * f, bool prologue)
21786 if (!ARM_EABI_UNWIND_TABLES)
21790 fputs ("\t.fnstart\n", f);
21793 /* If this function will never be unwound, then mark it as such.
21794 The same condition is used in arm_unwind_emit to suppress
21795 the frame annotations. */
21796 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21797 && (TREE_NOTHROW (current_function_decl)
21798 || crtl->all_throwers_are_sibcalls))
21799 fputs("\t.cantunwind\n", f);
21801 fputs ("\t.fnend\n", f);
/* Print the TLS UNSPEC X to FP as the symbol followed by the
   relocation suffix the assembler expects for its TLS model.  */
21806 arm_emit_tls_decoration (FILE *fp, rtx x)
21808 enum tls_reloc reloc;
/* Operand 0 is the symbol, operand 1 the relocation kind.  */
21811 val = XVECEXP (x, 0, 0);
21812 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21814 output_addr_const (fp, val);
21819 fputs ("(tlsgd)", fp);
21822 fputs ("(tlsldm)", fp);
21825 fputs ("(tlsldo)", fp);
21828 fputs ("(gottpoff)", fp);
21831 fputs ("(tpoff)", fp);
21834 gcc_unreachable ();
/* PC-relative forms append "+ (. - label1 - label2)".  */
21842 fputs (" + (. - ", fp);
21843 output_addr_const (fp, XVECEXP (x, 0, 2));
21845 output_addr_const (fp, XVECEXP (x, 0, 3));
21855 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  Emits a
   .word with a (tlsldo) relocation; only 4-byte values are valid.  */
21858 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21860 gcc_assert (size == 4);
21861 fputs ("\t.word\t", file);
21862 output_addr_const (file, x);
21863 fputs ("(tlsldo)", file);
/* Output ARM-specific address constants that output_addr_const cannot
   handle itself: TLS decorations, PIC labels, GOT-relative and
   symbol-offset UNSPECs, and constant vectors.  */
21867 arm_output_addr_const_extra (FILE *fp, rtx x)
21869 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21870 return arm_emit_tls_decoration (fp, x);
21871 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
/* Emit the internal .LPICn label this UNSPEC refers to.  */
21874 int labelno = INTVAL (XVECEXP (x, 0, 0));
21876 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21877 assemble_name_raw (fp, label);
21881 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21883 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21887 output_addr_const (fp, XVECEXP (x, 0, 0));
21891 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
21893 output_addr_const (fp, XVECEXP (x, 0, 0));
21897 output_addr_const (fp, XVECEXP (x, 0, 1));
21901 else if (GET_CODE (x) == CONST_VECTOR)
21902 return arm_emit_vector_const (fp, x);
21907 /* Output assembly for a shift instruction.
21908 SET_FLAGS determines how the instruction modifies the condition codes.
21909 0 - Do not set condition codes.
21910 1 - Set condition codes.
21911 2 - Use smallest instruction. */
21913 arm_output_shift(rtx * operands, int set_flags)
/* Suffix character selecting the flag-setting variant, indexed by
   SET_FLAGS: '?' never, '.' always, '!' smallest encoding.  */
21916 static const char flag_chars[3] = {'?', '.', '!'};
21921 c = flag_chars[set_flags];
21922 if (TARGET_UNIFIED_ASM)
/* Unified syntax: use the explicit shift mnemonic with an immediate
   or register amount.  */
21924 shift = shift_op(operands[3], &val);
21928 operands[2] = GEN_INT(val);
21929 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21932 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
/* Divided (pre-unified) syntax: mov with a shifted operand.  */
21935 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21936 output_asm_insn (pattern, operands);
21940 /* Output a Thumb-1 casesi dispatch sequence.  Selects the libgcc
   helper matching the dispatch table's element size and signedness.  */
21942 thumb1_output_casesi (rtx *operands)
21944 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21946 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21948 switch (GET_MODE(diff_vec))
21951 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21952 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21954 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21955 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21957 return "bl\t%___gnu_thumb1_case_si";
21959 gcc_unreachable ();
21963 /* Output a Thumb-2 casesi instruction.  Emits the bounds check then a
   table-branch (tbb/tbh) or an explicit PC-relative load depending on
   the dispatch table's element size.  */
21965 thumb2_output_casesi (rtx *operands)
21967 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21969 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
/* Range check: branch to the default label if the index is too big.  */
21971 output_asm_insn ("cmp\t%0, %1", operands);
21972 output_asm_insn ("bhi\t%l3", operands);
21973 switch (GET_MODE(diff_vec))
21976 return "tbb\t[%|pc, %0]";
21978 return "tbh\t[%|pc, %0, lsl #1]";
/* SImode table: load the offset and add it to the table base.  */
21982 output_asm_insn ("adr\t%4, %l2", operands);
21983 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21984 output_asm_insn ("add\t%4, %4, %5", operands);
21989 output_asm_insn ("adr\t%4, %l2", operands);
21990 return "ldr\t%|pc, [%4, %0, lsl #2]";
21993 gcc_unreachable ();
21997 /* Most ARM cores are single issue, but some newer ones can dual issue.
21998 The scheduler descriptions rely on this being correct.  Returns the
   issue rate for the selected tune target (body outside this excerpt).  */
22000 arm_issue_rate (void)
22015 /* A table and a function to perform ARM-specific name mangling for
22016 NEON vector types in order to conform to the AAPCS (see "Procedure
22017 Call Standard for the ARM Architecture", Appendix A). To qualify
22018 for emission with the mangled names defined in that document, a
22019 vector type must not only be of the correct mode but also be
22020 composed of NEON vector element types (e.g. __builtin_neon_qi). */
/* One mapping entry: the vector mode, the builtin element-type name,
   and the substitute AAPCS mangled name (length-prefixed).  */
22023 enum machine_mode mode;
22024 const char *element_type_name;
22025 const char *aapcs_name;
22026 } arm_mangle_map_entry;
22028 static arm_mangle_map_entry arm_mangle_map[] = {
22029 /* 64-bit containerized types. */
22030 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22031 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22032 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22033 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22034 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22035 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22036 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22037 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22038 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22039 /* 128-bit containerized types. */
22040 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22041 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22042 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22043 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22044 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22045 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22046 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22047 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22048 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
/* Sentinel terminating the table.  */
22049 { VOIDmode, NULL, NULL }
/* Implement TARGET_MANGLE_TYPE: return the AAPCS mangled name for
   va_list, half-precision float, and NEON vector types, or NULL for
   default mangling.  */
22053 arm_mangle_type (const_tree type)
22055 arm_mangle_map_entry *pos = arm_mangle_map;
22057 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22058 has to be mangled as if it is in the "std" namespace. */
22059 if (TARGET_AAPCS_BASED
22060 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
/* Warn (once) about the GCC 4.4 ABI change under -Wpsabi.  */
22062 static bool warned;
22063 if (!warned && warn_psabi && !in_system_header)
22066 inform (input_location,
22067 "the mangling of %<va_list%> has changed in GCC 4.4");
22069 return "St9__va_list";
22072 /* Half-precision float. */
22073 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22076 if (TREE_CODE (type) != VECTOR_TYPE)
22079 /* Check the mode of the vector type, and the name of the vector
22080 element type, against the table. */
22081 while (pos->mode != VOIDmode)
22083 tree elt_type = TREE_TYPE (type);
22085 if (pos->mode == TYPE_MODE (type)
22086 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22087 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22088 pos->element_type_name))
22089 return pos->aapcs_name;
22094 /* Use the default mangling for unrecognized (possibly user-defined)
22099 /* Order of allocation of core registers for Thumb: this allocation is
22100 written over the corresponding initial entries of the array
22101 initialized with REG_ALLOC_ORDER. We allocate all low registers
22102 first. Saving and restoring a low register is usually cheaper than
22103 using a call-clobbered high register. */
22105 static const int thumb_core_reg_alloc_order[] =
/* Low registers r3-r0 then r4-r7, then lr/ip and remaining highs.  */
22107 3, 2, 1, 0, 4, 5, 6, 7,
22108 14, 12, 8, 9, 10, 11, 13, 15
22111 /* Adjust register allocation order when compiling for Thumb. */
22114 arm_order_regs_for_local_alloc (void)
/* Start from the generic order, then overlay the Thumb core-register
   preference over its initial entries.  */
22116 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
22117 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
22119 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
22120 sizeof (thumb_core_reg_alloc_order));
22123 /* Set default optimization options. */
22125 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
22127 /* Enable section anchors by default at -O1 or higher.
22128 Use 2 to distinguish from an explicit -fsection-anchors
22129 given on the command line. */
22131 flag_section_anchors = 2;
22134 /* Implement TARGET_FRAME_POINTER_REQUIRED.  A frame pointer is needed
   for nonlocal labels, subtarget requirements, or APCS frames in
   non-leaf ARM-state functions.  */
22137 arm_frame_pointer_required (void)
22139 return (cfun->has_nonlocal_label
22140 || SUBTARGET_FRAME_POINTER_REQUIRED
22141 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
22144 /* Only thumb1 can't support conditional execution, so return true if
22145 the target is not thumb1. */
22147 arm_have_conditional_execution (void)
22149 return !TARGET_THUMB1;
22152 #include "gt-arm.h"