/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
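/* Editorial illustration (not from the original source): with this limit,
   a short guarded block such as

       if (x == 0) { a = 1; b = 2; }

   can be emitted as two conditional instructions ("moveq a, #1;
   moveq b, #2") instead of a branch around them, provided no more than
   max_insns_skipped instructions would have to be conditionalised.  */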
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE       (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                       | FL_CO_PROC)
#define FL_FOR_ARCH2      FL_NOTM
#define FL_FOR_ARCH3      (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M     (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4      (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T     (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5      (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T     (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E     (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE    (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ   FL_FOR_ARCH5TE
#define FL_FOR_ARCH6      (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J     FL_FOR_ARCH6
#define FL_FOR_ARCH6K     (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z     FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK    FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2    (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M     (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7      (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A     (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R     (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M     (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM    (FL_FOR_ARCH7M | FL_ARCH7EM)
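/* Worked example (editorial note): expanding the chain above gives, e.g.,

       FL_FOR_ARCH5TE = FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                        | FL_ARCH5 | FL_ARCH5E | FL_THUMB

   i.e. each architecture inherits every capability bit of its parent
   and adds its own.  */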
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;
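/* Illustrative example (editorial): a POST_INC of r1 prints as
   "[r1], #4" for an SImode access but as "[r1], #2" for an HImode
   access; the step equals the size of the access mode, which cannot be
   recovered from the address RTX alone, hence this variable.  */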
/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

static enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
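/* Editorial illustration (not from the original source): a Thumb-2
   block such as

       ite	eq
       moveq	r0, #1
       movne	r0, #0

   is one IT block; arm_condexec_count reaches 2 for it, and the
   mask/masklen pair describes the then/else pattern that the "ite"
   prefix encodes.  */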
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
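/* Editorial illustration: in unified assembler syntax a left shift is
   written "lsl", e.g. "mov r0, r1, lsl #2", whereas the older divided
   syntax spells the same shift operator "asl".  */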
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */
struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  3
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  1
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  2
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  1
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA,      0, VFP_NONE,       false, false},
  {"fpe2",           ARM_FP_MODEL_FPA,      2, VFP_NONE,       false, false},
  {"fpe3",           ARM_FP_MODEL_FPA,      3, VFP_NONE,       false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE,       false, false},
  {"vfp",            ARM_FP_MODEL_VFP,      2, VFP_REG_D16,    false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  false},
  {"neon-fp16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  true},
  {"vfpv4",          ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP,      4, VFP_REG_D16,    false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP,      4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    true,  true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
};


struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};


struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};


struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
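/* Editorial note: the loop above is Kernighan's bit-counting trick;
   "value &= value - 1" clears exactly one set bit per iteration, so the
   loop executes once per set bit.  E.g. 0b101000 -> 0b100000 -> 0 gives
   a count of 2.  */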
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
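  /* Editorial illustration (not from the original source): a plain
     64-bit division such as

         long long q = a / b;

     can therefore be compiled as a call to __aeabi_ldivmod with A in
     {r0, r1} and B in {r2, r3}; the quotient comes back in {r0, r1},
     and the remainder, returned in {r2, r3}, is simply ignored.  */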
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);
      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
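  /* Editorial note: with the optabs left NULL above, the compiler expands
     an HFmode operation such as "a + b" by widening both operands via
     __gnu_h2f_ieee (or __gnu_h2f_alternative), performing the addition in
     SFmode, and narrowing the result back with __gnu_f2h_ieee (or
     __gnu_f2h_alternative).  */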
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();
  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
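/* Editorial note: the net effect is that, on AAPCS targets, va_list
   behaves as if <stdarg.h> declared

       typedef struct __va_list { void *__ap; } va_list;

   matching the definition quoted from AAPCS \S 7.1.4 above.  */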
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Lookup NAME in SEL.  */
static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    if (streq (name, sel->name))
      return sel;

  error ("bad value (%s) for %s switch", name, desc);

  return NULL;
}
/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu (arg, all_architectures, "-march");
      return true;

    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu (arg, all_cores, "-mcpu");
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu (arg, all_cores, "-mtune");
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  unsigned int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
	{
	  int value = atoi (p);
	  if (value > 0)
	    columns = value;
	}
      if (columns == 0)
	/* Use a reasonable default.  */
	columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  do
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_cores[i].name);
	  remaining = columns - (len + 4);
	}
    }
  while (--i > 0);

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  do
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
	}
    }
  while (--i > 0);
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;
      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}
      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	{
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	    {
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
	target_fpu_name = "maverick";
      else
	target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
	{
	  arm_fpu_desc = &all_fpus[i];
	  break;
	}
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
	arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
	arm_fpu_attr = FPU_FPE3;
      else
	arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");

      if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
1730 if (TARGET_AAPCS_BASED)
1732 if (arm_abi == ARM_ABI_IWMMXT)
1733 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1734 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1735 && TARGET_HARD_FLOAT
1737 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1739 arm_pcs_default = ARM_PCS_AAPCS;
1743 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1744 sorry ("-mfloat-abi=hard and VFP");
1746 if (arm_abi == ARM_ABI_APCS)
1747 arm_pcs_default = ARM_PCS_APCS;
1749 arm_pcs_default = ARM_PCS_ATPCS;
1752 /* For arm2/3 there is no need to do any scheduling if there is only
1753 a floating point emulator, or we are doing software floating-point. */
1754 if ((TARGET_SOFT_FLOAT
1755 || (TARGET_FPA && arm_fpu_desc->rev))
1756 && (tune_flags & FL_MODE32) == 0)
1757 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1759 if (target_thread_switch)
1761 if (strcmp (target_thread_switch, "soft") == 0)
1762 target_thread_pointer = TP_SOFT;
1763 else if (strcmp (target_thread_switch, "auto") == 0)
1764 target_thread_pointer = TP_AUTO;
1765 else if (strcmp (target_thread_switch, "cp15") == 0)
1766 target_thread_pointer = TP_CP15;
1768 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1771 /* Use the cp15 method if it is available. */
1772 if (target_thread_pointer == TP_AUTO)
1774 if (arm_arch6k && !TARGET_THUMB1)
1775 target_thread_pointer = TP_CP15;
1777 target_thread_pointer = TP_SOFT;
1780 if (TARGET_HARD_TP && TARGET_THUMB1)
1781 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1783 /* Override the default structure alignment for AAPCS ABI. */
1784 if (TARGET_AAPCS_BASED)
1785 arm_structure_size_boundary = 8;
1787 if (structure_size_string != NULL)
1789 int size = strtol (structure_size_string, NULL, 0);
1791 if (size == 8 || size == 32
1792 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1793 arm_structure_size_boundary = size;
1795 warning (0, "structure size boundary can only be set to %s",
1796 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
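/* For illustration (a hedged sketch, not part of this file): the
   boundary validated above is easiest to see with a one-byte struct:

     struct s { char c; };

   With -mstructure-size-boundary=32 (the old APCS default), sizeof
   (struct s) is rounded up to 4 bytes; the AAPCS value of 8 bits set
   above leaves it at 1.  */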
1799 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1801 error ("RTP PIC is incompatible with Thumb");
1805 /* If stack checking is disabled, we can use r10 as the PIC register,
1806 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1807 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1809 if (TARGET_VXWORKS_RTP)
1810 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1811 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1814 if (flag_pic && TARGET_VXWORKS_RTP)
1815 arm_pic_register = 9;
1817 if (arm_pic_register_string != NULL)
1819 int pic_register = decode_reg_name (arm_pic_register_string);
1822 warning (0, "-mpic-register= is useless without -fpic");
1824 /* Prevent the user from choosing an obviously stupid PIC register. */
1825 else if (pic_register < 0 || call_used_regs[pic_register]
1826 || pic_register == HARD_FRAME_POINTER_REGNUM
1827 || pic_register == STACK_POINTER_REGNUM
1828 || pic_register >= PC_REGNUM
1829 || (TARGET_VXWORKS_RTP
1830 && (unsigned int) pic_register != arm_pic_register))
1831 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1833 arm_pic_register = pic_register;
1836 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1837 if (fix_cm3_ldrd == 2)
1839 if (arm_selected_cpu->core == cortexm3)
1840 fix_cm3_ldrd = 1;
1845 if (TARGET_THUMB1 && flag_schedule_insns)
1847 /* Don't warn since it's on by default in -O2. */
1848 flag_schedule_insns = 0;
1853 /* If optimizing for size, bump the number of instructions that we
1854 are prepared to conditionally execute (even on a StrongARM). */
1855 max_insns_skipped = 6;
1859 /* StrongARM has early execution of branches, so a sequence
1860 that is worth skipping is shorter. */
1861 if (arm_tune_strongarm)
1862 max_insns_skipped = 3;
1865 /* Hot/Cold partitioning is not currently supported, since we can't
1866 handle literal pool placement in that case. */
1867 if (flag_reorder_blocks_and_partition)
1869 inform (input_location,
1870 "-freorder-blocks-and-partition not supported on this architecture");
1871 flag_reorder_blocks_and_partition = 0;
1872 flag_reorder_blocks = 1;
1875 /* Register global variables with the garbage collector. */
1876 arm_add_gc_roots ();
1880 arm_add_gc_roots (void)
1882 gcc_obstack_init (&minipool_obstack);
1883 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1886 /* A table of known ARM exception types.
1887 For use with the interrupt function attribute. */
1889 typedef struct
1890 {
1891 const char *const arg;
1892 const unsigned long return_value;
1893 }
1894 isr_attribute_arg;
1896 static const isr_attribute_arg isr_attribute_args [] =
1898 { "IRQ", ARM_FT_ISR },
1899 { "irq", ARM_FT_ISR },
1900 { "FIQ", ARM_FT_FIQ },
1901 { "fiq", ARM_FT_FIQ },
1902 { "ABORT", ARM_FT_ISR },
1903 { "abort", ARM_FT_ISR },
1904 { "ABORT", ARM_FT_ISR },
1905 { "abort", ARM_FT_ISR },
1906 { "UNDEF", ARM_FT_EXCEPTION },
1907 { "undef", ARM_FT_EXCEPTION },
1908 { "SWI", ARM_FT_EXCEPTION },
1909 { "swi", ARM_FT_EXCEPTION },
1910 { NULL, ARM_FT_NORMAL }
1913 /* Returns the (interrupt) function type of the current
1914 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1916 static unsigned long
1917 arm_isr_value (tree argument)
1919 const isr_attribute_arg * ptr;
1920 const char *arg;
1922 if (!arm_arch_notm)
1923 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1925 /* No argument - default to IRQ. */
1926 if (argument == NULL_TREE)
1927 return ARM_FT_ISR;
1929 /* Get the value of the argument. */
1930 if (TREE_VALUE (argument) == NULL_TREE
1931 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1932 return ARM_FT_UNKNOWN;
1934 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1936 /* Check it against the list of known arguments. */
1937 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1938 if (streq (arg, ptr->arg))
1939 return ptr->return_value;
1941 /* An unrecognized interrupt type. */
1942 return ARM_FT_UNKNOWN;
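/* For illustration (editorial sketch, not part of the compiler): the
   strings in isr_attribute_args are matched against the argument of
   the "interrupt" (or "isr") function attribute in user code, e.g.

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   A missing argument defaults to IRQ, and an unrecognized string
   yields ARM_FT_UNKNOWN, which callers treat as an error.  */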
1945 /* Computes the type of the current function. */
1947 static unsigned long
1948 arm_compute_func_type (void)
1950 unsigned long type = ARM_FT_UNKNOWN;
1951 tree a;
1952 tree attr;
1954 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1956 /* Decide if the current function is volatile. Such functions
1957 never return, and many memory cycles can be saved by not storing
1958 register values that will never be needed again. This optimization
1959 was added to speed up context switching in a kernel application. */
1960 if (optimize > 0
1961 && (TREE_NOTHROW (current_function_decl)
1962 || !(flag_unwind_tables
1963 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1964 && TREE_THIS_VOLATILE (current_function_decl))
1965 type |= ARM_FT_VOLATILE;
1967 if (cfun->static_chain_decl != NULL)
1968 type |= ARM_FT_NESTED;
1970 attr = DECL_ATTRIBUTES (current_function_decl);
1972 a = lookup_attribute ("naked", attr);
1973 if (a != NULL_TREE)
1974 type |= ARM_FT_NAKED;
1976 a = lookup_attribute ("isr", attr);
1977 if (a == NULL_TREE)
1978 a = lookup_attribute ("interrupt", attr);
1980 if (a == NULL_TREE)
1981 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1982 else
1983 type |= arm_isr_value (TREE_VALUE (a));
1988 /* Returns the type of the current function. */
1991 arm_current_func_type (void)
1993 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1994 cfun->machine->func_type = arm_compute_func_type ();
1996 return cfun->machine->func_type;
2000 arm_allocate_stack_slots_for_args (void)
2002 /* Naked functions should not allocate stack slots for arguments. */
2003 return !IS_NAKED (arm_current_func_type ());
2007 /* Output assembler code for a block containing the constant parts
2008 of a trampoline, leaving space for the variable parts.
2010 On the ARM, (if r8 is the static chain regnum, and remembering that
2011 referencing pc adds an offset of 8) the trampoline looks like:
2014 .word static chain value
2015 .word function's address
2016 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2019 arm_asm_trampoline_template (FILE *f)
2023 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2024 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2026 else if (TARGET_THUMB2)
2028 /* The Thumb-2 trampoline is similar to the arm implementation.
2029 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2030 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2031 STATIC_CHAIN_REGNUM, PC_REGNUM);
2032 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2036 ASM_OUTPUT_ALIGN (f, 2);
2037 fprintf (f, "\t.code\t16\n");
2038 fprintf (f, ".Ltrampoline_start:\n");
2039 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2040 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2041 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2042 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2043 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2044 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2046 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2047 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2050 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2053 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2055 rtx fnaddr, mem, a_tramp;
2057 emit_block_move (m_tramp, assemble_trampoline_template (),
2058 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2060 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2061 emit_move_insn (mem, chain_value);
2063 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2064 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2065 emit_move_insn (mem, fnaddr);
2067 a_tramp = XEXP (m_tramp, 0);
2068 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2069 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2070 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
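/* For illustration: the __clear_cache call above keeps the instruction
   cache coherent with the freshly written trampoline words.  A hedged
   user-level equivalent via the builtin (hypothetical buffer name):

     char tramp[40];
     ...
     __builtin___clear_cache (tramp, tramp + sizeof tramp);

   Skipping this step can leave stale instructions visible on ARM.  */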
2073 /* Thumb trampolines should be entered in thumb mode, so set
2074 the bottom bit of the address. */
2077 arm_trampoline_adjust_address (rtx addr)
2080 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2081 NULL, 0, OPTAB_LIB_WIDEN);
2085 /* Return 1 if it is possible to return using a single instruction.
2086 If SIBLING is non-null, this is a test for a return before a sibling
2087 call. SIBLING is the call insn, so we can examine its register usage. */
2090 use_return_insn (int iscond, rtx sibling)
2093 unsigned int func_type;
2094 unsigned long saved_int_regs;
2095 unsigned HOST_WIDE_INT stack_adjust;
2096 arm_stack_offsets *offsets;
2098 /* Never use a return instruction before reload has run. */
2099 if (!reload_completed)
2102 func_type = arm_current_func_type ();
2104 /* Naked, volatile and stack alignment functions need special
2105 consideration. */
2106 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2109 /* So do interrupt functions that use the frame pointer, and all
2110 Thumb interrupt functions. */
2111 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2114 offsets = arm_get_frame_offsets ();
2115 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2117 /* As do variadic functions. */
2118 if (crtl->args.pretend_args_size
2119 || cfun->machine->uses_anonymous_args
2120 /* Or if the function calls __builtin_eh_return () */
2121 || crtl->calls_eh_return
2122 /* Or if the function calls alloca */
2123 || cfun->calls_alloca
2124 /* Or if there is a stack adjustment. However, if the stack pointer
2125 is saved on the stack, we can use a pre-incrementing stack load. */
2126 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2127 && stack_adjust == 4)))
2130 saved_int_regs = offsets->saved_regs_mask;
2132 /* Unfortunately, the insn
2134 ldmib sp, {..., sp, ...}
2136 triggers a bug on most SA-110 based devices, such that the stack
2137 pointer won't be correctly restored if the instruction takes a
2138 page fault. We work around this problem by popping r3 along with
2139 the other registers, since that is never slower than executing
2140 another instruction.
2142 We test for !arm_arch5 here, because code for any architecture
2143 less than this could potentially be run on one of the buggy
2144 chips. */
2145 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2147 /* Validate that r3 is a call-clobbered register (always true in
2148 the default abi) ... */
2149 if (!call_used_regs[3])
2152 /* ... that it isn't being used for a return value ... */
2153 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2156 /* ... or for a tail-call argument ... */
2159 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2161 if (find_regno_fusage (sibling, USE, 3))
2165 /* ... and that there are no call-saved registers in r0-r2
2166 (always true in the default ABI). */
2167 if (saved_int_regs & 0x7)
2171 /* Can't be done if interworking with Thumb, and any registers have been
2172 stacked. */
2173 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
2176 /* On StrongARM, conditional returns are expensive if they aren't
2177 taken and multiple registers have been stacked. */
2178 if (iscond && arm_tune_strongarm)
2180 /* Conditional return when just the LR is stored is a simple
2181 conditional-load instruction, that's not expensive. */
2182 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2186 && arm_pic_register != INVALID_REGNUM
2187 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2191 /* If there are saved registers but the LR isn't saved, then we need
2192 two instructions for the return. */
2193 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2196 /* Can't be done if any of the FPA regs are pushed,
2197 since this also requires an insn. */
2198 if (TARGET_HARD_FLOAT && TARGET_FPA)
2199 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2200 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2203 /* Likewise VFP regs. */
2204 if (TARGET_HARD_FLOAT && TARGET_VFP)
2205 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2206 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2209 if (TARGET_REALLY_IWMMXT)
2210 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2211 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2217 /* Return TRUE if int I is a valid immediate ARM constant. */
2220 const_ok_for_arm (HOST_WIDE_INT i)
2224 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2225 be all zero, or all one. */
2226 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2227 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2228 != ((~(unsigned HOST_WIDE_INT) 0)
2229 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2232 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2234 /* Fast return for 0 and small values. We must do this for zero, since
2235 the code below can't handle that one case. */
2236 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2239 /* Get the number of trailing zeros. */
2240 lowbit = ffs((int) i) - 1;
2242 /* Only even shifts are allowed in ARM mode so round down to the
2243 nearest even number. */
2247 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2252 /* Allow rotated constants in ARM mode. */
2254 && ((i & ~0xc000003f) == 0
2255 || (i & ~0xf000000f) == 0
2256 || (i & ~0xfc000003) == 0))
2263 /* Allow repeated pattern. */
2264 v = i & 0xff;
2265 v |= v << 16;
2266 if (i == v || i == (v | (v << 8)))
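/* For illustration (editorial sketch, not used by the compiler): in
   ARM mode the test above reduces to "an 8-bit value rotated right by
   an even amount".  A standalone equivalent, assuming 32-bit
   unsigned int:

     static int
     arm_immediate_p (unsigned int x)
     {
       int rot;
       for (rot = 0; rot < 32; rot += 2)
         {
           /- Rotate X left by ROT to undo a rotate-right encoding. -/
           unsigned int y = rot ? (x << rot) | (x >> (32 - rot)) : x;
           if ((y & ~0xffu) == 0)
             return 1;
         }
       return 0;
     }

   E.g. 0xff, 0x3fc00 and 0xf000000f are accepted; 0x101 is not,
   since its two set bits cannot fit in one 8-bit window.  */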
2273 /* Return true if I is a valid constant for the operation CODE. */
2275 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2277 if (const_ok_for_arm (i))
2301 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2303 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2309 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2313 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2320 /* Emit a sequence of insns to handle a large constant.
2321 CODE is the code of the operation required, it can be any of SET, PLUS,
2322 IOR, AND, XOR, MINUS;
2323 MODE is the mode in which the operation is being performed;
2324 VAL is the integer to operate on;
2325 SOURCE is the other operand (a register, or a null-pointer for SET);
2326 SUBTARGETS means it is safe to create scratch registers if that will
2327 either produce a simpler sequence, or we will want to cse the values.
2328 Return value is the number of insns emitted. */
2330 /* ??? Tweak this for thumb2. */
2332 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2333 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2337 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2338 cond = COND_EXEC_TEST (PATTERN (insn));
2342 if (subtargets || code == SET
2343 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2344 && REGNO (target) != REGNO (source)))
2346 /* After arm_reorg has been called, we can't fix up expensive
2347 constants by pushing them into memory so we must synthesize
2348 them in-line, regardless of the cost. This is only likely to
2349 be more costly on chips that have load delay slots and we are
2350 compiling without running the scheduler (so no splitting
2351 occurred before the final instruction emission).
2353 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2355 if (!after_arm_reorg
2357 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2359 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2364 /* Currently SET is the only monadic value for CODE, all
2365 the rest are dyadic. */
2366 if (TARGET_USE_MOVT)
2367 arm_emit_movpair (target, GEN_INT (val));
2369 emit_set_insn (target, GEN_INT (val));
2375 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2377 if (TARGET_USE_MOVT)
2378 arm_emit_movpair (temp, GEN_INT (val));
2380 emit_set_insn (temp, GEN_INT (val));
2382 /* For MINUS, the value is subtracted from, since we never
2383 have subtraction of a constant. */
2385 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2387 emit_set_insn (target,
2388 gen_rtx_fmt_ee (code, mode, source, temp));
2394 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2398 /* Return the number of instructions required to synthesize the given
2399 constant, if we start emitting them from bit-position I. */
2401 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2403 HOST_WIDE_INT temp1;
2404 int step_size = TARGET_ARM ? 2 : 1;
2407 gcc_assert (TARGET_ARM || i == 0);
2415 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2420 temp1 = remainder & ((0x0ff << end)
2421 | ((i < end) ? (0xff >> (32 - end)) : 0));
2422 remainder &= ~temp1;
2427 } while (remainder);
2432 find_best_start (unsigned HOST_WIDE_INT remainder)
2434 int best_consecutive_zeros = 0;
2438 /* If we aren't targeting ARM, the best place to start is always at
2443 for (i = 0; i < 32; i += 2)
2445 int consecutive_zeros = 0;
2447 if (!(remainder & (3 << i)))
2449 while ((i < 32) && !(remainder & (3 << i)))
2451 consecutive_zeros += 2;
2454 if (consecutive_zeros > best_consecutive_zeros)
2456 best_consecutive_zeros = consecutive_zeros;
2457 best_start = i - consecutive_zeros;
2463 /* So long as it won't require any more insns to do so, it's
2464 desirable to emit a small constant (in bits 0...9) in the last
2465 insn. This way there is more chance that it can be combined with
2466 a later addressing insn to form a pre-indexed load or store
2467 operation. Consider:
2469 *((volatile int *)0xe0000100) = 1;
2470 *((volatile int *)0xe0000110) = 2;
2472 We want this to wind up as:
2476 str rB, [rA, #0x100]
2478 str rB, [rA, #0x110]
2480 rather than having to synthesize both large constants from scratch.
2482 Therefore, we calculate how many insns would be required to emit
2483 the constant starting from `best_start', and also starting from
2484 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2485 yield a shorter sequence, we may as well use zero. */
2487 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2488 && (count_insns_for_constant (remainder, 0) <=
2489 count_insns_for_constant (remainder, best_start)))
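/* Editorial worked example (hedged): for the constant 0xe0000100 the
   widest 2-bit-aligned run of zeros gives best_start == 10, but
   starting there would emit the small byte 0x100 first.  The test
   above prefers starting from the top (best_start == 0) when the
   insn counts tie, so the synthesis comes out roughly as

     mov rA, #0xe0000000   @ rA is a stand-in register name
     add rA, rA, #0x100

   leaving the small constant in the last insn, as desired.  */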
2495 /* Emit an instruction with the indicated PATTERN. If COND is
2496 non-NULL, conditionalize the execution of the instruction on COND
2497 being true. */
2500 emit_constant_insn (rtx cond, rtx pattern)
2503 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2504 emit_insn (pattern);
2507 /* As above, but extra parameter GENERATE which, if clear, suppresses
2508 RTL generation. */
2509 /* ??? This needs more work for thumb2. */
2512 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2513 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2518 int final_invert = 0;
2519 int can_negate_initial = 0;
2521 int num_bits_set = 0;
2522 int set_sign_bit_copies = 0;
2523 int clear_sign_bit_copies = 0;
2524 int clear_zero_bit_copies = 0;
2525 int set_zero_bit_copies = 0;
2527 unsigned HOST_WIDE_INT temp1, temp2;
2528 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2529 int step_size = TARGET_ARM ? 2 : 1;
2531 /* Find out which operations are safe for a given CODE. Also do a quick
2532 check for degenerate cases; these can occur when DImode operations
2533 are split. */
2543 can_negate_initial = 1;
2547 if (remainder == 0xffffffff)
2550 emit_constant_insn (cond,
2551 gen_rtx_SET (VOIDmode, target,
2552 GEN_INT (ARM_SIGN_EXTEND (val))));
2558 if (reload_completed && rtx_equal_p (target, source))
2562 emit_constant_insn (cond,
2563 gen_rtx_SET (VOIDmode, target, source));
2575 emit_constant_insn (cond,
2576 gen_rtx_SET (VOIDmode, target, const0_rtx));
2579 if (remainder == 0xffffffff)
2581 if (reload_completed && rtx_equal_p (target, source))
2584 emit_constant_insn (cond,
2585 gen_rtx_SET (VOIDmode, target, source));
2594 if (reload_completed && rtx_equal_p (target, source))
2597 emit_constant_insn (cond,
2598 gen_rtx_SET (VOIDmode, target, source));
2602 if (remainder == 0xffffffff)
2605 emit_constant_insn (cond,
2606 gen_rtx_SET (VOIDmode, target,
2607 gen_rtx_NOT (mode, source)));
2613 /* We treat MINUS as (val - source), since (source - val) is always
2614 passed as (source + (-val)). */
2618 emit_constant_insn (cond,
2619 gen_rtx_SET (VOIDmode, target,
2620 gen_rtx_NEG (mode, source)));
2623 if (const_ok_for_arm (val))
2626 emit_constant_insn (cond,
2627 gen_rtx_SET (VOIDmode, target,
2628 gen_rtx_MINUS (mode, GEN_INT (val),
2640 /* If we can do it in one insn get out quickly. */
2641 if (const_ok_for_arm (val)
2642 || (can_negate_initial && const_ok_for_arm (-val))
2643 || (can_invert && const_ok_for_arm (~val)))
2646 emit_constant_insn (cond,
2647 gen_rtx_SET (VOIDmode, target,
2649 ? gen_rtx_fmt_ee (code, mode, source,
2655 /* Calculate a few attributes that may be useful for specific
2656 optimizations. */
2657 /* Count number of leading zeros. */
2658 for (i = 31; i >= 0; i--)
2660 if ((remainder & (1 << i)) == 0)
2661 clear_sign_bit_copies++;
2666 /* Count number of leading 1's. */
2667 for (i = 31; i >= 0; i--)
2669 if ((remainder & (1 << i)) != 0)
2670 set_sign_bit_copies++;
2675 /* Count number of trailing zeros. */
2676 for (i = 0; i <= 31; i++)
2678 if ((remainder & (1 << i)) == 0)
2679 clear_zero_bit_copies++;
2684 /* Count number of trailing 1's. */
2685 for (i = 0; i <= 31; i++)
2687 if ((remainder & (1 << i)) != 0)
2688 set_zero_bit_copies++;
2696 /* See if we can use movw. */
2697 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2700 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2705 /* See if we can do this by sign_extending a constant that is known
2706 to be negative. This is a good way of doing it, since the shift
2707 may well merge into a subsequent insn. */
2708 if (set_sign_bit_copies > 1)
2710 if (const_ok_for_arm
2711 (temp1 = ARM_SIGN_EXTEND (remainder
2712 << (set_sign_bit_copies - 1))))
2716 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2717 emit_constant_insn (cond,
2718 gen_rtx_SET (VOIDmode, new_src,
2720 emit_constant_insn (cond,
2721 gen_ashrsi3 (target, new_src,
2722 GEN_INT (set_sign_bit_copies - 1)));
2726 /* For an inverted constant, we will need to set the low bits,
2727 these will be shifted out of harm's way. */
2728 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2729 if (const_ok_for_arm (~temp1))
2733 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2734 emit_constant_insn (cond,
2735 gen_rtx_SET (VOIDmode, new_src,
2737 emit_constant_insn (cond,
2738 gen_ashrsi3 (target, new_src,
2739 GEN_INT (set_sign_bit_copies - 1)));
2745 /* See if we can calculate the value as the difference between two
2746 valid immediates. */
2747 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2749 int topshift = clear_sign_bit_copies & ~1;
2751 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2752 & (0xff000000 >> topshift));
2754 /* If temp1 is zero, then that means the 9 most significant
2755 bits of remainder were 1 and we've caused it to overflow.
2756 When topshift is 0 we don't need to do anything since we
2757 can borrow from 'bit 32'. */
2758 if (temp1 == 0 && topshift != 0)
2759 temp1 = 0x80000000 >> (topshift - 1);
2761 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2763 if (const_ok_for_arm (temp2))
2767 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2768 emit_constant_insn (cond,
2769 gen_rtx_SET (VOIDmode, new_src,
2771 emit_constant_insn (cond,
2772 gen_addsi3 (target, new_src,
2780 /* See if we can generate this by setting the bottom (or the top)
2781 16 bits, and then shifting these into the other half of the
2782 word. We only look for the simplest cases, to do more would cost
2783 too much. Be careful, however, not to generate this when the
2784 alternative would take fewer insns. */
2785 if (val & 0xffff0000)
2787 temp1 = remainder & 0xffff0000;
2788 temp2 = remainder & 0x0000ffff;
2790 /* Overlaps outside this range are best done using other methods. */
2791 for (i = 9; i < 24; i++)
2793 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2794 && !const_ok_for_arm (temp2))
2796 rtx new_src = (subtargets
2797 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2799 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2800 source, subtargets, generate);
2808 gen_rtx_ASHIFT (mode, source,
2815 /* Don't duplicate cases already considered. */
2816 for (i = 17; i < 24; i++)
2818 if (((temp1 | (temp1 >> i)) == remainder)
2819 && !const_ok_for_arm (temp1))
2821 rtx new_src = (subtargets
2822 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2824 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2825 source, subtargets, generate);
2830 gen_rtx_SET (VOIDmode, target,
2833 gen_rtx_LSHIFTRT (mode, source,
2844 /* If we have IOR or XOR, and the constant can be loaded in a
2845 single instruction, and we can find a temporary to put it in,
2846 then this can be done in two instructions instead of 3-4. */
2848 /* TARGET can't be NULL if SUBTARGETS is 0 */
2849 || (reload_completed && !reg_mentioned_p (target, source)))
2851 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2855 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2857 emit_constant_insn (cond,
2858 gen_rtx_SET (VOIDmode, sub,
2860 emit_constant_insn (cond,
2861 gen_rtx_SET (VOIDmode, target,
2862 gen_rtx_fmt_ee (code, mode,
2873 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2874 followed by 0s, e.g. 0xfff00000)
2875 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2877 This can be done in 2 instructions by using shifts with mov or mvn.
2882 mvn r0, r0, lsr #12 */
2883 if (set_sign_bit_copies > 8
2884 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2888 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2889 rtx shift = GEN_INT (set_sign_bit_copies);
2893 gen_rtx_SET (VOIDmode, sub,
2895 gen_rtx_ASHIFT (mode,
2900 gen_rtx_SET (VOIDmode, target,
2902 gen_rtx_LSHIFTRT (mode, sub,
2909 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2911 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2913 For example, r0 = r0 | 0xfff
2918 if (set_zero_bit_copies > 8
2919 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2923 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2924 rtx shift = GEN_INT (set_zero_bit_copies);
2928 gen_rtx_SET (VOIDmode, sub,
2930 gen_rtx_LSHIFTRT (mode,
2935 gen_rtx_SET (VOIDmode, target,
2937 gen_rtx_ASHIFT (mode, sub,
2943 /* This will never be reached for Thumb2 because orn is a valid
2944 instruction. This is for Thumb1 and the ARM 32 bit cases.
2946 x = y | constant (such that ~constant is a valid constant)
2948 x = ~(~y & ~constant).
2950 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2954 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2955 emit_constant_insn (cond,
2956 gen_rtx_SET (VOIDmode, sub,
2957 gen_rtx_NOT (mode, source)));
2960 sub = gen_reg_rtx (mode);
2961 emit_constant_insn (cond,
2962 gen_rtx_SET (VOIDmode, sub,
2963 gen_rtx_AND (mode, source,
2965 emit_constant_insn (cond,
2966 gen_rtx_SET (VOIDmode, target,
2967 gen_rtx_NOT (mode, sub)));
2974 /* See if two shifts will do 2 or more insns' worth of work. */
2975 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2977 HOST_WIDE_INT shift_mask = ((0xffffffff
2978 << (32 - clear_sign_bit_copies))
2981 if ((remainder | shift_mask) != 0xffffffff)
2985 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2986 insns = arm_gen_constant (AND, mode, cond,
2987 remainder | shift_mask,
2988 new_src, source, subtargets, 1);
2993 rtx targ = subtargets ? NULL_RTX : target;
2994 insns = arm_gen_constant (AND, mode, cond,
2995 remainder | shift_mask,
2996 targ, source, subtargets, 0);
3002 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3003 rtx shift = GEN_INT (clear_sign_bit_copies);
3005 emit_insn (gen_ashlsi3 (new_src, source, shift));
3006 emit_insn (gen_lshrsi3 (target, new_src, shift));
3012 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3014 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3016 if ((remainder | shift_mask) != 0xffffffff)
3020 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3022 insns = arm_gen_constant (AND, mode, cond,
3023 remainder | shift_mask,
3024 new_src, source, subtargets, 1);
3029 rtx targ = subtargets ? NULL_RTX : target;
3031 insns = arm_gen_constant (AND, mode, cond,
3032 remainder | shift_mask,
3033 targ, source, subtargets, 0);
3039 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3040 rtx shift = GEN_INT (clear_zero_bit_copies);
3042 emit_insn (gen_lshrsi3 (new_src, source, shift));
3043 emit_insn (gen_ashlsi3 (target, new_src, shift));
3055 for (i = 0; i < 32; i++)
3056 if (remainder & (1 << i))
3060 || (code != IOR && can_invert && num_bits_set > 16))
3061 remainder ^= 0xffffffff;
3062 else if (code == PLUS && num_bits_set > 16)
3063 remainder = (-remainder) & 0xffffffff;
3065 /* For XOR, if more than half the bits are set and there's a sequence
3066 of more than 8 consecutive ones in the pattern then we can XOR by the
3067 inverted constant and then invert the final result; this may save an
3068 instruction and might also lead to the final mvn being merged with
3069 some other operation. */
3070 else if (code == XOR && num_bits_set > 16
3071 && (count_insns_for_constant (remainder ^ 0xffffffff,
3073 (remainder ^ 0xffffffff))
3074 < count_insns_for_constant (remainder,
3075 find_best_start (remainder))))
3077 remainder ^= 0xffffffff;
3086 /* Now try and find a way of doing the job in either two or three
3087 instructions.
3088 We start by looking for the largest block of zeros that are aligned on
3089 a 2-bit boundary, we then fill up the temps, wrapping around to the
3090 top of the word when we drop off the bottom.
3091 In the worst case this code should produce no more than four insns.
3092 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3093 best place to start. */
3095 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3096 the same. */
3098 /* Now start emitting the insns. */
3099 i = find_best_start (remainder);
3106 if (remainder & (3 << (i - 2)))
3111 temp1 = remainder & ((0x0ff << end)
3112 | ((i < end) ? (0xff >> (32 - end)) : 0));
3113 remainder &= ~temp1;
3117 rtx new_src, temp1_rtx;
3119 if (code == SET || code == MINUS)
3121 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3122 if (can_invert && code != MINUS)
3127 if ((final_invert || remainder) && subtargets)
3128 new_src = gen_reg_rtx (mode);
3133 else if (can_negate)
3137 temp1 = trunc_int_for_mode (temp1, mode);
3138 temp1_rtx = GEN_INT (temp1);
3142 else if (code == MINUS)
3143 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3145 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3147 emit_constant_insn (cond,
3148 gen_rtx_SET (VOIDmode, new_src,
3158 else if (code == MINUS)
3164 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3174 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3175 gen_rtx_NOT (mode, source)));
3182 /* Canonicalize a comparison so that we are more likely to recognize it.
3183 This can be done for a few constant compares, where we can make the
3184 immediate value easier to load. */
3187 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3189 enum machine_mode mode;
3190 unsigned HOST_WIDE_INT i, maxval;
3192 mode = GET_MODE (*op0);
3193 if (mode == VOIDmode)
3194 mode = GET_MODE (*op1);
3196 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3198 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3199 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3200 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3201 for GTU/LEU in Thumb mode. */
3206 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3207 available. */
3208 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3211 if (code == GT || code == LE
3212 || (!TARGET_ARM && (code == GTU || code == LEU)))
3214 /* Missing comparison. First try to use an available
3215 comparison. */
3216 if (GET_CODE (*op1) == CONST_INT)
3224 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3226 *op1 = GEN_INT (i + 1);
3227 return code == GT ? GE : LT;
3232 if (i != ~((unsigned HOST_WIDE_INT) 0)
3233 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3235 *op1 = GEN_INT (i + 1);
3236 return code == GTU ? GEU : LTU;
3244 /* If that did not work, reverse the condition. */
3248 return swap_condition (code);
3254 /* Comparisons smaller than DImode. Only adjust comparisons against
3255 an out-of-range constant. */
3256 if (GET_CODE (*op1) != CONST_INT
3257 || const_ok_for_arm (INTVAL (*op1))
3258 || const_ok_for_arm (- INTVAL (*op1)))
3272 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3274 *op1 = GEN_INT (i + 1);
3275 return code == GT ? GE : LT;
3282 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3284 *op1 = GEN_INT (i - 1);
3285 return code == GE ? GT : LE;
3291 if (i != ~((unsigned HOST_WIDE_INT) 0)
3292 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3294 *op1 = GEN_INT (i + 1);
3295 return code == GTU ? GEU : LTU;
3302 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3304 *op1 = GEN_INT (i - 1);
3305 return code == GEU ? GTU : LEU;
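/* Editorial worked example (hedged): (GT x #0xffffff) cannot be
   emitted as one comparison, since 0xffffff is not a valid ARM
   immediate, but 0x1000000 (0x01 rotated) is; the code above
   therefore rewrites the test as (GE x #0x1000000), which a single
   cmp insn can implement.  */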
3317 /* Define how to find the value returned by a function. */
3320 arm_function_value(const_tree type, const_tree func,
3321 bool outgoing ATTRIBUTE_UNUSED)
3323 enum machine_mode mode;
3324 int unsignedp ATTRIBUTE_UNUSED;
3325 rtx r ATTRIBUTE_UNUSED;
3327 mode = TYPE_MODE (type);
3329 if (TARGET_AAPCS_BASED)
3330 return aapcs_allocate_return_reg (mode, type, func);
3332 /* Promote integer types. */
3333 if (INTEGRAL_TYPE_P (type))
3334 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3336 /* Promotes small structs returned in a register to full-word size
3337 for big-endian AAPCS. */
3338 if (arm_return_in_msb (type))
3340 HOST_WIDE_INT size = int_size_in_bytes (type);
3341 if (size % UNITS_PER_WORD != 0)
3343 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3344 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3348 return LIBCALL_VALUE (mode);
3352 libcall_eq (const void *p1, const void *p2)
3354 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3358 libcall_hash (const void *p1)
3360 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3364 add_libcall (htab_t htab, rtx libcall)
3366 *htab_find_slot (htab, libcall, INSERT) = libcall;
3370 arm_libcall_uses_aapcs_base (const_rtx libcall)
3372 static bool init_done = false;
3373 static htab_t libcall_htab;
3379 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3381 add_libcall (libcall_htab,
3382 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3383 add_libcall (libcall_htab,
3384 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3385 add_libcall (libcall_htab,
3386 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3387 add_libcall (libcall_htab,
3388 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3390 add_libcall (libcall_htab,
3391 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3392 add_libcall (libcall_htab,
3393 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3394 add_libcall (libcall_htab,
3395 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3396 add_libcall (libcall_htab,
3397 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3399 add_libcall (libcall_htab,
3400 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3401 add_libcall (libcall_htab,
3402 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3403 add_libcall (libcall_htab,
3404 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3405 add_libcall (libcall_htab,
3406 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3407 add_libcall (libcall_htab,
3408 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3409 add_libcall (libcall_htab,
3410 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3413 return libcall && htab_find (libcall_htab, libcall) != NULL;
3417 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3419 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3420 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3422 /* The following libcalls return their result in integer registers,
3423 even though they return a floating point value. */
3424 if (arm_libcall_uses_aapcs_base (libcall))
3425 return gen_rtx_REG (mode, ARG_REGISTER(1));
3429 return LIBCALL_VALUE (mode);
3432 /* Determine the amount of memory needed to store the possible return
3433 registers of an untyped call. */
3435 arm_apply_result_size (void)
3441 if (TARGET_HARD_FLOAT_ABI)
3447 if (TARGET_MAVERICK)
3450 if (TARGET_IWMMXT_ABI)
3457 /* Decide whether TYPE should be returned in memory (true)
3458 or in a register (false). FNTYPE is the type of the function making
3461 arm_return_in_memory (const_tree type, const_tree fntype)
3465 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3467 if (TARGET_AAPCS_BASED)
3469 /* Simple, non-aggregate types (i.e. not including vectors and
3470 complex) are always returned in a register (or registers).
3471 We don't care about which register here, so we can short-cut
3472 some of the detail. */
3473 if (!AGGREGATE_TYPE_P (type)
3474 && TREE_CODE (type) != VECTOR_TYPE
3475 && TREE_CODE (type) != COMPLEX_TYPE)
3478 /* Any return value that is no larger than one word can be
3479 returned in r0. */
3480 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3483 /* Check any available co-processors to see if they accept the
3484 type as a register candidate (VFP, for example, can return
3485 some aggregates in consecutive registers). These aren't
3486 available if the call is variadic. */
3487 if (aapcs_select_return_coproc (type, fntype) >= 0)
3490 /* Vector values should be returned using ARM registers, not
3491 memory (unless they're over 16 bytes, which will break since
3492 we only have four call-clobbered registers to play with). */
3493 if (TREE_CODE (type) == VECTOR_TYPE)
3494 return (size < 0 || size > (4 * UNITS_PER_WORD));
3496 /* The rest go in memory. */
3500 if (TREE_CODE (type) == VECTOR_TYPE)
3501 return (size < 0 || size > (4 * UNITS_PER_WORD));
3503 if (!AGGREGATE_TYPE_P (type)
3504 && TREE_CODE (type) != VECTOR_TYPE)
3505 /* All simple types are returned in registers. */
3508 if (arm_abi != ARM_ABI_APCS)
3510 /* ATPCS and later return aggregate types in memory only if they are
3511 larger than a word (or are variable size). */
3512 return (size < 0 || size > UNITS_PER_WORD);
3515 /* For the arm-wince targets we choose to be compatible with Microsoft's
3516 ARM and Thumb compilers, which always return aggregates in memory. */
3518 /* All structures/unions bigger than one word are returned in memory.
3519 Also catch the case where int_size_in_bytes returns -1. In this case
3520 the aggregate is either huge or of variable size, and in either case
3521 we will want to return it via memory and not in a register. */
3522 if (size < 0 || size > UNITS_PER_WORD)
3525 if (TREE_CODE (type) == RECORD_TYPE)
3529 /* For a struct the APCS says that we only return in a register
3530 if the type is 'integer like' and every addressable element
3531 has an offset of zero. For practical purposes this means
3532 that the structure can have at most one non bit-field element
3533 and that this element must be the first one in the structure. */
3535 /* Find the first field, ignoring non FIELD_DECL things which will
3536 have been created by C++. */
3537 for (field = TYPE_FIELDS (type);
3538 field && TREE_CODE (field) != FIELD_DECL;
3539 field = TREE_CHAIN (field))
3543 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3545 /* Check that the first field is valid for returning in a register. */
3547 /* ... Floats are not allowed */
3548 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3551 /* ... Aggregates that are not themselves valid for returning in
3552 a register are not allowed. */
3553 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3556 /* Now check the remaining fields, if any. Only bitfields are allowed,
3557 since they are not addressable. */
3558 for (field = TREE_CHAIN (field);
3560 field = TREE_CHAIN (field))
3562 if (TREE_CODE (field) != FIELD_DECL)
3565 if (!DECL_BIT_FIELD_TYPE (field))
3572 if (TREE_CODE (type) == UNION_TYPE)
3576 /* Unions can be returned in registers if every element is
3577 integral, or can be returned in an integer register. */
3578 for (field = TYPE_FIELDS (type);
3580 field = TREE_CHAIN (field))
3582 if (TREE_CODE (field) != FIELD_DECL)
3585 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3588 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3594 #endif /* not ARM_WINCE */
3596 /* Return all other types in memory. */
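/* For illustration, a hedged sketch of the APCS rules above:

     struct a { int i; };          -- "integer like": returned in r0
     struct b { int i; int j; };   -- wider than a word: memory
     struct c { float f; };        -- first field is a float: memory

   Under ATPCS/AAPCS the decision is purely size-based, so struct c
   would come back in a register there.  */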
3600 /* Indicate whether or not words of a double are in big-endian order. */
3603 arm_float_words_big_endian (void)
3605 if (TARGET_MAVERICK)
3608 /* For FPA, float words are always big-endian. For VFP, floats words
3609 follow the memory system mode. */
3617 return (TARGET_BIG_END ? 1 : 0);
3622 const struct pcs_attribute_arg
3626 } pcs_attribute_args[] =
3628 {"aapcs", ARM_PCS_AAPCS},
3629 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3631 /* We could recognize these, but changes would be needed elsewhere
3632 to implement them. */
3633 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3634 {"atpcs", ARM_PCS_ATPCS},
3635 {"apcs", ARM_PCS_APCS},
3637 {NULL, ARM_PCS_UNKNOWN}
3641 arm_pcs_from_attribute (tree attr)
3643 const struct pcs_attribute_arg *ptr;
3646 /* Get the value of the argument. */
3647 if (TREE_VALUE (attr) == NULL_TREE
3648 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3649 return ARM_PCS_UNKNOWN;
3651 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3653 /* Check it against the list of known arguments. */
3654 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3655 if (streq (arg, ptr->arg))
3658 /* An unrecognized PCS name. */
3659 return ARM_PCS_UNKNOWN;
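/* For illustration (editorial sketch): the "pcs" attribute consumed
   above selects the calling convention of a single function, e.g.

     double f (double) __attribute__ ((pcs ("aapcs")));

   which forces base (soft-float) argument passing for f even when
   the command line selects -mfloat-abi=hard.  */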
3662 /* Get the PCS variant to use for this call. TYPE is the function's type
3663 specification, DECL is the specific declaration. DECL may be null if
3664 the call could be indirect or if this is a library call. */
3666 arm_get_pcs_model (const_tree type, const_tree decl)
3668 bool user_convention = false;
3669 enum arm_pcs user_pcs = arm_pcs_default;
3674 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3677 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3678 user_convention = true;
3681 if (TARGET_AAPCS_BASED)
3683 /* Detect varargs functions. These always use the base rules
3684 (no argument is ever a candidate for a co-processor
3686 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3687 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3688 != void_type_node));
3690 if (user_convention)
3692 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3693 sorry ("Non-AAPCS derived PCS variant");
3694 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3695 error ("Variadic functions must use the base AAPCS variant");
3699 return ARM_PCS_AAPCS;
3700 else if (user_convention)
3702 else if (decl && flag_unit_at_a_time)
3704 /* Local functions never leak outside this compilation unit,
3705 so we are free to use whatever conventions are
3706 appropriate. */
3707 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3708 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3710 return ARM_PCS_AAPCS_LOCAL;
3713 else if (user_convention && user_pcs != arm_pcs_default)
3714 sorry ("PCS variant");
3716 /* For everything else we use the target's default. */
3717 return arm_pcs_default;
3722 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3723 const_tree fntype ATTRIBUTE_UNUSED,
3724 rtx libcall ATTRIBUTE_UNUSED,
3725 const_tree fndecl ATTRIBUTE_UNUSED)
3727 /* Record the unallocated VFP registers. */
3728 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3729 pcum->aapcs_vfp_reg_alloc = 0;
3732 /* Walk down the type tree of TYPE counting consecutive base elements.
3733 If *MODEP is VOIDmode, then set it to the first valid floating point
3734 type. If a non-floating point type is found, or if a floating point
3735 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3736 otherwise return the count in the sub-tree. */
3738 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3740 enum machine_mode mode;
3743 switch (TREE_CODE (type))
3746 mode = TYPE_MODE (type);
3747 if (mode != DFmode && mode != SFmode)
3750 if (*modep == VOIDmode)
3759 mode = TYPE_MODE (TREE_TYPE (type));
3760 if (mode != DFmode && mode != SFmode)
3763 if (*modep == VOIDmode)
3772 /* Use V2SImode and V4SImode as representatives of all 64-bit
3773 and 128-bit vector types, whether or not those modes are
3774 supported with the present options. */
3775 size = int_size_in_bytes (type);
3788 if (*modep == VOIDmode)
3791 /* Vector modes are considered to be opaque: two vectors are
3792 equivalent for the purposes of being homogeneous aggregates
3793 if they are the same size. */
3802 tree index = TYPE_DOMAIN (type);
3804 /* Can't handle incomplete types. */
3805 if (!COMPLETE_TYPE_P (type))
3808 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3811 || !TYPE_MAX_VALUE (index)
3812 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3813 || !TYPE_MIN_VALUE (index)
3814 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3818 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3819 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3821 /* There must be no padding. */
3822 if (!host_integerp (TYPE_SIZE (type), 1)
3823 || (tree_low_cst (TYPE_SIZE (type), 1)
3824 != count * GET_MODE_BITSIZE (*modep)))
3836 /* Can't handle incomplete types. */
3837 if (!COMPLETE_TYPE_P (type))
3840 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3842 if (TREE_CODE (field) != FIELD_DECL)
3845 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3851 /* There must be no padding. */
3852 if (!host_integerp (TYPE_SIZE (type), 1)
3853 || (tree_low_cst (TYPE_SIZE (type), 1)
3854 != count * GET_MODE_BITSIZE (*modep)))
3861 case QUAL_UNION_TYPE:
3863 /* These aren't very interesting except in a degenerate case. */
3868 /* Can't handle incomplete types. */
3869 if (!COMPLETE_TYPE_P (type))
3872 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3874 if (TREE_CODE (field) != FIELD_DECL)
3877 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3880 count = count > sub_count ? count : sub_count;
3883 /* There must be no padding. */
3884 if (!host_integerp (TYPE_SIZE (type), 1)
3885 || (tree_low_cst (TYPE_SIZE (type), 1)
3886 != count * GET_MODE_BITSIZE (*modep)))
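/* For illustration (hedged sketch): the walk above recognizes the
   AAPCS notion of a homogeneous aggregate, e.g.

     struct quat { float x, y, z, w; };   -- four SFmode elements: count 4
     struct mix  { float f; double d; };  -- mixed modes: returns -1

   so a quat can be passed or returned in s0-s3 when the VFP variant
   of the AAPCS is in use.  */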
3899 /* Return true if PCS_VARIANT should use VFP registers. */
3901 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3903 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3905 static bool seen_thumb1_vfp = false;
3907 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3909 sorry ("Thumb-1 hard-float VFP ABI");
3910 /* sorry() is not immediately fatal, so only display this once. */
3911 seen_thumb1_vfp = true;
3917 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3920 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
3921 && (TARGET_VFP_DOUBLE || !is_double));
3925 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3926 enum machine_mode mode, const_tree type,
3927 enum machine_mode *base_mode, int *count)
3929 enum machine_mode new_mode = VOIDmode;
3931 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3932 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3933 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3938 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3941 new_mode = (mode == DCmode ? DFmode : SFmode);
3943 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3945 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3947 if (ag_count > 0 && ag_count <= 4)
3956 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3959 *base_mode = new_mode;
3964 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3965 enum machine_mode mode, const_tree type)
3967 int count ATTRIBUTE_UNUSED;
3968 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3970 if (!use_vfp_abi (pcs_variant, false))
3972 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3977 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3980 if (!use_vfp_abi (pcum->pcs_variant, false))
3983 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3984 &pcum->aapcs_vfp_rmode,
3985 &pcum->aapcs_vfp_rcount);
3989 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3990 const_tree type ATTRIBUTE_UNUSED)
3992 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3993 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3996 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3997 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3999 pcum->aapcs_vfp_reg_alloc = mask << regno;
4000 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4003 int rcount = pcum->aapcs_vfp_rcount;
4005 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4009 /* Avoid using unsupported vector modes. */
4010 if (rmode == V2SImode)
4012 else if (rmode == V4SImode)
4019 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4020 for (i = 0; i < rcount; i++)
4022 rtx tmp = gen_rtx_REG (rmode,
4023 FIRST_VFP_REGNUM + regno + i * rshift);
4024 tmp = gen_rtx_EXPR_LIST
4026 GEN_INT (i * GET_MODE_SIZE (rmode)));
4027 XVECEXP (par, 0, i) = tmp;
4030 pcum->aapcs_reg = par;
4033 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4040 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4041 enum machine_mode mode,
4042 const_tree type ATTRIBUTE_UNUSED)
4044 if (!use_vfp_abi (pcs_variant, false))
4047 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4050 enum machine_mode ag_mode;
4055 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4060 if (ag_mode == V2SImode)
4062 else if (ag_mode == V4SImode)
4068 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4069 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4070 for (i = 0; i < count; i++)
4072 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4073 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4074 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4075 XVECEXP (par, 0, i) = tmp;
4081 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4085 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4086 enum machine_mode mode ATTRIBUTE_UNUSED,
4087 const_tree type ATTRIBUTE_UNUSED)
4089 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4090 pcum->aapcs_vfp_reg_alloc = 0;
4094 #define AAPCS_CP(X) \
4096 aapcs_ ## X ## _cum_init, \
4097 aapcs_ ## X ## _is_call_candidate, \
4098 aapcs_ ## X ## _allocate, \
4099 aapcs_ ## X ## _is_return_candidate, \
4100 aapcs_ ## X ## _allocate_return_reg, \
4101 aapcs_ ## X ## _advance \
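/* For illustration: with X == vfp the macro above yields the entry

     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg, aapcs_vfp_advance },

   wiring the six VFP hooks defined earlier into aapcs_cp_arg_layout. */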
4104 /* Table of co-processors that can be used to pass arguments in
4105 registers. Ideally no argument should be a candidate for more than
4106 one co-processor table entry, but the table is processed in order
4107 and stops after the first match. If that entry then fails to put
4108 the argument into a co-processor register, the argument will go on
4109 the stack. */
4112 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4113 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4115 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4116 BLKmode) is a candidate for this co-processor's registers; this
4117 function should ignore any position-dependent state in
4118 CUMULATIVE_ARGS and only use call-type dependent information. */
4119 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4121 /* Return true if the argument does get a co-processor register; it
4122 should set aapcs_reg to an RTX of the register allocated as is
4123 required for a return from FUNCTION_ARG. */
4124 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4126 /* Return true if a result of mode MODE (or type TYPE if MODE is
4127 BLKmode) can be returned in this co-processor's registers. */
4128 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4130 /* Allocate and return an RTX element to hold the return type of a
4131 call; this routine must not fail and will only be called if
4132 is_return_candidate returned true with the same parameters. */
4133 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4135 /* Finish processing this argument and prepare to start processing
4136 the next one. */
4137 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4138 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4146 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4151 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4152 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4159 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4161 /* We aren't passed a decl, so we can't check that a call is local.
4162 However, it isn't clear that that would be a win anyway, since it
4163 might limit some tail-calling opportunities. */
4164 enum arm_pcs pcs_variant;
4168 const_tree fndecl = NULL_TREE;
4170 if (TREE_CODE (fntype) == FUNCTION_DECL)
4173 fntype = TREE_TYPE (fntype);
4176 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4179 pcs_variant = arm_pcs_default;
4181 if (pcs_variant != ARM_PCS_AAPCS)
4185 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4186 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4195 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4198 /* We aren't passed a decl, so we can't check that a call is local.
4199 However, it isn't clear that that would be a win anyway, since it
4200 might limit some tail-calling opportunities. */
4201 enum arm_pcs pcs_variant;
4202 int unsignedp ATTRIBUTE_UNUSED;
4206 const_tree fndecl = NULL_TREE;
4208 if (TREE_CODE (fntype) == FUNCTION_DECL)
4211 fntype = TREE_TYPE (fntype);
4214 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4217 pcs_variant = arm_pcs_default;
4219 /* Promote integer types. */
4220 if (type && INTEGRAL_TYPE_P (type))
4221 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4223 if (pcs_variant != ARM_PCS_AAPCS)
4227 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4228 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4230 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4234 /* Promotes small structs returned in a register to full-word size
4235 for big-endian AAPCS. */
4236 if (type && arm_return_in_msb (type))
4238 HOST_WIDE_INT size = int_size_in_bytes (type);
4239 if (size % UNITS_PER_WORD != 0)
4241 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4242 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4246 return gen_rtx_REG (mode, R0_REGNUM);
4250 aapcs_libcall_value (enum machine_mode mode)
4252 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4255 /* Lay out a function argument using the AAPCS rules. The rule
4256 numbers referred to here are those in the AAPCS. */
4258 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4259 tree type, int named)
4264 /* We only need to do this once per argument. */
4265 if (pcum->aapcs_arg_processed)
4268 pcum->aapcs_arg_processed = true;
4270 /* Special case: if named is false then we are handling an incoming
4271 anonymous argument which is on the stack. */
4275 /* Is this a potential co-processor register candidate? */
4276 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4278 int slot = aapcs_select_call_coproc (pcum, mode, type);
4279 pcum->aapcs_cprc_slot = slot;
4281 /* We don't have to apply any of the rules from part B of the
4282 preparation phase; these are handled elsewhere in the
4283 compiler. */
4287 /* A Co-processor register candidate goes either in its own
4288 class of registers or on the stack. */
4289 if (!pcum->aapcs_cprc_failed[slot])
4291 /* C1.cp - Try to allocate the argument to co-processor
4292 registers. */
4293 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4296 /* C2.cp - Put the argument on the stack and note that we
4297 can't assign any more candidates in this slot. We also
4298 need to note that we have allocated stack space, so that
4299 we won't later try to split a non-cprc candidate between
4300 core registers and the stack. */
4301 pcum->aapcs_cprc_failed[slot] = true;
4302 pcum->can_split = false;
4305 /* We didn't get a register, so this argument goes on the
4306 stack. */
4307 gcc_assert (pcum->can_split == false);
4312 /* C3 - For double-word aligned arguments, round the NCRN up to the
4313 next even number. */
4314 ncrn = pcum->aapcs_ncrn;
4315 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4318 nregs = ARM_NUM_REGS2(mode, type);
4320 /* Sigh, this test should really assert that nregs > 0, but a GCC
4321 extension allows empty structs and then gives them empty size; it
4322 then allows such a structure to be passed by value. For some of
4323 the code below we have to pretend that such an argument has
4324 non-zero size so that we 'locate' it correctly either in
4325 registers or on the stack. */
4326 gcc_assert (nregs >= 0);
4328 nregs2 = nregs ? nregs : 1;
4330 /* C4 - Argument fits entirely in core registers. */
4331 if (ncrn + nregs2 <= NUM_ARG_REGS)
4333 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4334 pcum->aapcs_next_ncrn = ncrn + nregs;
4338 /* C5 - Some core registers left and there are no arguments already
4339 on the stack: split this argument between the remaining core
4340 registers and the stack. */
4341 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4343 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4344 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4345 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4349 /* C6 - NCRN is set to 4. */
4350 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4352 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
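/* Editor's note: a worked example (hypothetical prototype) of the
   rules applied above, with four core argument registers r0-r3:

       void f (int a, double b, int c);

   a is allocated r0 under C4.  b needs doubleword alignment, so C3
   rounds the NCRN up from 1 to 2 and b takes r2/r3 under C4.  For c
   no core registers remain and nothing can be split, so C6 sets the
   NCRN to 4 and C7/C8 leave c on the stack.  */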
4356 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4357 for a call to a function whose data type is FNTYPE.
4358 For a library call, FNTYPE is NULL. */
4360 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4362 tree fndecl ATTRIBUTE_UNUSED)
4364 /* Long call handling. */
4366 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4368 pcum->pcs_variant = arm_pcs_default;
4370 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4372 if (arm_libcall_uses_aapcs_base (libname))
4373 pcum->pcs_variant = ARM_PCS_AAPCS;
4375 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4376 pcum->aapcs_reg = NULL_RTX;
4377 pcum->aapcs_partial = 0;
4378 pcum->aapcs_arg_processed = false;
4379 pcum->aapcs_cprc_slot = -1;
4380 pcum->can_split = true;
4382 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4386 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4388 pcum->aapcs_cprc_failed[i] = false;
4389 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4397 /* On the ARM, the offset starts at 0. */
4399 pcum->iwmmxt_nregs = 0;
4400 pcum->can_split = true;
4402 /* Varargs vectors are treated the same as long long.
4403 named_count avoids having to change the way arm handles 'named'. */
4404 pcum->named_count = 0;
4407 if (TARGET_REALLY_IWMMXT && fntype)
4411 for (fn_arg = TYPE_ARG_TYPES (fntype);
4413 fn_arg = TREE_CHAIN (fn_arg))
4414 pcum->named_count += 1;
4416 if (! pcum->named_count)
4417 pcum->named_count = INT_MAX;
4422 /* Return true if mode/type need doubleword alignment. */
4424 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4426 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4427 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
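/* Editor's note: illustrative cases for the predicate above, assuming
   the usual 32-bit PARM_BOUNDARY and 64-bit alignment for the DI/DF
   modes on this target:

       long long ll;  // DImode, 64-bit aligned: needs doubleword alignment
       double d;      // DFmode, 64-bit aligned: likewise
       int i;         // SImode, 32-bit aligned: does not

   An over-aligned type, e.g. a struct declared with
   __attribute__ ((aligned (8))), also qualifies via the TYPE_ALIGN
   test.  */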
4431 /* Determine where to put an argument to a function.
4432 Value is zero to push the argument on the stack,
4433 or a hard register in which to store the argument.
4435 MODE is the argument's machine mode.
4436 TYPE is the data type of the argument (as a tree).
4437 This is null for libcalls where that information may
4439 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4440 the preceding args and about the function being called.
4441 NAMED is nonzero if this argument is a named parameter
4442 (otherwise it is an extra parameter matching an ellipsis). */
4445 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4446 tree type, int named)
4450 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4451 a call insn (op3 of a call_value insn). */
4452 if (mode == VOIDmode)
4455 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4457 aapcs_layout_arg (pcum, mode, type, named);
4458 return pcum->aapcs_reg;
4461 /* Varargs vectors are treated the same as long long.
4462 named_count avoids having to change the way arm handles 'named'. */
4463 if (TARGET_IWMMXT_ABI
4464 && arm_vector_mode_supported_p (mode)
4465 && pcum->named_count > pcum->nargs + 1)
4467 if (pcum->iwmmxt_nregs <= 9)
4468 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4471 pcum->can_split = false;
4476 /* Put doubleword aligned quantities in even register pairs. */
4478 && ARM_DOUBLEWORD_ALIGN
4479 && arm_needs_doubleword_align (mode, type))
4482 if (mode == VOIDmode)
4483 /* Pick an arbitrary value for operand 2 of the call insn. */
4486 /* Only allow splitting an arg between regs and memory if all preceding
4487 args were allocated to regs. For args passed by reference we only count
4488 the reference pointer. */
4489 if (pcum->can_split)
4492 nregs = ARM_NUM_REGS2 (mode, type);
4494 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4497 return gen_rtx_REG (mode, pcum->nregs);
4501 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4502 tree type, bool named)
4504 int nregs = pcum->nregs;
4506 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4508 aapcs_layout_arg (pcum, mode, type, named);
4509 return pcum->aapcs_partial;
4512 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4515 if (NUM_ARG_REGS > nregs
4516 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4518 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4524 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4525 tree type, bool named)
4527 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4529 aapcs_layout_arg (pcum, mode, type, named);
4531 if (pcum->aapcs_cprc_slot >= 0)
4533 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4535 pcum->aapcs_cprc_slot = -1;
4538 /* Generic stuff. */
4539 pcum->aapcs_arg_processed = false;
4540 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4541 pcum->aapcs_reg = NULL_RTX;
4542 pcum->aapcs_partial = 0;
4547 if (arm_vector_mode_supported_p (mode)
4548 && pcum->named_count > pcum->nargs
4549 && TARGET_IWMMXT_ABI)
4550 pcum->iwmmxt_nregs += 1;
4552 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4556 /* Variable sized types are passed by reference. This is a GCC
4557 extension to the ARM ABI. */
4560 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4561 enum machine_mode mode ATTRIBUTE_UNUSED,
4562 const_tree type, bool named ATTRIBUTE_UNUSED)
4564 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
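/* Editor's note: a hypothetical illustration of the predicate above.
   With GNU C's variable-length types, TYPE_SIZE can be a run-time
   expression rather than an INTEGER_CST:

       void g (int n)
       {
         struct { char buf[n]; } v;   // variably sized (GNU extension)
         // passing v by value takes the by-reference path above
       }

   Fixed-size aggregates, however large, are still passed by value.  */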
4567 /* Encode the current state of the #pragma [no_]long_calls. */
4570 OFF, /* No #pragma [no_]long_calls is in effect. */
4571 LONG, /* #pragma long_calls is in effect. */
4572 SHORT /* #pragma no_long_calls is in effect. */
4575 static arm_pragma_enum arm_pragma_long_calls = OFF;
4578 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4580 arm_pragma_long_calls = LONG;
4584 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4586 arm_pragma_long_calls = SHORT;
4590 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4592 arm_pragma_long_calls = OFF;
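/* Editor's note: hypothetical user-level usage of the three pragmas
   handled above:

       #pragma long_calls
       void far_func (void);    // declared while LONG is in effect
       #pragma no_long_calls
       void near_func (void);   // declared while SHORT is in effect
       #pragma long_calls_off
       void plain_func (void);  // no pragma in effect (OFF)

   The recorded state is consumed by arm_set_default_type_attributes
   below, which attaches the corresponding long_call/short_call
   attribute to function types declared in each region.  */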
4595 /* Handle an attribute requiring a FUNCTION_DECL;
4596 arguments as in struct attribute_spec.handler. */
4598 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4599 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4601 if (TREE_CODE (*node) != FUNCTION_DECL)
4603 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4605 *no_add_attrs = true;
4611 /* Handle an "interrupt" or "isr" attribute;
4612 arguments as in struct attribute_spec.handler. */
4614 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4619 if (TREE_CODE (*node) != FUNCTION_DECL)
4621 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4623 *no_add_attrs = true;
4625 /* FIXME: the argument, if any, is checked for type attributes;
4626 should it be checked for decl ones? */
4630 if (TREE_CODE (*node) == FUNCTION_TYPE
4631 || TREE_CODE (*node) == METHOD_TYPE)
4633 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4635 warning (OPT_Wattributes, "%qE attribute ignored",
4637 *no_add_attrs = true;
4640 else if (TREE_CODE (*node) == POINTER_TYPE
4641 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4642 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4643 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4645 *node = build_variant_type_copy (*node);
4646 TREE_TYPE (*node) = build_type_attribute_variant
4648 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4649 *no_add_attrs = true;
4653 /* Possibly pass this attribute on from the type to a decl. */
4654 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4655 | (int) ATTR_FLAG_FUNCTION_NEXT
4656 | (int) ATTR_FLAG_ARRAY_NEXT))
4658 *no_add_attrs = true;
4659 return tree_cons (name, args, NULL_TREE);
4663 warning (OPT_Wattributes, "%qE attribute ignored",
4672 /* Handle a "pcs" attribute; arguments as in struct
4673 attribute_spec.handler. */
4675 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4676 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4678 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4680 warning (OPT_Wattributes, "%qE attribute ignored", name);
4681 *no_add_attrs = true;
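/* Editor's note: a hypothetical example of the "pcs" attribute checked
   above; "aapcs" and "aapcs-vfp" are values arm_pcs_from_attribute is
   expected to recognize:

       double f (double) __attribute__ ((pcs ("aapcs")));     // core regs
       double g (double) __attribute__ ((pcs ("aapcs-vfp"))); // VFP regs

   Any other string falls through to the warning path above and the
   attribute is discarded.  */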
4686 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4687 /* Handle the "notshared" attribute. This attribute is another way of
4688 requesting hidden visibility. ARM's compiler supports
4689 "__declspec(notshared)"; we support the same thing via an
4693 arm_handle_notshared_attribute (tree *node,
4694 tree name ATTRIBUTE_UNUSED,
4695 tree args ATTRIBUTE_UNUSED,
4696 int flags ATTRIBUTE_UNUSED,
4699 tree decl = TYPE_NAME (*node);
4703 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4704 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4705 *no_add_attrs = false;
4711 /* Return 0 if the attributes for two types are incompatible, 1 if they
4712 are compatible, and 2 if they are nearly compatible (which causes a
4713 warning to be generated). */
4715 arm_comp_type_attributes (const_tree type1, const_tree type2)
4719 /* Check for mismatch of non-default calling convention. */
4720 if (TREE_CODE (type1) != FUNCTION_TYPE)
4723 /* Check for mismatched call attributes. */
4724 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4725 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4726 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4727 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4729 /* Only bother to check if an attribute is defined. */
4730 if (l1 | l2 | s1 | s2)
4732 /* If one type has an attribute, the other must have the same attribute. */
4733 if ((l1 != l2) || (s1 != s2))
4736 /* Disallow mixed attributes. */
4737 if ((l1 & s2) || (l2 & s1))
4741 /* Check for mismatched ISR attribute. */
4742 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4744 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4745 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4747 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4754 /* Assigns default attributes to a newly defined type. This is used to
4755 set short_call/long_call attributes for function types of
4756 functions defined inside corresponding #pragma scopes. */
4758 arm_set_default_type_attributes (tree type)
4760 /* Add __attribute__ ((long_call)) to all functions when inside
4761 #pragma long_calls, or __attribute__ ((short_call)) when inside
4762 #pragma no_long_calls. */
4763 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4765 tree type_attr_list, attr_name;
4766 type_attr_list = TYPE_ATTRIBUTES (type);
4768 if (arm_pragma_long_calls == LONG)
4769 attr_name = get_identifier ("long_call");
4770 else if (arm_pragma_long_calls == SHORT)
4771 attr_name = get_identifier ("short_call");
4775 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4776 TYPE_ATTRIBUTES (type) = type_attr_list;
4780 /* Return true if DECL is known to be linked into section SECTION. */
4783 arm_function_in_section_p (tree decl, section *section)
4785 /* We can only be certain about functions defined in the same
4786 compilation unit. */
4787 if (!TREE_STATIC (decl))
4790 /* Make sure that SYMBOL always binds to the definition in this
4791 compilation unit. */
4792 if (!targetm.binds_local_p (decl))
4795 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4796 if (!DECL_SECTION_NAME (decl))
4798 /* Make sure that we will not create a unique section for DECL. */
4799 if (flag_function_sections || DECL_ONE_ONLY (decl))
4803 return function_section (decl) == section;
4806 /* Return nonzero if a 32-bit "long_call" should be generated for
4807 a call from the current function to DECL. We generate a long_call
4808 if the function:
4810 a. has an __attribute__ ((long_call))
4811 or b. is within the scope of a #pragma long_calls
4812 or c. the -mlong-calls command line switch has been specified
4814 However we do not generate a long call if the function:
4816 d. has an __attribute__ ((short_call))
4817 or e. is inside the scope of a #pragma no_long_calls
4818 or f. is defined in the same section as the current function. */
4821 arm_is_long_call_p (tree decl)
4826 return TARGET_LONG_CALLS;
4828 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4829 if (lookup_attribute ("short_call", attrs))
4832 /* For "f", be conservative, and only cater for cases in which the
4833 whole of the current function is placed in the same section. */
4834 if (!flag_reorder_blocks_and_partition
4835 && TREE_CODE (decl) == FUNCTION_DECL
4836 && arm_function_in_section_p (decl, current_function_section ()))
4839 if (lookup_attribute ("long_call", attrs))
4842 return TARGET_LONG_CALLS;
4845 /* Return nonzero if it is ok to make a tail-call to DECL. */
4847 arm_function_ok_for_sibcall (tree decl, tree exp)
4849 unsigned long func_type;
4851 if (cfun->machine->sibcall_blocked)
4854 /* Never tailcall something for which we have no decl, or if we
4855 are generating code for Thumb-1. */
4856 if (decl == NULL || TARGET_THUMB1)
4859 /* The PIC register is live on entry to VxWorks PLT entries, so we
4860 must make the call before restoring the PIC register. */
4861 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4864 /* Cannot tail-call to long calls, since these are out of range of
4865 a branch instruction. */
4866 if (arm_is_long_call_p (decl))
4869 /* If we are interworking and the function is not declared static
4870 then we can't tail-call it unless we know that it exists in this
4871 compilation unit (since it might be a Thumb routine). */
4872 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4875 func_type = arm_current_func_type ();
4876 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4877 if (IS_INTERRUPT (func_type))
4880 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4882 /* Check that the return value locations are the same. For
4883 example that we aren't returning a value from the sibling in
4884 a VFP register but then need to transfer it to a core
4885 register. */
4887 rtx a, b;
4888 a = arm_function_value (TREE_TYPE (exp), decl, false);
4889 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4891 if (!rtx_equal_p (a, b))
4895 /* Never tailcall if function may be called with a misaligned SP. */
4896 if (IS_STACKALIGN (func_type))
4899 /* Everything else is ok. */
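/* Editor's note: an illustrative (hypothetical) pair of functions
   exercising the tests above:

       extern int f (int);
       int g (int x) { return f (x); }   // tail position: sibcall candidate

       void h (void) __attribute__ ((interrupt ("IRQ")));
       void h (void) { f (0); }          // ISR: IS_INTERRUPT blocks it

   Even for g, conversion still depends on the earlier checks (no long
   call, no interworking problem, matching return-value locations).  */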
4904 /* Addressing mode support functions. */
4906 /* Return nonzero if X is a legitimate immediate operand when compiling
4907 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4909 legitimate_pic_operand_p (rtx x)
4911 if (GET_CODE (x) == SYMBOL_REF
4912 || (GET_CODE (x) == CONST
4913 && GET_CODE (XEXP (x, 0)) == PLUS
4914 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4920 /* Record that the current function needs a PIC register. Initialize
4921 cfun->machine->pic_reg if we have not already done so. */
4924 require_pic_register (void)
4926 /* A lot of the logic here is made obscure by the fact that this
4927 routine gets called as part of the rtx cost estimation process.
4928 We don't want those calls to affect any assumptions about the real
4929 function; and further, we can't call entry_of_function() until we
4930 start the real expansion process. */
4931 if (!crtl->uses_pic_offset_table)
4933 gcc_assert (can_create_pseudo_p ());
4934 if (arm_pic_register != INVALID_REGNUM)
4936 if (!cfun->machine->pic_reg)
4937 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4939 /* Play games to avoid marking the function as needing pic
4940 if we are being called as part of the cost-estimation
4942 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4943 crtl->uses_pic_offset_table = 1;
4949 if (!cfun->machine->pic_reg)
4950 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4952 /* Play games to avoid marking the function as needing pic
4953 if we are being called as part of the cost-estimation
4955 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4957 crtl->uses_pic_offset_table = 1;
4960 arm_load_pic_register (0UL);
4964 /* We can be called during expansion of PHI nodes, where
4965 we can't yet emit instructions directly in the final
4966 insn stream. Queue the insns on the entry edge; they will
4967 be committed after everything else is expanded. */
4968 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4975 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4977 if (GET_CODE (orig) == SYMBOL_REF
4978 || GET_CODE (orig) == LABEL_REF)
4980 rtx pic_ref, address;
4985 gcc_assert (can_create_pseudo_p ());
4986 reg = gen_reg_rtx (Pmode);
4987 address = gen_reg_rtx (Pmode);
4992 /* VxWorks does not impose a fixed gap between segments; the run-time
4993 gap can be different from the object-file gap. We therefore can't
4994 use GOTOFF unless we are absolutely sure that the symbol is in the
4995 same segment as the GOT. Unfortunately, the flexibility of linker
4996 scripts means that we can't be sure of that in general, so assume
4997 that GOTOFF is never valid on VxWorks. */
4998 if ((GET_CODE (orig) == LABEL_REF
4999 || (GET_CODE (orig) == SYMBOL_REF &&
5000 SYMBOL_REF_LOCAL_P (orig)))
5002 && !TARGET_VXWORKS_RTP)
5003 insn = arm_pic_static_addr (orig, reg);
5006 /* If this function doesn't have a pic register, create one now. */
5007 require_pic_register ();
5010 emit_insn (gen_pic_load_addr_32bit (address, orig));
5011 else /* TARGET_THUMB1 */
5012 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
5014 pic_ref = gen_const_mem (Pmode,
5015 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
5017 insn = emit_move_insn (reg, pic_ref);
5020 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5022 set_unique_reg_note (insn, REG_EQUAL, orig);
5026 else if (GET_CODE (orig) == CONST)
5030 if (GET_CODE (XEXP (orig, 0)) == PLUS
5031 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5034 /* Handle the case where we have: const (UNSPEC_TLS). */
5035 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5036 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5039 /* Handle the case where we have:
5040 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5042 if (GET_CODE (XEXP (orig, 0)) == PLUS
5043 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5044 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5046 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5052 gcc_assert (can_create_pseudo_p ());
5053 reg = gen_reg_rtx (Pmode);
5056 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5058 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5059 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5060 base == reg ? 0 : reg);
5062 if (GET_CODE (offset) == CONST_INT)
5064 /* The base register doesn't really matter, we only want to
5065 test the index for the appropriate mode. */
5066 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5068 gcc_assert (can_create_pseudo_p ());
5069 offset = force_reg (Pmode, offset);
5072 if (GET_CODE (offset) == CONST_INT)
5073 return plus_constant (base, INTVAL (offset));
5076 if (GET_MODE_SIZE (mode) > 4
5077 && (GET_MODE_CLASS (mode) == MODE_INT
5078 || TARGET_SOFT_FLOAT))
5080 emit_insn (gen_addsi3 (reg, base, offset));
5084 return gen_rtx_PLUS (Pmode, base, offset);
5091 /* Find a spare register to use during the prolog of a function. */
5094 thumb_find_work_register (unsigned long pushed_regs_mask)
5098 /* Check the argument registers first as these are call-used. The
5099 register allocation order means that sometimes r3 might be used
5100 but earlier argument registers might not, so check them all. */
5101 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5102 if (!df_regs_ever_live_p (reg))
5105 /* Before going on to check the call-saved registers we can try a couple
5106 more ways of deducing that r3 is available. The first is when we are
5107 pushing anonymous arguments onto the stack and we have less than 4
5108 registers' worth of fixed arguments (*). In this case r3 will be part of
5109 the variable argument list and so we can be sure that it will be
5110 pushed right at the start of the function. Hence it will be available
5111 for the rest of the prologue.
5112 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5113 if (cfun->machine->uses_anonymous_args
5114 && crtl->args.pretend_args_size > 0)
5115 return LAST_ARG_REGNUM;
5117 /* The other case is when we have fixed arguments but less than 4 registers
5118 worth. In this case r3 might be used in the body of the function, but
5119 it is not being used to convey an argument into the function. In theory
5120 we could just check crtl->args.size to see how many bytes are
5121 being passed in argument registers, but it seems that it is unreliable.
5122 Sometimes it will have the value 0 when in fact arguments are being
5123 passed. (See testcase execute/20021111-1.c for an example.) So we
5124 also check the args_info.nregs field. The problem with this field is
5125 that it makes no allowances for arguments that are passed to the
5126 function but which are not used. Hence we could miss an opportunity
5127 when a function has an unused argument in r3. But it is better to be
5128 safe than sorry. */
5129 if (! cfun->machine->uses_anonymous_args
5130 && crtl->args.size >= 0
5131 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5132 && crtl->args.info.nregs < 4)
5133 return LAST_ARG_REGNUM;
5135 /* Otherwise look for a call-saved register that is going to be pushed. */
5136 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5137 if (pushed_regs_mask & (1 << reg))
5142 /* Thumb-2 can use high regs. */
5143 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5144 if (pushed_regs_mask & (1 << reg))
5147 /* Something went wrong - thumb_compute_save_reg_mask()
5148 should have arranged for a suitable register to be pushed. */
5152 static GTY(()) int pic_labelno;
5154 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5155 low register. */
5158 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5160 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5162 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5165 gcc_assert (flag_pic);
5167 pic_reg = cfun->machine->pic_reg;
5168 if (TARGET_VXWORKS_RTP)
5170 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5171 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5172 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5174 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5176 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5177 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5181 /* We use an UNSPEC rather than a LABEL_REF because this label
5182 never appears in the code stream. */
5184 labelno = GEN_INT (pic_labelno++);
5185 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5186 l1 = gen_rtx_CONST (VOIDmode, l1);
5188 /* On the ARM the PC register contains 'dot + 8' at the time of the
5189 addition, on the Thumb it is 'dot + 4'. */
5190 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5191 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5193 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5197 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5199 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5201 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5203 else /* TARGET_THUMB1 */
5205 if (arm_pic_register != INVALID_REGNUM
5206 && REGNO (pic_reg) > LAST_LO_REGNUM)
5208 /* We will have pushed the pic register, so we should always be
5209 able to find a work register. */
5210 pic_tmp = gen_rtx_REG (SImode,
5211 thumb_find_work_register (saved_regs));
5212 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5213 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5216 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5217 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5221 /* Need to emit this whether or not we obey regdecls,
5222 since setjmp/longjmp can cause life info to screw up. */
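/* Editor's note: for the common (non-VxWorks) ARM case, the sequence
   emitted above corresponds roughly to the following (illustrative
   assembly; the label and the choice of r9 are hypothetical):

       ldr     r9, .LPIC_OFFSET     @ _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
   .LPIC0:
       add     r9, pc, r9           @ pic_add_dot_plus_eight

   Thumb uses a bias of 4 rather than 8, matching the 'dot + 4'
   comment above.  */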
5226 /* Generate code to load the address of a static var when flag_pic is set. */
5228 arm_pic_static_addr (rtx orig, rtx reg)
5230 rtx l1, labelno, offset_rtx, insn;
5232 gcc_assert (flag_pic);
5234 /* We use an UNSPEC rather than a LABEL_REF because this label
5235 never appears in the code stream. */
5236 labelno = GEN_INT (pic_labelno++);
5237 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5238 l1 = gen_rtx_CONST (VOIDmode, l1);
5240 /* On the ARM the PC register contains 'dot + 8' at the time of the
5241 addition, on the Thumb it is 'dot + 4'. */
5242 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5243 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5244 UNSPEC_SYMBOL_OFFSET);
5245 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5249 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5251 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5253 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5255 else /* TARGET_THUMB1 */
5257 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5258 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5264 /* Return nonzero if X is valid as an ARM state addressing register. */
5266 arm_address_register_rtx_p (rtx x, int strict_p)
5270 if (GET_CODE (x) != REG)
5276 return ARM_REGNO_OK_FOR_BASE_P (regno);
5278 return (regno <= LAST_ARM_REGNUM
5279 || regno >= FIRST_PSEUDO_REGISTER
5280 || regno == FRAME_POINTER_REGNUM
5281 || regno == ARG_POINTER_REGNUM);
5284 /* Return TRUE if this rtx is the difference of a symbol and a label,
5285 and will reduce to a PC-relative relocation in the object file.
5286 Expressions like this can be left alone when generating PIC, rather
5287 than forced through the GOT. */
5289 pcrel_constant_p (rtx x)
5291 if (GET_CODE (x) == MINUS)
5292 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5297 /* Return nonzero if X is a valid ARM state address operand. */
5299 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5303 enum rtx_code code = GET_CODE (x);
5305 if (arm_address_register_rtx_p (x, strict_p))
5308 use_ldrd = (TARGET_LDRD
5310 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5312 if (code == POST_INC || code == PRE_DEC
5313 || ((code == PRE_INC || code == POST_DEC)
5314 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5315 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5317 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5318 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5319 && GET_CODE (XEXP (x, 1)) == PLUS
5320 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5322 rtx addend = XEXP (XEXP (x, 1), 1);
5324 /* Don't allow ldrd post increment by register because it's hard
5325 to fix up invalid register choices. */
5327 && GET_CODE (x) == POST_MODIFY
5328 && GET_CODE (addend) == REG)
5331 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5332 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5335 /* After reload constants split into minipools will have addresses
5336 from a LABEL_REF. */
5337 else if (reload_completed
5338 && (code == LABEL_REF
5340 && GET_CODE (XEXP (x, 0)) == PLUS
5341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5342 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5345 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5348 else if (code == PLUS)
5350 rtx xop0 = XEXP (x, 0);
5351 rtx xop1 = XEXP (x, 1);
5353 return ((arm_address_register_rtx_p (xop0, strict_p)
5354 && GET_CODE(xop1) == CONST_INT
5355 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5356 || (arm_address_register_rtx_p (xop1, strict_p)
5357 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5361 /* Reload currently can't handle MINUS, so disable this for now */
5362 else if (GET_CODE (x) == MINUS)
5364 rtx xop0 = XEXP (x, 0);
5365 rtx xop1 = XEXP (x, 1);
5367 return (arm_address_register_rtx_p (xop0, strict_p)
5368 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5372 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5373 && code == SYMBOL_REF
5374 && CONSTANT_POOL_ADDRESS_P (x)
5376 && symbol_mentioned_p (get_pool_constant (x))
5377 && ! pcrel_constant_p (get_pool_constant (x))))
5383 /* Return nonzero if X is a valid Thumb-2 address operand. */
5385 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5388 enum rtx_code code = GET_CODE (x);
5390 if (arm_address_register_rtx_p (x, strict_p))
5393 use_ldrd = (TARGET_LDRD
5395 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5397 if (code == POST_INC || code == PRE_DEC
5398 || ((code == PRE_INC || code == POST_DEC)
5399 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5400 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5402 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5403 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5404 && GET_CODE (XEXP (x, 1)) == PLUS
5405 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5407 /* Thumb-2 only has autoincrement by constant. */
5408 rtx addend = XEXP (XEXP (x, 1), 1);
5409 HOST_WIDE_INT offset;
5411 if (GET_CODE (addend) != CONST_INT)
5414 offset = INTVAL(addend);
5415 if (GET_MODE_SIZE (mode) <= 4)
5416 return (offset > -256 && offset < 256);
5418 return (use_ldrd && offset > -1024 && offset < 1024
5419 && (offset & 3) == 0);
5422 /* After reload constants split into minipools will have addresses
5423 from a LABEL_REF. */
5424 else if (reload_completed
5425 && (code == LABEL_REF
5427 && GET_CODE (XEXP (x, 0)) == PLUS
5428 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5429 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5432 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5435 else if (code == PLUS)
5437 rtx xop0 = XEXP (x, 0);
5438 rtx xop1 = XEXP (x, 1);
5440 return ((arm_address_register_rtx_p (xop0, strict_p)
5441 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5442 || (arm_address_register_rtx_p (xop1, strict_p)
5443 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5446 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5447 && code == SYMBOL_REF
5448 && CONSTANT_POOL_ADDRESS_P (x)
5450 && symbol_mentioned_p (get_pool_constant (x))
5451 && ! pcrel_constant_p (get_pool_constant (x))))
5457 /* Return nonzero if INDEX is valid for an address index operand in
5458 ARM state. */
5460 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5463 HOST_WIDE_INT range;
5464 enum rtx_code code = GET_CODE (index);
5466 /* Standard coprocessor addressing modes. */
5467 if (TARGET_HARD_FLOAT
5468 && (TARGET_FPA || TARGET_MAVERICK)
5469 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5470 || (TARGET_MAVERICK && mode == DImode)))
5471 return (code == CONST_INT && INTVAL (index) < 1024
5472 && INTVAL (index) > -1024
5473 && (INTVAL (index) & 3) == 0);
5476 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5477 return (code == CONST_INT
5478 && INTVAL (index) < 1016
5479 && INTVAL (index) > -1024
5480 && (INTVAL (index) & 3) == 0);
5482 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5483 return (code == CONST_INT
5484 && INTVAL (index) < 1024
5485 && INTVAL (index) > -1024
5486 && (INTVAL (index) & 3) == 0);
5488 if (arm_address_register_rtx_p (index, strict_p)
5489 && (GET_MODE_SIZE (mode) <= 4))
5492 if (mode == DImode || mode == DFmode)
5494 if (code == CONST_INT)
5496 HOST_WIDE_INT val = INTVAL (index);
5499 return val > -256 && val < 256;
5501 return val > -4096 && val < 4092;
5504 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5507 if (GET_MODE_SIZE (mode) <= 4
5511 || (mode == QImode && outer == SIGN_EXTEND))))
5515 rtx xiop0 = XEXP (index, 0);
5516 rtx xiop1 = XEXP (index, 1);
5518 return ((arm_address_register_rtx_p (xiop0, strict_p)
5519 && power_of_two_operand (xiop1, SImode))
5520 || (arm_address_register_rtx_p (xiop1, strict_p)
5521 && power_of_two_operand (xiop0, SImode)));
5523 else if (code == LSHIFTRT || code == ASHIFTRT
5524 || code == ASHIFT || code == ROTATERT)
5526 rtx op = XEXP (index, 1);
5528 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5529 && GET_CODE (op) == CONST_INT
5531 && INTVAL (op) <= 31);
5535 /* For ARM v4 we may be doing a sign-extend operation during the
5541 || (outer == SIGN_EXTEND && mode == QImode))
5547 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5549 return (code == CONST_INT
5550 && INTVAL (index) < range
5551 && INTVAL (index) > -range);
5554 /* Return true if OP is a valid index scaling factor for a Thumb-2
5555 address index operand, i.e. 1, 2, 4 or 8. */
5557 thumb2_index_mul_operand (rtx op)
5561 if (GET_CODE(op) != CONST_INT)
5565 return (val == 1 || val == 2 || val == 4 || val == 8);
5568 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5570 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5572 enum rtx_code code = GET_CODE (index);
5574 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5575 /* Standard coprocessor addressing modes. */
5576 if (TARGET_HARD_FLOAT
5577 && (TARGET_FPA || TARGET_MAVERICK)
5578 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5579 || (TARGET_MAVERICK && mode == DImode)))
5580 return (code == CONST_INT && INTVAL (index) < 1024
5581 && INTVAL (index) > -1024
5582 && (INTVAL (index) & 3) == 0);
5584 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5586 /* For DImode assume values will usually live in core regs
5587 and only allow LDRD addressing modes. */
5588 if (!TARGET_LDRD || mode != DImode)
5589 return (code == CONST_INT
5590 && INTVAL (index) < 1024
5591 && INTVAL (index) > -1024
5592 && (INTVAL (index) & 3) == 0);
5596 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5597 return (code == CONST_INT
5598 && INTVAL (index) < 1016
5599 && INTVAL (index) > -1024
5600 && (INTVAL (index) & 3) == 0);
5602 if (arm_address_register_rtx_p (index, strict_p)
5603 && (GET_MODE_SIZE (mode) <= 4))
5606 if (mode == DImode || mode == DFmode)
5608 if (code == CONST_INT)
5610 HOST_WIDE_INT val = INTVAL (index);
5611 /* ??? Can we assume ldrd for thumb2? */
5612 /* Thumb-2 ldrd only has reg+const addressing modes. */
5613 /* ldrd supports offsets of +-1020.
5614 However the ldr fallback does not. */
5615 return val > -256 && val < 256 && (val & 3) == 0;
5623 rtx xiop0 = XEXP (index, 0);
5624 rtx xiop1 = XEXP (index, 1);
5626 return ((arm_address_register_rtx_p (xiop0, strict_p)
5627 && thumb2_index_mul_operand (xiop1))
5628 || (arm_address_register_rtx_p (xiop1, strict_p)
5629 && thumb2_index_mul_operand (xiop0)));
5631 else if (code == ASHIFT)
5633 rtx op = XEXP (index, 1);
5635 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5636 && GET_CODE (op) == CONST_INT
5638 && INTVAL (op) <= 3);
5641 return (code == CONST_INT
5642 && INTVAL (index) < 4096
5643 && INTVAL (index) > -256);
5646 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5648 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5652 if (GET_CODE (x) != REG)
5658 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5660 return (regno <= LAST_LO_REGNUM
5661 || regno > LAST_VIRTUAL_REGISTER
5662 || regno == FRAME_POINTER_REGNUM
5663 || (GET_MODE_SIZE (mode) >= 4
5664 && (regno == STACK_POINTER_REGNUM
5665 || regno >= FIRST_PSEUDO_REGISTER
5666 || x == hard_frame_pointer_rtx
5667 || x == arg_pointer_rtx)));
5670 /* Return nonzero if x is a legitimate index register. This is the case
5671 for any base register that can access a QImode object. */
5673 thumb1_index_register_rtx_p (rtx x, int strict_p)
5675 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5678 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5680 The AP may be eliminated to either the SP or the FP, so we use the
5681 least common denominator, e.g. SImode, and offsets from 0 to 64.
5683 ??? Verify whether the above is the right approach.
5685 ??? Also, the FP may be eliminated to the SP, so perhaps that
5686 needs special handling also.
5688 ??? Look at how the mips16 port solves this problem. It probably uses
5689 better ways to solve some of these problems.
5691 Although it is not incorrect, we don't accept QImode and HImode
5692 addresses based on the frame pointer or arg pointer until the
5693 reload pass starts. This is so that eliminating such addresses
5694 into stack based ones won't produce impossible code. */
5696 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5698 /* ??? Not clear if this is right. Experiment. */
5699 if (GET_MODE_SIZE (mode) < 4
5700 && !(reload_in_progress || reload_completed)
5701 && (reg_mentioned_p (frame_pointer_rtx, x)
5702 || reg_mentioned_p (arg_pointer_rtx, x)
5703 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5704 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5705 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5706 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5709 /* Accept any base register. SP only in SImode or larger. */
5710 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5713 /* This is PC relative data before arm_reorg runs. */
5714 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5715 && GET_CODE (x) == SYMBOL_REF
5716 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5719 /* This is PC relative data after arm_reorg runs. */
5720 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5722 && (GET_CODE (x) == LABEL_REF
5723 || (GET_CODE (x) == CONST
5724 && GET_CODE (XEXP (x, 0)) == PLUS
5725 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5726 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5729 /* Post-inc indexing only supported for SImode and larger. */
5730 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5731 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5734 else if (GET_CODE (x) == PLUS)
5736 /* REG+REG address can be any two index registers. */
5737 /* We disallow FRAME+REG addressing since we know that FRAME
5738 will be replaced with STACK, and SP relative addressing only
5739 permits SP+OFFSET. */
5740 if (GET_MODE_SIZE (mode) <= 4
5741 && XEXP (x, 0) != frame_pointer_rtx
5742 && XEXP (x, 1) != frame_pointer_rtx
5743 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5744 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5747 /* REG+const has 5-7 bit offset for non-SP registers. */
5748 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5749 || XEXP (x, 0) == arg_pointer_rtx)
5750 && GET_CODE (XEXP (x, 1)) == CONST_INT
5751 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5754 /* REG+const has 10-bit offset for SP, but only SImode and
5755 larger are supported. */
5756 /* ??? Should probably check for DI/DFmode overflow here
5757 just like GO_IF_LEGITIMATE_OFFSET does. */
5758 else if (GET_CODE (XEXP (x, 0)) == REG
5759 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5760 && GET_MODE_SIZE (mode) >= 4
5761 && GET_CODE (XEXP (x, 1)) == CONST_INT
5762 && INTVAL (XEXP (x, 1)) >= 0
5763 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5764 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5767 else if (GET_CODE (XEXP (x, 0)) == REG
5768 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5769 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5770 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5771 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5772 && GET_MODE_SIZE (mode) >= 4
5773 && GET_CODE (XEXP (x, 1)) == CONST_INT
5774 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5778 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5779 && GET_MODE_SIZE (mode) == 4
5780 && GET_CODE (x) == SYMBOL_REF
5781 && CONSTANT_POOL_ADDRESS_P (x)
5783 && symbol_mentioned_p (get_pool_constant (x))
5784 && ! pcrel_constant_p (get_pool_constant (x))))
5790 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5791 instruction of mode MODE. */
5793 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5795 switch (GET_MODE_SIZE (mode))
5798 return val >= 0 && val < 32;
5801 return val >= 0 && val < 64 && (val & 1) == 0;
5805 && (val + GET_MODE_SIZE (mode)) <= 128
5811 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5814 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5815 else if (TARGET_THUMB2)
5816 return thumb2_legitimate_address_p (mode, x, strict_p);
5817 else /* if (TARGET_THUMB1) */
5818 return thumb1_legitimate_address_p (mode, x, strict_p);
5821 /* Build the SYMBOL_REF for __tls_get_addr. */
5823 static GTY(()) rtx tls_get_addr_libfunc;
5826 get_tls_get_addr (void)
5828 if (!tls_get_addr_libfunc)
5829 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5830 return tls_get_addr_libfunc;
5834 arm_load_tp (rtx target)
5837 target = gen_reg_rtx (SImode);
5841 /* Can return in any reg. */
5842 emit_insn (gen_load_tp_hard (target));
5846 /* Always returned in r0. Immediately copy the result into a pseudo,
5847 otherwise other uses of r0 (e.g. setting up function arguments) may
5848 clobber the value. */
5852 emit_insn (gen_load_tp_soft ());
5854 tmp = gen_rtx_REG (SImode, 0);
5855 emit_move_insn (target, tmp);
5861 load_tls_operand (rtx x, rtx reg)
5865 if (reg == NULL_RTX)
5866 reg = gen_reg_rtx (SImode);
5868 tmp = gen_rtx_CONST (SImode, x);
5870 emit_move_insn (reg, tmp);
5876 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5878 rtx insns, label, labelno, sum;
5882 labelno = GEN_INT (pic_labelno++);
5883 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5884 label = gen_rtx_CONST (VOIDmode, label);
5886 sum = gen_rtx_UNSPEC (Pmode,
5887 gen_rtvec (4, x, GEN_INT (reloc), label,
5888 GEN_INT (TARGET_ARM ? 8 : 4)),
5890 reg = load_tls_operand (sum, reg);
5893 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5894 else if (TARGET_THUMB2)
5895 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5896 else /* TARGET_THUMB1 */
5897 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5899 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5900 Pmode, 1, reg, Pmode);
5902 insns = get_insns ();
5909 legitimize_tls_address (rtx x, rtx reg)
5911 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5912 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5916 case TLS_MODEL_GLOBAL_DYNAMIC:
5917 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5918 dest = gen_reg_rtx (Pmode);
5919 emit_libcall_block (insns, dest, ret, x);
5922 case TLS_MODEL_LOCAL_DYNAMIC:
5923 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5925 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5926 share the LDM result with other LD model accesses. */
5927 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5929 dest = gen_reg_rtx (Pmode);
5930 emit_libcall_block (insns, dest, ret, eqv);
5932 /* Load the addend. */
5933 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5935 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5936 return gen_rtx_PLUS (Pmode, dest, addend);
5938 case TLS_MODEL_INITIAL_EXEC:
5939 labelno = GEN_INT (pic_labelno++);
5940 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5941 label = gen_rtx_CONST (VOIDmode, label);
5942 sum = gen_rtx_UNSPEC (Pmode,
5943 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5944 GEN_INT (TARGET_ARM ? 8 : 4)),
5946 reg = load_tls_operand (sum, reg);
5949 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5950 else if (TARGET_THUMB2)
5951 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5954 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5955 emit_move_insn (reg, gen_const_mem (SImode, reg));
5958 tp = arm_load_tp (NULL_RTX);
5960 return gen_rtx_PLUS (Pmode, tp, reg);
5962 case TLS_MODEL_LOCAL_EXEC:
5963 tp = arm_load_tp (NULL_RTX);
5965 reg = gen_rtx_UNSPEC (Pmode,
5966 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5968 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5970 return gen_rtx_PLUS (Pmode, tp, reg);
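/* Editor's note: for the local-exec case just above, the generated
   sequence amounts to something like the following (illustrative;
   a hardware thread register is assumed):

       mrc p15, 0, r0, c13, c0, 3   @ arm_load_tp: read the thread pointer
       ldr r1, .Ltpoff              @ TLS_LE32 offset of the variable
       add r0, r0, r1               @ address of the thread-local object

   The other models differ only in how the offset is produced: GD/LDM
   call __tls_get_addr, IE loads the offset from the GOT.  */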
5977 /* Try machine-dependent ways of modifying an illegitimate address
5978 to be legitimate. If we find one, return the new, valid address. */
5980 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5984 /* TODO: legitimize_address for Thumb2. */
5987 return thumb_legitimize_address (x, orig_x, mode);
5990 if (arm_tls_symbol_p (x))
5991 return legitimize_tls_address (x, NULL_RTX);
5993 if (GET_CODE (x) == PLUS)
5995 rtx xop0 = XEXP (x, 0);
5996 rtx xop1 = XEXP (x, 1);
5998 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5999 xop0 = force_reg (SImode, xop0);
6001 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6002 xop1 = force_reg (SImode, xop1);
6004 if (ARM_BASE_REGISTER_RTX_P (xop0)
6005 && GET_CODE (xop1) == CONST_INT)
6007 HOST_WIDE_INT n, low_n;
6011 /* VFP addressing modes actually allow greater offsets, but for
6012 now we just stick with the lowest common denominator. */
6014 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6026 low_n = ((mode) == TImode ? 0
6027 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6031 base_reg = gen_reg_rtx (SImode);
6032 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6033 emit_move_insn (base_reg, val);
6034 x = plus_constant (base_reg, low_n);
6036 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6037 x = gen_rtx_PLUS (SImode, xop0, xop1);
6040 /* XXX We don't allow MINUS any more -- see comment in
6041 arm_legitimate_address_outer_p (). */
6042 else if (GET_CODE (x) == MINUS)
6044 rtx xop0 = XEXP (x, 0);
6045 rtx xop1 = XEXP (x, 1);
6047 if (CONSTANT_P (xop0))
6048 xop0 = force_reg (SImode, xop0);
6050 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6051 xop1 = force_reg (SImode, xop1);
6053 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6054 x = gen_rtx_MINUS (SImode, xop0, xop1);
6057 /* Make sure to take full advantage of the pre-indexed addressing mode
6058 with absolute addresses which often allows for the base register to
6059 be factorized for multiple adjacent memory references, and it might
6060 even allow the minipool to be avoided entirely. */
6061 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6064 HOST_WIDE_INT mask, base, index;
6067 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6068 use an 8-bit index. So let's use a 12-bit index for SImode only and
6069 hope that arm_gen_constant will enable ldrb to use more bits. */
6070 bits = (mode == SImode) ? 12 : 8;
6071 mask = (1 << bits) - 1;
6072 base = INTVAL (x) & ~mask;
6073 index = INTVAL (x) & mask;
6074 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6076 /* It'll most probably be more efficient to generate the base
6077 with more bits set and use a negative index instead. */
6081 base_reg = force_reg (SImode, GEN_INT (base));
6082 x = plus_constant (base_reg, index);
6087 /* We need to find and carefully transform any SYMBOL and LABEL
6088 references; so go back to the original address expression. */
6089 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6091 if (new_x != orig_x)
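/* Editor's note: a worked (hypothetical) instance of the CONST_INT
   splitting above for SImode, where bits = 12 and mask = 0xfff:

       INTVAL (x) = 0x12345678
       base       = 0x12345000   // forced into a register
       index      = 0x678        // becomes the ldr/str offset

   Neighbouring absolute addresses can then share the same base
   register.  */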
6099 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6100 to be legitimate. If we find one, return the new, valid address. */
6102 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6104 if (arm_tls_symbol_p (x))
6105 return legitimize_tls_address (x, NULL_RTX);
6107 if (GET_CODE (x) == PLUS
6108 && GET_CODE (XEXP (x, 1)) == CONST_INT
6109 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6110 || INTVAL (XEXP (x, 1)) < 0))
6112 rtx xop0 = XEXP (x, 0);
6113 rtx xop1 = XEXP (x, 1);
6114 HOST_WIDE_INT offset = INTVAL (xop1);
6116 /* Try to fold the offset into a biasing of the base register and
6117 then offsetting that. Don't do this when optimizing for space
6118 since it can cause too many CSEs. */
6119 if (optimize_size && offset >= 0
6120 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6122 HOST_WIDE_INT delta;
6125 delta = offset - (256 - GET_MODE_SIZE (mode));
6126 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6127 delta = 31 * GET_MODE_SIZE (mode);
6129 delta = offset & (~31 * GET_MODE_SIZE (mode));
6131 xop0 = force_operand (plus_constant (xop0, offset - delta),
6133 x = plus_constant (xop0, delta);
6135 else if (offset < 0 && offset > -256)
6136 /* Small negative offsets are best done with a subtract before the
6137 dereference; forcing these into a register normally takes two
6138 instructions. */
6139 x = force_operand (x, NULL_RTX);
6142 /* For the remaining cases, force the constant into a register. */
6143 xop1 = force_reg (SImode, xop1);
6144 x = gen_rtx_PLUS (SImode, xop0, xop1);
6147 else if (GET_CODE (x) == PLUS
6148 && s_register_operand (XEXP (x, 1), SImode)
6149 && !s_register_operand (XEXP (x, 0), SImode))
6151 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6153 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6158 /* We need to find and carefully transform any SYMBOL and LABEL
6159 references; so go back to the original address expression. */
6160 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6162 if (new_x != orig_x)
6170 thumb_legitimize_reload_address (rtx *x_p,
6171 enum machine_mode mode,
6172 int opnum, int type,
6173 int ind_levels ATTRIBUTE_UNUSED)
6177 if (GET_CODE (x) == PLUS
6178 && GET_MODE_SIZE (mode) < 4
6179 && REG_P (XEXP (x, 0))
6180 && XEXP (x, 0) == stack_pointer_rtx
6181 && GET_CODE (XEXP (x, 1)) == CONST_INT
6182 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6187 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6188 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6192 /* If both registers are hi-regs, then it's better to reload the
6193 entire expression rather than each register individually. That
6194 only requires one reload register rather than two. */
6195 if (GET_CODE (x) == PLUS
6196 && REG_P (XEXP (x, 0))
6197 && REG_P (XEXP (x, 1))
6198 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6199 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6204 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6205 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6212 /* Test for various thread-local symbols. */
6214 /* Return TRUE if X is a thread-local symbol. */
6217 arm_tls_symbol_p (rtx x)
6219 if (! TARGET_HAVE_TLS)
6222 if (GET_CODE (x) != SYMBOL_REF)
6225 return SYMBOL_REF_TLS_MODEL (x) != 0;
6228 /* Helper for arm_tls_referenced_p. */
6231 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6233 if (GET_CODE (*x) == SYMBOL_REF)
6234 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6236 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6237 TLS offsets, not real symbol references. */
6238 if (GET_CODE (*x) == UNSPEC
6239 && XINT (*x, 1) == UNSPEC_TLS)
6245 /* Return TRUE if X contains any TLS symbol references. */
6248 arm_tls_referenced_p (rtx x)
6250 if (! TARGET_HAVE_TLS)
6253 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6256 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6259 arm_cannot_force_const_mem (rtx x)
6263 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6265 split_const (x, &base, &offset);
6266 if (GET_CODE (base) == SYMBOL_REF
6267 && !offset_within_block_p (base, INTVAL (offset)))
6270 return arm_tls_referenced_p (x);
6273 #define REG_OR_SUBREG_REG(X) \
6274 (GET_CODE (X) == REG \
6275 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6277 #define REG_OR_SUBREG_RTX(X) \
6278 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6280 #ifndef COSTS_N_INSNS
6281 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
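/* Editor's note: with this fallback definition COSTS_N_INSNS (1) == 2
   and COSTS_N_INSNS (2) == 6, i.e. N insns cost 4 units each less a
   constant 2, so single-insn operations stay comparatively cheap in
   the Thumb-1 cost calculations below.  */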
6284 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6286 enum machine_mode mode = GET_MODE (x);
6300 return COSTS_N_INSNS (1);
6303 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6306 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6313 return COSTS_N_INSNS (2) + cycles;
6315 return COSTS_N_INSNS (1) + 16;
6318 return (COSTS_N_INSNS (1)
6319 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6320 + GET_CODE (SET_DEST (x)) == MEM));
6325 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6327 if (thumb_shiftable_const (INTVAL (x)))
6328 return COSTS_N_INSNS (2);
6329 return COSTS_N_INSNS (3);
6331 else if ((outer == PLUS || outer == COMPARE)
6332 && INTVAL (x) < 256 && INTVAL (x) > -256)
6334 else if ((outer == IOR || outer == XOR || outer == AND)
6335 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6336 return COSTS_N_INSNS (1);
6337 else if (outer == AND)
6340 /* This duplicates the tests in the andsi3 expander. */
6341 for (i = 9; i <= 31; i++)
6342 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6343 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6344 return COSTS_N_INSNS (2);
6346 else if (outer == ASHIFT || outer == ASHIFTRT
6347 || outer == LSHIFTRT)
6349 return COSTS_N_INSNS (2);
6355 return COSTS_N_INSNS (3);
6373 /* XXX another guess. */
6374 /* Memory costs quite a lot for the first word, but subsequent words
6375 load at the equivalent of a single insn each. */
6376 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6377 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6382 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6388 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6389 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6395 return total + COSTS_N_INSNS (1);
6397 /* Assume a two-shift sequence. Increase the cost slightly so
6398 we prefer actual shifts over an extend operation. */
6399 return total + 1 + COSTS_N_INSNS (2);
6407 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6409 enum machine_mode mode = GET_MODE (x);
6410 enum rtx_code subcode;
6412 enum rtx_code code = GET_CODE (x);
6418 /* Memory costs quite a lot for the first word, but subsequent words
6419 load at the equivalent of a single insn each. */
6420 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6427 if (TARGET_HARD_FLOAT && mode == SFmode)
6428 *total = COSTS_N_INSNS (2);
6429 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6430 *total = COSTS_N_INSNS (4);
6432 *total = COSTS_N_INSNS (20);
6436 if (GET_CODE (XEXP (x, 1)) == REG)
6437 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6438 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6439 *total = rtx_cost (XEXP (x, 1), code, speed);
6445 *total += COSTS_N_INSNS (4);
6450 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6451 *total += rtx_cost (XEXP (x, 0), code, speed);
6454 *total += COSTS_N_INSNS (3);
6458 *total += COSTS_N_INSNS (1);
6459 /* Increase the cost of complex shifts because they aren't any faster,
6460 and they reduce dual issue opportunities. */
6461 if (arm_tune_cortex_a9
6462 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6470 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6471 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6472 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6474 *total += rtx_cost (XEXP (x, 1), code, speed);
6478 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6479 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6481 *total += rtx_cost (XEXP (x, 0), code, speed);
6488 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6490 if (TARGET_HARD_FLOAT
6492 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6494 *total = COSTS_N_INSNS (1);
6495 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6496 && arm_const_double_rtx (XEXP (x, 0)))
6498 *total += rtx_cost (XEXP (x, 1), code, speed);
6502 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6503 && arm_const_double_rtx (XEXP (x, 1)))
6505 *total += rtx_cost (XEXP (x, 0), code, speed);
6511 *total = COSTS_N_INSNS (20);
6515 *total = COSTS_N_INSNS (1);
6516 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6517 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6519 *total += rtx_cost (XEXP (x, 1), code, speed);
6523 subcode = GET_CODE (XEXP (x, 1));
6524 if (subcode == ASHIFT || subcode == ASHIFTRT
6525 || subcode == LSHIFTRT
6526 || subcode == ROTATE || subcode == ROTATERT)
6528 *total += rtx_cost (XEXP (x, 0), code, speed);
6529 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6533 /* A shift as a part of RSB costs no more than RSB itself. */
6534 if (GET_CODE (XEXP (x, 0)) == MULT
6535 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6537 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6538 *total += rtx_cost (XEXP (x, 1), code, speed);
6543 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6545 *total += rtx_cost (XEXP (x, 0), code, speed);
6546 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6550 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6551 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6553 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6554 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6555 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6556 *total += COSTS_N_INSNS (1);
6564 if (code == PLUS && arm_arch6 && mode == SImode
6565 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6566 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6568 *total = COSTS_N_INSNS (1);
6569 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6571 *total += rtx_cost (XEXP (x, 1), code, speed);
6575 /* MLA: All arguments must be registers. We filter out
6576 multiplication by a power of two, so that we fall down into
6578 if (GET_CODE (XEXP (x, 0)) == MULT
6579 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6581 /* The cost comes from the cost of the multiply. */
6585 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6587 if (TARGET_HARD_FLOAT
6589 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6591 *total = COSTS_N_INSNS (1);
6592 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6593 && arm_const_double_rtx (XEXP (x, 1)))
6595 *total += rtx_cost (XEXP (x, 0), code, speed);
6602 *total = COSTS_N_INSNS (20);
6606 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6607 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6609 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6610 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6611 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6612 *total += COSTS_N_INSNS (1);
6618 case AND: case XOR: case IOR:
6620 /* Normally the frame registers will be split into reg+const during
6621 reload, so it is a bad idea to combine them with other instructions,
6622 since then they might not be moved outside of loops. As a compromise
6623 we allow integration with ops that have a constant as their second operand.  */
6625 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6626 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6627 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6628 || (REG_OR_SUBREG_REG (XEXP (x, 1))
6629 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
6634 *total += COSTS_N_INSNS (2);
6635 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6636 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6638 *total += rtx_cost (XEXP (x, 0), code, speed);
6645 *total += COSTS_N_INSNS (1);
6646 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6647 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6649 *total += rtx_cost (XEXP (x, 0), code, speed);
6652 subcode = GET_CODE (XEXP (x, 0));
6653 if (subcode == ASHIFT || subcode == ASHIFTRT
6654 || subcode == LSHIFTRT
6655 || subcode == ROTATE || subcode == ROTATERT)
6657 *total += rtx_cost (XEXP (x, 1), code, speed);
6658 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6663 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6665 *total += rtx_cost (XEXP (x, 1), code, speed);
6666 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6670 if (subcode == UMIN || subcode == UMAX
6671 || subcode == SMIN || subcode == SMAX)
6673 *total = COSTS_N_INSNS (3);
6680 /* This should have been handled by the CPU-specific routines. */
6684 if (arm_arch3m && mode == SImode
6685 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6686 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6687 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6688 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6689 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6690 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6692 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6695 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT.  */
6699 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6701 if (TARGET_HARD_FLOAT
6703 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6705 *total = COSTS_N_INSNS (1);
6708 *total = COSTS_N_INSNS (2);
6714 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6715 if (mode == SImode && code == NOT)
6717 subcode = GET_CODE (XEXP (x, 0));
6718 if (subcode == ASHIFT || subcode == ASHIFTRT
6719 || subcode == LSHIFTRT
6720 || subcode == ROTATE || subcode == ROTATERT
6722 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6724 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6725 /* Register shifts cost an extra cycle. */
6726 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6727 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6736 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6738 *total = COSTS_N_INSNS (4);
6742 operand = XEXP (x, 0);
6744 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6745 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6746 && GET_CODE (XEXP (operand, 0)) == REG
6747 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6748 *total += COSTS_N_INSNS (1);
6749 *total += (rtx_cost (XEXP (x, 1), code, speed)
6750 + rtx_cost (XEXP (x, 2), code, speed));
6754 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6756 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6762 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6763 && mode == SImode && XEXP (x, 1) == const0_rtx)
6765 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6771 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6772 && mode == SImode && XEXP (x, 1) == const0_rtx)
6774 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6794 /* SCC insns. If the comparison has already been
6795 performed, they cost 2 instructions; otherwise they need
6796 an additional comparison before them. */
6797 *total = COSTS_N_INSNS (2);
6798 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6805 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6811 *total += COSTS_N_INSNS (1);
6812 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6813 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6815 *total += rtx_cost (XEXP (x, 0), code, speed);
6819 subcode = GET_CODE (XEXP (x, 0));
6820 if (subcode == ASHIFT || subcode == ASHIFTRT
6821 || subcode == LSHIFTRT
6822 || subcode == ROTATE || subcode == ROTATERT)
6824 *total += rtx_cost (XEXP (x, 1), code, speed);
6825 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6830 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6832 *total += rtx_cost (XEXP (x, 1), code, speed);
6833 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6843 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6844 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6845 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6846 *total += rtx_cost (XEXP (x, 1), code, speed);
6850 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6852 if (TARGET_HARD_FLOAT
6854 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6856 *total = COSTS_N_INSNS (1);
6859 *total = COSTS_N_INSNS (20);
6862 *total = COSTS_N_INSNS (1);
6864 *total += COSTS_N_INSNS (3);
6870 if (GET_MODE_CLASS (mode) == MODE_INT)
6872 rtx op = XEXP (x, 0);
6873 enum machine_mode opmode = GET_MODE (op);
6876 *total += COSTS_N_INSNS (1);
6878 if (opmode != SImode)
6882 /* If !arm_arch4, we use one of the extendhisi2_mem
6883 or movhi_bytes patterns for HImode. For a QImode
6884 sign extension, we first zero-extend from memory
6885 and then perform a shift sequence. */
6886 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6887 *total += COSTS_N_INSNS (2);
6890 *total += COSTS_N_INSNS (1);
6892 /* We don't have the necessary insn, so we need to perform some
6894 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6895 /* An and with constant 255. */
6896 *total += COSTS_N_INSNS (1);
6898 /* A shift sequence. Increase costs slightly to avoid
6899 combining two shifts into an extend operation. */
6900 *total += COSTS_N_INSNS (2) + 1;
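/* Illustration (assumed instruction sequences): the AND case is a single
   "and rD, rS, #255", while the shift-sequence fallback is a shift pair
   such as "mov rD, rS, lsl #16; mov rD, rD, lsr #16" for an HImode zero
   extension.  */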
6906 switch (GET_MODE (XEXP (x, 0)))
6913 *total = COSTS_N_INSNS (1);
6923 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6927 if (const_ok_for_arm (INTVAL (x))
6928 || const_ok_for_arm (~INTVAL (x)))
6929 *total = COSTS_N_INSNS (1);
6931 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6932 INTVAL (x), NULL_RTX,
6939 *total = COSTS_N_INSNS (3);
6943 *total = COSTS_N_INSNS (1);
6947 *total = COSTS_N_INSNS (1);
6948 *total += rtx_cost (XEXP (x, 0), code, speed);
6952 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6953 && (mode == SFmode || !TARGET_VFP_SINGLE))
6954 *total = COSTS_N_INSNS (1);
6956 *total = COSTS_N_INSNS (4);
6960 *total = COSTS_N_INSNS (4);
6965 /* Estimates the size cost of thumb1 instructions.
6966 For now most of the code is copied from thumb1_rtx_costs. We need more
6967 fine-grained tuning when we have more related test cases. */
6969 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6971 enum machine_mode mode = GET_MODE (x);
6984 return COSTS_N_INSNS (1);
6987 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6989 /* The Thumb-1 mul instruction can't operate on a constant; we must
6990 load it into a register first. */
6991 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
6992 return COSTS_N_INSNS (1) + const_size;
6994 return COSTS_N_INSNS (1);
6997 return (COSTS_N_INSNS (1)
6998 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6999 + (GET_CODE (SET_DEST (x)) == MEM)));
7004 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7006 if (thumb_shiftable_const (INTVAL (x)))
7007 return COSTS_N_INSNS (2);
7008 return COSTS_N_INSNS (3);
7010 else if ((outer == PLUS || outer == COMPARE)
7011 && INTVAL (x) < 256 && INTVAL (x) > -256)
7013 else if ((outer == IOR || outer == XOR || outer == AND)
7014 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7015 return COSTS_N_INSNS (1);
7016 else if (outer == AND)
7019 /* This duplicates the tests in the andsi3 expander. */
7020 for (i = 9; i <= 31; i++)
7021 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7022 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7023 return COSTS_N_INSNS (2);
7025 else if (outer == ASHIFT || outer == ASHIFTRT
7026 || outer == LSHIFTRT)
7028 return COSTS_N_INSNS (2);
7034 return COSTS_N_INSNS (3);
7052 /* XXX another guess. */
7053 /* Memory costs quite a lot for the first word, but subsequent words
7054 load at the equivalent of a single insn each. */
7055 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7056 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7061 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7066 /* XXX still guessing. */
7067 switch (GET_MODE (XEXP (x, 0)))
7070 return (1 + (mode == DImode ? 4 : 0)
7071 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7074 return (4 + (mode == DImode ? 4 : 0)
7075 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7078 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7089 /* RTX costs when optimizing for size. */
7091 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7094 enum machine_mode mode = GET_MODE (x);
7097 *total = thumb1_size_rtx_costs (x, code, outer_code);
7101 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7105 /* A memory access costs 1 insn if the mode is small, or the address is
7106 a single register; otherwise it costs one insn per word. */
7107 if (REG_P (XEXP (x, 0)))
7108 *total = COSTS_N_INSNS (1);
7110 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7117 /* Needs a libcall, so it costs about this. */
7118 *total = COSTS_N_INSNS (2);
7122 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7124 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7132 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7134 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7137 else if (mode == SImode)
7139 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7140 /* Slightly disparage register shifts, but not by much. */
7141 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7142 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7146 /* Needs a libcall. */
7147 *total = COSTS_N_INSNS (2);
7151 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7152 && (mode == SFmode || !TARGET_VFP_SINGLE))
7154 *total = COSTS_N_INSNS (1);
7160 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7161 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7163 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7164 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7165 || subcode1 == ROTATE || subcode1 == ROTATERT
7166 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7167 || subcode1 == ASHIFTRT)
7169 /* It's just the cost of the two operands. */
7174 *total = COSTS_N_INSNS (1);
7178 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7182 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7183 && (mode == SFmode || !TARGET_VFP_SINGLE))
7185 *total = COSTS_N_INSNS (1);
7189 /* A shift as a part of ADD costs nothing. */
7190 if (GET_CODE (XEXP (x, 0)) == MULT
7191 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7193 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7194 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7195 *total += rtx_cost (XEXP (x, 1), code, false);
7200 case AND: case XOR: case IOR:
7203 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7205 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7206 || subcode == LSHIFTRT || subcode == ASHIFTRT
7207 || (code == AND && subcode == NOT))
7209 /* It's just the cost of the two operands. */
7215 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7219 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7223 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7224 && (mode == SFmode || !TARGET_VFP_SINGLE))
7226 *total = COSTS_N_INSNS (1);
7232 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7241 if (cc_register (XEXP (x, 0), VOIDmode))
7244 *total = COSTS_N_INSNS (1);
7248 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7249 && (mode == SFmode || !TARGET_VFP_SINGLE))
7250 *total = COSTS_N_INSNS (1);
7252 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7257 return arm_rtx_costs_1 (x, outer_code, total, 0);
7260 if (const_ok_for_arm (INTVAL (x)))
7261 /* A multiplication by a constant requires another instruction
7262 to load the constant to a register. */
7263 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7265 else if (const_ok_for_arm (~INTVAL (x)))
7266 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7267 else if (const_ok_for_arm (-INTVAL (x)))
7269 if (outer_code == COMPARE || outer_code == PLUS
7270 || outer_code == MINUS)
7273 *total = COSTS_N_INSNS (1);
7276 *total = COSTS_N_INSNS (2);
7282 *total = COSTS_N_INSNS (2);
7286 *total = COSTS_N_INSNS (4);
7291 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7292 cost of these slightly. */
7293 *total = COSTS_N_INSNS (1) + 1;
7297 if (mode != VOIDmode)
7298 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7300 *total = COSTS_N_INSNS (4); /* Who knows? */
7305 /* RTX costs.  Dispatches to the size-cost or the CPU-specific speed-cost routine, as appropriate. */
7307 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7311 return arm_size_rtx_costs (x, (enum rtx_code) code,
7312 (enum rtx_code) outer_code, total);
7314 return current_tune->rtx_costs (x, (enum rtx_code) code,
7315 (enum rtx_code) outer_code,
7319 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7320 supported on any "slowmul" cores, so it can be ignored. */
7323 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7324 int *total, bool speed)
7326 enum machine_mode mode = GET_MODE (x);
7330 *total = thumb1_rtx_costs (x, code, outer_code);
7337 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7340 *total = COSTS_N_INSNS (20);
7344 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7346 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7347 & (unsigned HOST_WIDE_INT) 0xffffffff);
7348 int cost, const_ok = const_ok_for_arm (i);
7349 int j, booth_unit_size;
7351 /* Tune as appropriate. */
7352 cost = const_ok ? 4 : 8;
7353 booth_unit_size = 2;
7354 for (j = 0; i && j < 32; j += booth_unit_size)
7356 i >>= booth_unit_size;
7360 *total = COSTS_N_INSNS (cost);
7361 *total += rtx_cost (XEXP (x, 0), code, speed);
7365 *total = COSTS_N_INSNS (20);
7369 return arm_rtx_costs_1 (x, outer_code, total, speed);
7374 /* RTX cost for cores with a fast multiply unit (M variants). */
7377 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7378 int *total, bool speed)
7380 enum machine_mode mode = GET_MODE (x);
7384 *total = thumb1_rtx_costs (x, code, outer_code);
7388 /* ??? should thumb2 use different costs? */
7392 /* There is no point basing this on the tuning, since it is always the
7393 fast variant if it exists at all. */
7395 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7396 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7397 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7399 *total = COSTS_N_INSNS (2);
7406 *total = COSTS_N_INSNS (5);
7410 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7412 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7413 & (unsigned HOST_WIDE_INT) 0xffffffff);
7414 int cost, const_ok = const_ok_for_arm (i);
7415 int j, booth_unit_size;
7417 /* Tune as appropriate. */
7418 cost = const_ok ? 4 : 8;
7419 booth_unit_size = 8;
7420 for (j = 0; i && j < 32; j += booth_unit_size)
7422 i >>= booth_unit_size;
7426 *total = COSTS_N_INSNS (cost);
7432 *total = COSTS_N_INSNS (4);
7436 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7438 if (TARGET_HARD_FLOAT
7440 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7442 *total = COSTS_N_INSNS (1);
7447 /* Requires a libcall.  */
7448 *total = COSTS_N_INSNS (20);
7452 return arm_rtx_costs_1 (x, outer_code, total, speed);
7457 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7458 so it can be ignored. */
7461 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7462 int *total, bool speed)
7464 enum machine_mode mode = GET_MODE (x);
7468 *total = thumb1_rtx_costs (x, code, outer_code);
7475 if (GET_CODE (XEXP (x, 0)) != MULT)
7476 return arm_rtx_costs_1 (x, outer_code, total, speed);
7478 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7479 will stall until the multiplication is complete. */
7480 *total = COSTS_N_INSNS (3);
7484 /* There is no point basing this on the tuning, since it is always the
7485 fast variant if it exists at all. */
7487 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7488 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7489 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7491 *total = COSTS_N_INSNS (2);
7498 *total = COSTS_N_INSNS (5);
7502 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7504 /* If operand 1 is a constant we can more accurately
7505 calculate the cost of the multiply. The multiplier can
7506 retire 15 bits on the first cycle and a further 12 on the
7507 second. We do, of course, have to load the constant into
7508 a register first. */
7509 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7510 /* There's a general overhead of one cycle. */
7512 unsigned HOST_WIDE_INT masked_const;
7517 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7519 masked_const = i & 0xffff8000;
7520 if (masked_const != 0)
7523 masked_const = i & 0xf8000000;
7524 if (masked_const != 0)
7527 *total = COSTS_N_INSNS (cost);
7533 *total = COSTS_N_INSNS (3);
7537 /* Requires a libcall.  */
7538 *total = COSTS_N_INSNS (20);
7542 return arm_rtx_costs_1 (x, outer_code, total, speed);
7547 /* RTX costs for 9e (and later) cores. */
7550 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7551 int *total, bool speed)
7553 enum machine_mode mode = GET_MODE (x);
7560 *total = COSTS_N_INSNS (3);
7564 *total = thumb1_rtx_costs (x, code, outer_code);
7572 /* There is no point basing this on the tuning, since it is always the
7573 fast variant if it exists at all. */
7575 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7576 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7577 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7579 *total = COSTS_N_INSNS (2);
7586 *total = COSTS_N_INSNS (5);
7592 *total = COSTS_N_INSNS (2);
7596 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7598 if (TARGET_HARD_FLOAT
7600 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7602 *total = COSTS_N_INSNS (1);
7607 *total = COSTS_N_INSNS (20);
7611 return arm_rtx_costs_1 (x, outer_code, total, speed);
7614 /* All address computations that can be done are free, but rtx cost returns
7615 the same for practically all of them. So we weight the different types
7616 of address here in the order (most preferred first):
7617 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7619 arm_arm_address_cost (rtx x)
7621 enum rtx_code c = GET_CODE (x);
7623 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7625 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7630 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7633 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7643 arm_thumb_address_cost (rtx x)
7645 enum rtx_code c = GET_CODE (x);
7650 && GET_CODE (XEXP (x, 0)) == REG
7651 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7658 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7660 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7664 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7668 /* Some true dependencies can have a higher cost depending
7669 on precisely how certain input operands are used. */
7671 && REG_NOTE_KIND (link) == 0
7672 && recog_memoized (insn) >= 0
7673 && recog_memoized (dep) >= 0)
7675 int shift_opnum = get_attr_shift (insn);
7676 enum attr_type attr_type = get_attr_type (dep);
7678 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7679 operand for INSN. If we have a shifted input operand and the
7680 instruction we depend on is another ALU instruction, then we may
7681 have to account for an additional stall. */
7682 if (shift_opnum != 0
7683 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7685 rtx shifted_operand;
7688 /* Get the shifted operand. */
7689 extract_insn (insn);
7690 shifted_operand = recog_data.operand[shift_opnum];
7692 /* Iterate over all the operands in DEP. If we write an operand
7693 that overlaps with SHIFTED_OPERAND, then we have to increase the
7694 cost of this dependency. */
7696 preprocess_constraints ();
7697 for (opno = 0; opno < recog_data.n_operands; opno++)
7699 /* We can ignore strict inputs. */
7700 if (recog_data.operand_type[opno] == OP_IN)
7703 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7710 /* XXX This is not strictly true for the FPA. */
7711 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7712 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7715 /* Call insns don't incur a stall, even if they follow a load. */
7716 if (REG_NOTE_KIND (link) == 0
7717 && GET_CODE (insn) == CALL_INSN)
7720 if ((i_pat = single_set (insn)) != NULL
7721 && GET_CODE (SET_SRC (i_pat)) == MEM
7722 && (d_pat = single_set (dep)) != NULL
7723 && GET_CODE (SET_DEST (d_pat)) == MEM)
7725 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7726 /* This is a load after a store; there is no conflict if the load reads
7727 from a cached area. Assume that loads from the stack, and from the
7728 constant pool are cached, and that others will miss. This is a
7731 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7732 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7733 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7734 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7741 static int fp_consts_inited = 0;
7743 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7744 static const char * const strings_fp[8] =
7747 "4", "5", "0.5", "10"
7750 static REAL_VALUE_TYPE values_fp[8];
7753 init_fp_table (void)
7759 fp_consts_inited = 1;
7761 fp_consts_inited = 8;
7763 for (i = 0; i < fp_consts_inited; i++)
7765 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7770 /* Return TRUE if rtx X is a valid immediate FP constant. */
7772 arm_const_double_rtx (rtx x)
7777 if (!fp_consts_inited)
7780 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7781 if (REAL_VALUE_MINUS_ZERO (r))
7784 for (i = 0; i < fp_consts_inited; i++)
7785 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7791 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7793 neg_const_double_rtx_ok_for_fpa (rtx x)
7798 if (!fp_consts_inited)
7801 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7802 r = real_value_negate (&r);
7803 if (REAL_VALUE_MINUS_ZERO (r))
7806 for (i = 0; i < 8; i++)
7807 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7814 /* VFPv3 has a fairly wide range of representable immediates, formed from
7815 "quarter-precision" floating-point values. These can be evaluated using this
7816 formula (with ^ for exponentiation):

     (-1)^s * n * 2^-r

7820 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7821 16 <= n <= 31 and 0 <= r <= 7.
7823 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7825 - A (most-significant) is the sign bit.
7826 - BCD are the exponent (encoded as r XOR 3).
7827 - EFGH are the mantissa (encoded as n - 16).
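
   Worked example (added for illustration): 1.0 == (-1)^0 * 16 * 2^-4, so
   s == 0, n == 16 and r == 4, giving A == 0, BCD == (4 XOR 3) == 0b111 and
   EFGH == 0, i.e. the 8-bit immediate 0111 0000 == 0x70.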
7830 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7831 fconst[sd] instruction, or -1 if X isn't suitable. */
7833 vfp3_const_double_index (rtx x)
7835 REAL_VALUE_TYPE r, m;
7837 unsigned HOST_WIDE_INT mantissa, mant_hi;
7838 unsigned HOST_WIDE_INT mask;
7839 HOST_WIDE_INT m1, m2;
7840 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7842 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7845 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7847 /* We can't represent these things, so detect them first. */
7848 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7851 /* Extract sign, exponent and mantissa. */
7852 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7853 r = real_value_abs (&r);
7854 exponent = REAL_EXP (&r);
7855 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7856 highest (sign) bit, with a fixed binary point at bit point_pos.
7857 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7858 bits for the mantissa, this may fail (low bits would be lost). */
7859 real_ldexp (&m, &r, point_pos - exponent);
7860 REAL_VALUE_TO_INT (&m1, &m2, m);
7864 /* If there are bits set in the low part of the mantissa, we can't
7865 represent this value. */
7869 /* Now make it so that mantissa contains the most-significant bits, and move
7870 the point_pos to indicate that the least-significant bits have been discarded.  */
7872 point_pos -= HOST_BITS_PER_WIDE_INT;
7875 /* We can permit four significant bits of mantissa only, plus a high bit
7876 which is always 1. */
7877 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7878 if ((mantissa & mask) != 0)
7881 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7882 mantissa >>= point_pos - 5;
7884 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7885 floating-point immediate zero with Neon using an integer-zero load, but
7886 that case is handled elsewhere.) */
7890 gcc_assert (mantissa >= 16 && mantissa <= 31);
7892 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7893 normalized significands are in the range [1, 2). (Our mantissa is shifted
7894 left 4 places at this point relative to normalized IEEE754 values). GCC
7895 internally uses [0.5, 1) (see real.c), so the exponent returned from
7896 REAL_EXP must be altered. */
7897 exponent = 5 - exponent;
7899 if (exponent < 0 || exponent > 7)
7902 /* Sign, mantissa and exponent are now in the correct form to plug into the
7903 formula described in the comment above. */
7904 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7907 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7909 vfp3_const_double_rtx (rtx x)
7914 return vfp3_const_double_index (x) != -1;
7917 /* Recognize immediates which can be used in various Neon instructions. Legal
7918 immediates are described by the following table (for VMVN variants, the
7919 bitwise inverse of the constant shown is recognized. In either case, VMOV
7920 is output and the correct instruction to use for a given constant is chosen
7921 by the assembler). The constant shown is replicated across all elements of
7922 the destination vector.
7924 insn elems variant constant (binary)
7925 ---- ----- ------- -----------------
7926 vmov i32 0 00000000 00000000 00000000 abcdefgh
7927 vmov i32 1 00000000 00000000 abcdefgh 00000000
7928 vmov i32 2 00000000 abcdefgh 00000000 00000000
7929 vmov i32 3 abcdefgh 00000000 00000000 00000000
7930 vmov i16 4 00000000 abcdefgh
7931 vmov i16 5 abcdefgh 00000000
7932 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7933 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7934 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7935 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7936 vmvn i16 10 00000000 abcdefgh
7937 vmvn i16 11 abcdefgh 00000000
7938 vmov i32 12 00000000 00000000 abcdefgh 11111111
7939 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7940 vmov i32 14 00000000 abcdefgh 11111111 11111111
7941 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7943 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7944 eeeeeeee ffffffff gggggggg hhhhhhhh
7945 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7947 For case 18, B = !b. Representable values are exactly those accepted by
7948 vfp3_const_double_index, but are output as floating-point numbers rather
7951 Variants 0-5 (inclusive) may also be used as immediates for the second
7952 operand of VORR/VBIC instructions.
7954 The INVERSE argument causes the bitwise inverse of the given operand to be
7955 recognized instead (used for recognizing legal immediates for the VAND/VORN
7956 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7957 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7958 output, rather than the real insns vbic/vorr).
7960 INVERSE makes no difference to the recognition of float vectors.
7962 The return value is the variant of immediate as shown in the above table, or
7963 -1 if the given value doesn't match any of the listed patterns.
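
   As an illustration of the table: a vector whose 32-bit elements all equal
   0x000000ab matches the "vmov i32" variant 0 row (with abcdefgh == 0xab),
   while elements of 0x0000ab00 match variant 1.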
7966 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7967 rtx *modconst, int *elementwidth)
7969 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7971 for (i = 0; i < idx; i += (STRIDE)) \
7976 immtype = (CLASS); \
7977 elsize = (ELSIZE); \
7981 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7982 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7983 unsigned char bytes[16];
7984 int immtype = -1, matches;
7985 unsigned int invmask = inverse ? 0xff : 0;
7987 /* Vectors of float constants. */
7988 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7990 rtx el0 = CONST_VECTOR_ELT (op, 0);
7993 if (!vfp3_const_double_rtx (el0))
7996 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7998 for (i = 1; i < n_elts; i++)
8000 rtx elt = CONST_VECTOR_ELT (op, i);
8003 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8005 if (!REAL_VALUES_EQUAL (r0, re))
8010 *modconst = CONST_VECTOR_ELT (op, 0);
8018 /* Splat vector constant out into a byte vector. */
8019 for (i = 0; i < n_elts; i++)
8021 rtx el = CONST_VECTOR_ELT (op, i);
8022 unsigned HOST_WIDE_INT elpart;
8023 unsigned int part, parts;
8025 if (GET_CODE (el) == CONST_INT)
8027 elpart = INTVAL (el);
8030 else if (GET_CODE (el) == CONST_DOUBLE)
8032 elpart = CONST_DOUBLE_LOW (el);
8038 for (part = 0; part < parts; part++)
8041 for (byte = 0; byte < innersize; byte++)
8043 bytes[idx++] = (elpart & 0xff) ^ invmask;
8044 elpart >>= BITS_PER_UNIT;
8046 if (GET_CODE (el) == CONST_DOUBLE)
8047 elpart = CONST_DOUBLE_HIGH (el);
8052 gcc_assert (idx == GET_MODE_SIZE (mode));
8056 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8057 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8059 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8060 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8062 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8063 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8065 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8066 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8068 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8070 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8072 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8073 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8075 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8076 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8078 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8079 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8081 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8082 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8084 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8086 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8088 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8089 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8091 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8092 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8094 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8095 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8097 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8098 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8100 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8102 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8103 && bytes[i] == bytes[(i + 8) % idx]);
8111 *elementwidth = elsize;
8115 unsigned HOST_WIDE_INT imm = 0;
8117 /* Un-invert bytes of recognized vector, if necessary. */
8119 for (i = 0; i < idx; i++)
8120 bytes[i] ^= invmask;
8124 /* FIXME: Broken on 32-bit H_W_I hosts. */
8125 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8127 for (i = 0; i < 8; i++)
8128 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8129 << (i * BITS_PER_UNIT);
8131 *modconst = GEN_INT (imm);
8135 unsigned HOST_WIDE_INT imm = 0;
8137 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8138 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8140 *modconst = GEN_INT (imm);
8148 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8149 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8150 float elements), and a modified constant (whatever should be output for a
8151 VMOV) in *MODCONST. */
8154 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8155 rtx *modconst, int *elementwidth)
8159 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8165 *modconst = tmpconst;
8168 *elementwidth = tmpwidth;
8173 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8174 the immediate is valid, write a constant suitable for using as an operand
8175 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8176 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8179 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8180 rtx *modconst, int *elementwidth)
8184 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8186 if (retval < 0 || retval > 5)
8190 *modconst = tmpconst;
8193 *elementwidth = tmpwidth;
8198 /* Return a string suitable for output of Neon immediate logic operation
8202 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8203 int inverse, int quad)
8205 int width, is_valid;
8206 static char templ[40];
8208 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8210 gcc_assert (is_valid != 0);
8213 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8215 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8220 /* Output a sequence of pairwise operations to implement a reduction.
8221 NOTE: We do "too much work" here, because pairwise operations work on two
8222 registers-worth of operands in one go. Unfortunately, I don't think we can
8223 exploit those extra calculations to do the full operation in fewer steps.
8224 Although all vector elements of the result but the first are ignored, we
8225 actually calculate the same result in each of the elements. An alternative
8226 such as initially loading a vector with zero to use as each of the second
8227 operands would use up an additional register and take an extra instruction,
8228 for no particular gain. */
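/* Illustration (a sketch for an additive REDUC operation): for a
   four-element vector the loop below runs with i == 2 and then i == 1,
   e.g. {a,b,c,d} -> {a+b, c+d, a+b, c+d} -> {a+b+c+d, ...}.  */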
8231 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8232 rtx (*reduc) (rtx, rtx, rtx))
8234 enum machine_mode inner = GET_MODE_INNER (mode);
8235 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8238 for (i = parts / 2; i >= 1; i /= 2)
8240 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8241 emit_insn (reduc (dest, tmpsum, tmpsum));
8246 /* If VALS is a vector constant that can be loaded into a register
8247 using VDUP, generate instructions to do so and return an RTX to
8248 assign to the register. Otherwise return NULL_RTX. */
8251 neon_vdup_constant (rtx vals)
8253 enum machine_mode mode = GET_MODE (vals);
8254 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8255 int n_elts = GET_MODE_NUNITS (mode);
8256 bool all_same = true;
8260 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8263 for (i = 0; i < n_elts; ++i)
8265 x = XVECEXP (vals, 0, i);
8266 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8271 /* The elements are not all the same. We could handle repeating
8272 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8273 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8277 /* We can load this constant by using VDUP and a constant in a
8278 single ARM register. This will be cheaper than a vector
8281 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8282 return gen_rtx_VEC_DUPLICATE (mode, x);
8285 /* Generate code to load VALS, which is a PARALLEL containing only
8286 constants (for vec_init) or CONST_VECTOR, efficiently into a
8287 register. Returns an RTX to copy into the register, or NULL_RTX
8288 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8291 neon_make_constant (rtx vals)
8293 enum machine_mode mode = GET_MODE (vals);
8295 rtx const_vec = NULL_RTX;
8296 int n_elts = GET_MODE_NUNITS (mode);
8300 if (GET_CODE (vals) == CONST_VECTOR)
8302 else if (GET_CODE (vals) == PARALLEL)
8304 /* A CONST_VECTOR must contain only CONST_INTs and
8305 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8306 Only store valid constants in a CONST_VECTOR. */
8307 for (i = 0; i < n_elts; ++i)
8309 rtx x = XVECEXP (vals, 0, i);
8310 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8313 if (n_const == n_elts)
8314 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8319 if (const_vec != NULL
8320 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8321 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8323 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8324 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8325 pipeline cycle; creating the constant takes one or two ARM
8328 else if (const_vec != NULL_RTX)
8329 /* Load from constant pool. On Cortex-A8 this takes two cycles
8330 (for either double or quad vectors). We cannot take advantage
8331 of single-cycle VLD1 because we need a PC-relative addressing
8335 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8336 We cannot construct an initializer. */
8340 /* Initialize vector TARGET to VALS. */
8343 neon_expand_vector_init (rtx target, rtx vals)
8345 enum machine_mode mode = GET_MODE (target);
8346 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8347 int n_elts = GET_MODE_NUNITS (mode);
8348 int n_var = 0, one_var = -1;
8349 bool all_same = true;
8353 for (i = 0; i < n_elts; ++i)
8355 x = XVECEXP (vals, 0, i);
8356 if (!CONSTANT_P (x))
8357 ++n_var, one_var = i;
8359 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8365 rtx constant = neon_make_constant (vals);
8366 if (constant != NULL_RTX)
8368 emit_move_insn (target, constant);
8373 /* Splat a single non-constant element if we can. */
8374 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8376 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8377 emit_insn (gen_rtx_SET (VOIDmode, target,
8378 gen_rtx_VEC_DUPLICATE (mode, x)));
8382 /* One field is non-constant. Load constant then overwrite varying
8383 field. This is more efficient than using the stack. */
8386 rtx copy = copy_rtx (vals);
8387 rtx index = GEN_INT (one_var);
8389 /* Load constant part of vector, substitute neighboring value for
8391 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8392 neon_expand_vector_init (target, copy);
8394 /* Insert variable. */
8395 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8399 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8402 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8405 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8408 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8411 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8414 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8417 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8420 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8423 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8431 /* Construct the vector in memory one field at a time
8432 and load the whole vector. */
8433 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8434 for (i = 0; i < n_elts; i++)
8435 emit_move_insn (adjust_address_nv (mem, inner_mode,
8436 i * GET_MODE_SIZE (inner_mode)),
8437 XVECEXP (vals, 0, i));
8438 emit_move_insn (target, mem);
8441 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8442 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8443 reported source locations are bogus. */
8446 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8451 gcc_assert (GET_CODE (operand) == CONST_INT);
8453 lane = INTVAL (operand);
8455 if (lane < low || lane >= high)
8459 /* Bounds-check lanes. */
8462 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8464 bounds_check (operand, low, high, "lane out of range");
8467 /* Bounds-check constants. */
8470 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8472 bounds_check (operand, low, high, "constant out of range");
8476 neon_element_bits (enum machine_mode mode)
8479 return GET_MODE_BITSIZE (mode);
8481 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8485 /* Predicates for `match_operand' and `match_operator'. */
8487 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8489 cirrus_memory_offset (rtx op)
8491 /* Reject eliminable registers. */
8492 if (! (reload_in_progress || reload_completed)
8493 && ( reg_mentioned_p (frame_pointer_rtx, op)
8494 || reg_mentioned_p (arg_pointer_rtx, op)
8495 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8496 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8497 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8498 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8501 if (GET_CODE (op) == MEM)
8507 /* Match: (mem (reg)). */
8508 if (GET_CODE (ind) == REG)
8514 if (GET_CODE (ind) == PLUS
8515 && GET_CODE (XEXP (ind, 0)) == REG
8516 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8517 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8524 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8525 WB is true if full writeback address modes are allowed and is false
8526 if limited writeback address modes (POST_INC and PRE_DEC) are
8530 arm_coproc_mem_operand (rtx op, bool wb)
8534 /* Reject eliminable registers. */
8535 if (! (reload_in_progress || reload_completed)
8536 && ( reg_mentioned_p (frame_pointer_rtx, op)
8537 || reg_mentioned_p (arg_pointer_rtx, op)
8538 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8539 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8540 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8541 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8544 /* Constants are converted into offsets from labels. */
8545 if (GET_CODE (op) != MEM)
8550 if (reload_completed
8551 && (GET_CODE (ind) == LABEL_REF
8552 || (GET_CODE (ind) == CONST
8553 && GET_CODE (XEXP (ind, 0)) == PLUS
8554 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8555 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8558 /* Match: (mem (reg)). */
8559 if (GET_CODE (ind) == REG)
8560 return arm_address_register_rtx_p (ind, 0);
8562 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8563 acceptable in any case (subject to verification by
8564 arm_address_register_rtx_p). We need WB to be true to accept
8565 PRE_INC and POST_DEC. */
8566 if (GET_CODE (ind) == POST_INC
8567 || GET_CODE (ind) == PRE_DEC
8569 && (GET_CODE (ind) == PRE_INC
8570 || GET_CODE (ind) == POST_DEC)))
8571 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8574 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8575 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8576 && GET_CODE (XEXP (ind, 1)) == PLUS
8577 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8578 ind = XEXP (ind, 1);
8583 if (GET_CODE (ind) == PLUS
8584 && GET_CODE (XEXP (ind, 0)) == REG
8585 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8586 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8587 && INTVAL (XEXP (ind, 1)) > -1024
8588 && INTVAL (XEXP (ind, 1)) < 1024
8589 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
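/* I.e. a word-aligned offset in [-1020, +1020], the range encodable in the
   8-bit scaled offset field of coprocessor loads/stores such as VLDR.  */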
8595 /* Return TRUE if OP is a memory operand which we can load or store a vector
8596 to/from. TYPE is one of the following values:
8597 0 - Vector load/store (vldr)
8598 1 - Core registers (ldm)
8599 2 - Element/structure loads (vld1)
8602 neon_vector_mem_operand (rtx op, int type)
8606 /* Reject eliminable registers. */
8607 if (! (reload_in_progress || reload_completed)
8608 && ( reg_mentioned_p (frame_pointer_rtx, op)
8609 || reg_mentioned_p (arg_pointer_rtx, op)
8610 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8611 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8612 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8613 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8616 /* Constants are converted into offsets from labels. */
8617 if (GET_CODE (op) != MEM)
8622 if (reload_completed
8623 && (GET_CODE (ind) == LABEL_REF
8624 || (GET_CODE (ind) == CONST
8625 && GET_CODE (XEXP (ind, 0)) == PLUS
8626 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8627 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8630 /* Match: (mem (reg)). */
8631 if (GET_CODE (ind) == REG)
8632 return arm_address_register_rtx_p (ind, 0);
8634 /* Allow post-increment with Neon registers. */
8635 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8636 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8638 /* FIXME: vld1 allows register post-modify. */
8644 && GET_CODE (ind) == PLUS
8645 && GET_CODE (XEXP (ind, 0)) == REG
8646 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8647 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8648 && INTVAL (XEXP (ind, 1)) > -1024
8649 && INTVAL (XEXP (ind, 1)) < 1016
8650 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
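/* I.e. word-aligned offsets in [-1020, +1012]; slightly narrower at the top
   end than the plain VLDR range above.  */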
8656 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8659 neon_struct_mem_operand (rtx op)
8663 /* Reject eliminable registers. */
8664 if (! (reload_in_progress || reload_completed)
8665 && ( reg_mentioned_p (frame_pointer_rtx, op)
8666 || reg_mentioned_p (arg_pointer_rtx, op)
8667 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8668 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8669 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8670 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8673 /* Constants are converted into offsets from labels. */
8674 if (GET_CODE (op) != MEM)
8679 if (reload_completed
8680 && (GET_CODE (ind) == LABEL_REF
8681 || (GET_CODE (ind) == CONST
8682 && GET_CODE (XEXP (ind, 0)) == PLUS
8683 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8684 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8687 /* Match: (mem (reg)). */
8688 if (GET_CODE (ind) == REG)
8689 return arm_address_register_rtx_p (ind, 0);
8694 /* Return true if X is a register that will be eliminated later on. */
8696 arm_eliminable_register (rtx x)
8698 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8699 || REGNO (x) == ARG_POINTER_REGNUM
8700 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8701 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8704 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8705 coprocessor registers. Otherwise return NO_REGS. */
8708 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8712 if (!TARGET_NEON_FP16)
8713 return GENERAL_REGS;
8714 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8716 return GENERAL_REGS;
8720 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8721 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8722 && neon_vector_mem_operand (x, 0))
8725 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8728 return GENERAL_REGS;
8731 /* Values which must be returned in the most-significant end of the return
8735 arm_return_in_msb (const_tree valtype)
8737 return (TARGET_AAPCS_BASED
8739 && (AGGREGATE_TYPE_P (valtype)
8740 || TREE_CODE (valtype) == COMPLEX_TYPE));
8743 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8744 Used by the Cirrus Maverick code which has to work around
8745 a hardware bug triggered by such instructions. */
8747 arm_memory_load_p (rtx insn)
8749 rtx body, lhs, rhs;
8751 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8754 body = PATTERN (insn);
8756 if (GET_CODE (body) != SET)
8759 lhs = XEXP (body, 0);
8760 rhs = XEXP (body, 1);
8762 lhs = REG_OR_SUBREG_RTX (lhs);
8764 /* If the destination is not a general purpose
8765 register we do not have to worry. */
8766 if (GET_CODE (lhs) != REG
8767 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8770 /* As well as loads from memory we also have to react
8771 to loads of invalid constants which will be turned
8772 into loads from the minipool. */
8773 return (GET_CODE (rhs) == MEM
8774 || GET_CODE (rhs) == SYMBOL_REF
8775 || note_invalid_constants (insn, -1, false));
8778 /* Return TRUE if INSN is a Cirrus instruction. */
8780 arm_cirrus_insn_p (rtx insn)
8782 enum attr_cirrus attr;
8784 /* get_attr cannot accept USE or CLOBBER. */
8786 || GET_CODE (insn) != INSN
8787 || GET_CODE (PATTERN (insn)) == USE
8788 || GET_CODE (PATTERN (insn)) == CLOBBER)
8791 attr = get_attr_cirrus (insn);
8793 return attr != CIRRUS_NOT;
8796 /* Cirrus reorg for invalid instruction combinations. */
8798 cirrus_reorg (rtx first)
8800 enum attr_cirrus attr;
8801 rtx body = PATTERN (first);
8805 /* Any branch must be followed by 2 non-Cirrus instructions. */
8806 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8809 t = next_nonnote_insn (first);
8811 if (arm_cirrus_insn_p (t))
8814 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8818 emit_insn_after (gen_nop (), first);
8823 /* (float (blah)) is in parallel with a clobber. */
8824 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8825 body = XVECEXP (body, 0, 0);
8827 if (GET_CODE (body) == SET)
8829 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8831 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8832 be followed by a non-Cirrus insn. */
8833 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8835 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8836 emit_insn_after (gen_nop (), first);
8840 else if (arm_memory_load_p (first))
8842 unsigned int arm_regno;
8844 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8845 ldr/cfmv64hr combination where the Rd field is the same
8846 in both instructions must be split with a non-Cirrus
8853 /* Get ARM register number for ldr insn. */
8854 if (GET_CODE (lhs) == REG)
8855 arm_regno = REGNO (lhs);
8858 gcc_assert (GET_CODE (rhs) == REG);
8859 arm_regno = REGNO (rhs);
8863 first = next_nonnote_insn (first);
8865 if (! arm_cirrus_insn_p (first))
8868 body = PATTERN (first);
8870 /* (float (blah)) is in parallel with a clobber. */
8871 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8872 body = XVECEXP (body, 0, 0);
8874 if (GET_CODE (body) == FLOAT)
8875 body = XEXP (body, 0);
8877 if (get_attr_cirrus (first) == CIRRUS_MOVE
8878 && GET_CODE (XEXP (body, 1)) == REG
8879 && arm_regno == REGNO (XEXP (body, 1)))
8880 emit_insn_after (gen_nop (), first);
8886 /* get_attr cannot accept USE or CLOBBER. */
8888 || GET_CODE (first) != INSN
8889 || GET_CODE (PATTERN (first)) == USE
8890 || GET_CODE (PATTERN (first)) == CLOBBER)
8893 attr = get_attr_cirrus (first);
8895 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8896 must be followed by a non-coprocessor instruction. */
8897 if (attr == CIRRUS_COMPARE)
8901 t = next_nonnote_insn (first);
8903 if (arm_cirrus_insn_p (t))
8906 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8910 emit_insn_after (gen_nop (), first);
8916 /* Return TRUE if X references a SYMBOL_REF. */
8918 symbol_mentioned_p (rtx x)
8923 if (GET_CODE (x) == SYMBOL_REF)
8926 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8927 are constant offsets, not symbols. */
8928 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8931 fmt = GET_RTX_FORMAT (GET_CODE (x));
8933 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8939 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8940 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8943 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8950 /* Return TRUE if X references a LABEL_REF. */
8952 label_mentioned_p (rtx x)
8957 if (GET_CODE (x) == LABEL_REF)
8960 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8961 instruction, but they are constant offsets, not symbols. */
8962 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8965 fmt = GET_RTX_FORMAT (GET_CODE (x));
8966 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8972 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8973 if (label_mentioned_p (XVECEXP (x, i, j)))
8976 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8984 tls_mentioned_p (rtx x)
8986 switch (GET_CODE (x))
8989 return tls_mentioned_p (XEXP (x, 0));
8992 if (XINT (x, 1) == UNSPEC_TLS)
9000 /* Must not copy any rtx that uses a pc-relative address. */
9003 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9005 if (GET_CODE (*x) == UNSPEC
9006 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9012 arm_cannot_copy_insn_p (rtx insn)
9014 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9020 enum rtx_code code = GET_CODE (x);
9037 /* Return 1 if memory locations are adjacent. */
9039 adjacent_mem_locations (rtx a, rtx b)
9041 /* We don't guarantee to preserve the order of these memory refs. */
9042 if (volatile_refs_p (a) || volatile_refs_p (b))
9045 if ((GET_CODE (XEXP (a, 0)) == REG
9046 || (GET_CODE (XEXP (a, 0)) == PLUS
9047 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9048 && (GET_CODE (XEXP (b, 0)) == REG
9049 || (GET_CODE (XEXP (b, 0)) == PLUS
9050 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9052 HOST_WIDE_INT val0 = 0, val1 = 0;
9056 if (GET_CODE (XEXP (a, 0)) == PLUS)
9058 reg0 = XEXP (XEXP (a, 0), 0);
9059 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9064 if (GET_CODE (XEXP (b, 0)) == PLUS)
9066 reg1 = XEXP (XEXP (b, 0), 0);
9067 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9072 /* Don't accept any offset that will require multiple
9073 instructions to handle, since this would cause the
9074 arith_adjacentmem pattern to output an overlong sequence. */
9075 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9078 /* Don't allow an eliminable register: register elimination can make
9079 the offset too large. */
9080 if (arm_eliminable_register (reg0))
9083 val_diff = val1 - val0;
9087 /* If the target has load delay slots, then there's no benefit
9088 to using an ldm instruction unless the offset is zero and
9089 we are optimizing for size. */
9090 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9091 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9092 && (val_diff == 4 || val_diff == -4));
9095 return ((REGNO (reg0) == REGNO (reg1))
9096 && (val_diff == 4 || val_diff == -4));
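/* A minimal standalone sketch (not part of the original source; all
   names here are illustrative) of the adjacency rule used above: two
   word-sized accesses can be merged when they use the same base
   register and their immediate offsets differ by exactly 4, in either
   direction.  */

static int
toy_adjacent_words_p (int regno0, int regno1, long val0, long val1)
{
  long val_diff = val1 - val0;

  /* Same base register, and the second word sits directly above or
     directly below the first.  */
  return regno0 == regno1 && (val_diff == 4 || val_diff == -4);
}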
9102 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9103 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9104 instruction. ADD_OFFSET is nonzero if the base address register needs
9105 to be modified with an add instruction before we can use it. */
9108 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9109 int nops, HOST_WIDE_INT add_offset)
9111 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9112 if the offset isn't small enough. The reason 2 ldrs are faster
9113 is because these ARMs are able to do more than one cache access
9114 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9115 whilst the ARM8 has a double bandwidth cache. This means that
9116 these cores can do both an instruction fetch and a data fetch in
9117 a single cycle, so the trick of calculating the address into a
9118 scratch register (one of the result regs) and then doing a load
9119 multiple actually becomes slower (and no smaller in code size).
9120 That is the transformation
9122 ldr rd1, [rbase + offset]
9123 ldr rd2, [rbase + offset + 4]
9127 add rd1, rbase, offset
9128 ldmia rd1, {rd1, rd2}
9130 produces worse code -- '3 cycles + any stalls on rd2' instead of
9131 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9132 access per cycle, the first sequence could never complete in less
9133 than 6 cycles, whereas the ldm sequence would only take 5 and
9134 would make better use of sequential accesses if not hitting the cache.
9137 We cheat here and test 'arm_ld_sched' which we currently know to
9138 only be true for the ARM8, ARM9 and StrongARM. If this ever
9139 changes, then the test below needs to be reworked. */
9140 if (nops == 2 && arm_ld_sched && add_offset != 0)
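/* A worked version of the cycle accounting in the comment above, as a
   self-contained sketch (illustrative names only; the real test is the
   line above).  On the dual-bandwidth cores each ldr retires in one
   cycle, so two ldrs cost 2 cycles, while add + ldmia costs 1 cycle of
   address set-up plus 2 cycles for the transfer.  */

static int
toy_two_ldrs_win_p (int dual_bandwidth, int nops, long add_offset)
{
  int ldr_cycles = nops;			/* one cycle per ldr */
  int ldm_cycles = (add_offset != 0) + 2;	/* add + ldm issue/transfer */

  return dual_bandwidth && nops == 2 && ldr_cycles < ldm_cycles;
}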
9146 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9147 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9148 an array ORDER which describes the sequence to use when accessing the
9149 offsets that produces an ascending order. In this sequence, each
9150 offset must be larger by exactly 4 than the previous one. ORDER[0]
9151 must have been filled in with the lowest offset by the caller.
9152 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9153 we use to verify that ORDER produces an ascending order of registers.
9154 Return true if it was possible to construct such an order, false if not. */
9158 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9162 for (i = 1; i < nops; i++)
9166 order[i] = order[i - 1];
9167 for (j = 0; j < nops; j++)
9168 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9170 /* We must find exactly one offset that is higher than the
9171 previous one by 4. */
9172 if (order[i] != order[i - 1])
9176 if (order[i] == order[i - 1])
9178 /* The register numbers must be ascending. */
9179 if (unsorted_regs != NULL
9180 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
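/* A self-contained sketch of the ordering loop above, using plain
   arrays and assuming the offsets are all distinct (the real code also
   rejects duplicate successors and checks that the register numbers
   ascend).  Given offsets {8, 0, 4} and order[0] preset to 1 (the index
   of the lowest offset), this yields order = {1, 2, 0}.  */

static int
toy_offset_order (int nops, const long *offsets, int *order)
{
  int i, j;

  for (i = 1; i < nops; i++)
    {
      /* Look for the unique offset exactly 4 above the previous one.  */
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  order[i] = j;

      if (order[i] == order[i - 1])
	return 0;	/* No successor: the block is not contiguous.  */
    }

  return 1;
}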
9187 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9188 HOST_WIDE_INT *load_offset)
9190 int unsorted_regs[MAX_LDM_STM_OPS];
9191 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9192 int order[MAX_LDM_STM_OPS];
9196 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9197 easily extended if required. */
9198 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9200 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9202 /* Loop over the operands and check that the memory references are
9203 suitable (i.e. immediate offsets from the same base register). At
9204 the same time, extract the target register, and the memory offsets. */
9206 for (i = 0; i < nops; i++)
9211 /* Convert a subreg of a mem into the mem itself. */
9212 if (GET_CODE (operands[nops + i]) == SUBREG)
9213 operands[nops + i] = alter_subreg (operands + (nops + i));
9215 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9217 /* Don't reorder volatile memory references; it doesn't seem worth
9218 looking for the case where the order is ok anyway. */
9219 if (MEM_VOLATILE_P (operands[nops + i]))
9222 offset = const0_rtx;
9224 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9225 || (GET_CODE (reg) == SUBREG
9226 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9227 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9228 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9230 || (GET_CODE (reg) == SUBREG
9231 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9232 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9236 base_reg = REGNO (reg);
9239 if (base_reg != (int) REGNO (reg))
9240 /* Not addressed from the same base register. */
9243 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9244 ? REGNO (operands[i])
9245 : REGNO (SUBREG_REG (operands[i])));
9247 /* If it isn't an integer register, or if it overwrites the
9248 base register but isn't the last insn in the list, then
9249 we can't do this. */
9250 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9251 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9254 unsorted_offsets[i] = INTVAL (offset);
9255 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9259 /* Not a suitable memory address. */
9263 /* All the useful information has now been extracted from the
9264 operands into unsorted_regs and unsorted_offsets; additionally,
9265 order[0] has been set to the lowest offset in the list. Sort
9266 the offsets into order, verifying that they are adjacent, and
9267 check that the register numbers are ascending. */
9268 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9275 for (i = 0; i < nops; i++)
9276 regs[i] = unsorted_regs[order[i]];
9278 *load_offset = unsorted_offsets[order[0]];
9281 if (unsorted_offsets[order[0]] == 0)
9282 ldm_case = 1; /* ldmia */
9283 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9284 ldm_case = 2; /* ldmib */
9285 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9286 ldm_case = 3; /* ldmda */
9287 else if (unsorted_offsets[order[nops - 1]] == -4)
9288 ldm_case = 4; /* ldmdb */
9289 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9290 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9295 if (!multiple_operation_profitable_p (false, nops,
9297 ? unsorted_offsets[order[0]] : 0))
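/* The addressing-mode classification above, restated as a standalone
   sketch keyed on the lowest and highest sorted offsets (illustrative
   only; case 5 additionally requires the offset to be a valid add/sub
   immediate, as tested with const_ok_for_arm above).  */

static int
toy_ldm_case (int is_arm, long lowest, long highest)
{
  if (lowest == 0)
    return 1;			/* ldmia: offsets 0, 4, 8, ...  */
  if (is_arm && lowest == 4)
    return 2;			/* ldmib: offsets 4, 8, 12, ...  */
  if (is_arm && highest == 0)
    return 3;			/* ldmda: offsets ..., -4, 0  */
  if (highest == -4)
    return 4;			/* ldmdb: offsets ..., -8, -4  */
  return 5;			/* base register must be adjusted first  */
}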
9304 emit_ldm_seq (rtx *operands, int nops)
9306 int regs[MAX_LDM_STM_OPS];
9308 HOST_WIDE_INT offset;
9312 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9315 strcpy (buf, "ldm%(ia%)\t");
9319 strcpy (buf, "ldm%(ib%)\t");
9323 strcpy (buf, "ldm%(da%)\t");
9327 strcpy (buf, "ldm%(db%)\t");
9332 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9333 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9336 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9337 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9339 output_asm_insn (buf, operands);
9341 strcpy (buf, "ldm%(ia%)\t");
9348 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9349 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9351 for (i = 1; i < nops; i++)
9352 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9353 reg_names[regs[i]]);
9355 strcat (buf, "}\t%@ phole ldm");
9357 output_asm_insn (buf, operands);
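/* A sketch of the string assembled above for a two-register ldmia at
   base r1 (assuming the usual libc string routines already declared by
   this file's headers; REGISTER_PREFIX handling is omitted).  The
   %(ia%) bracket picks the unified/divided assembler spelling later:

     "ldm%(ia%)\tr1, {r4, r5}\t%@ phole ldm"  */

static void
toy_build_ldm (char *buf, const char *base, const char *rd1, const char *rd2)
{
  strcpy (buf, "ldm%(ia%)\t");
  sprintf (buf + strlen (buf), "%s, {%s", base, rd1);
  sprintf (buf + strlen (buf), ", %s", rd2);
  strcat (buf, "}\t%@ phole ldm");
}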
9362 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9363 HOST_WIDE_INT * load_offset)
9365 int unsorted_regs[MAX_LDM_STM_OPS];
9366 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9367 int order[MAX_LDM_STM_OPS];
9371 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9372 easily extended if required. */
9373 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9375 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9377 /* Loop over the operands and check that the memory references are
9378 suitable (i.e. immediate offsets from the same base register). At
9379 the same time, extract the target register, and the memory offsets. */
9381 for (i = 0; i < nops; i++)
9386 /* Convert a subreg of a mem into the mem itself. */
9387 if (GET_CODE (operands[nops + i]) == SUBREG)
9388 operands[nops + i] = alter_subreg (operands + (nops + i));
9390 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9392 /* Don't reorder volatile memory references; it doesn't seem worth
9393 looking for the case where the order is ok anyway. */
9394 if (MEM_VOLATILE_P (operands[nops + i]))
9397 offset = const0_rtx;
9399 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9400 || (GET_CODE (reg) == SUBREG
9401 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9402 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9403 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9405 || (GET_CODE (reg) == SUBREG
9406 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9407 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9410 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9411 ? REGNO (operands[i])
9412 : REGNO (SUBREG_REG (operands[i])));
9414 base_reg = REGNO (reg);
9415 else if (base_reg != (int) REGNO (reg))
9416 /* Not addressed from the same base register. */
9419 /* If it isn't an integer register, then we can't do this. */
9420 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9423 unsorted_offsets[i] = INTVAL (offset);
9424 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9428 /* Not a suitable memory address. */
9432 /* All the useful information has now been extracted from the
9433 operands into unsorted_regs and unsorted_offsets; additionally,
9434 order[0] has been set to the lowest offset in the list. Sort
9435 the offsets into order, verifying that they are adjacent, and
9436 check that the register numbers are ascending. */
9437 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9444 for (i = 0; i < nops; i++)
9445 regs[i] = unsorted_regs[order[i]];
9447 *load_offset = unsorted_offsets[order[0]];
9450 if (unsorted_offsets[order[0]] == 0)
9451 stm_case = 1; /* stmia */
9452 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9453 stm_case = 2; /* stmib */
9454 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9455 stm_case = 3; /* stmda */
9456 else if (unsorted_offsets[order[nops - 1]] == -4)
9457 stm_case = 4; /* stmdb */
9461 if (!multiple_operation_profitable_p (false, nops, 0))
9468 emit_stm_seq (rtx *operands, int nops)
9470 int regs[MAX_LDM_STM_OPS];
9472 HOST_WIDE_INT offset;
9476 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9479 strcpy (buf, "stm%(ia%)\t");
9483 strcpy (buf, "stm%(ib%)\t");
9487 strcpy (buf, "stm%(da%)\t");
9491 strcpy (buf, "stm%(db%)\t");
9498 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9499 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9501 for (i = 1; i < nops; i++)
9502 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9503 reg_names[regs[i]]);
9505 strcat (buf, "}\t%@ phole stm");
9507 output_asm_insn (buf, operands);
9511 /* Routines for use in generating RTL. */
9514 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9515 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9517 HOST_WIDE_INT offset = *offsetp;
9520 int sign = up ? 1 : -1;
9523 /* XScale has load-store double instructions, but they have stricter
9524 alignment requirements than load-store multiple, so we cannot
9527 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9528 the pipeline until completion.
9536 An ldr instruction takes 1-3 cycles, but does not block the
9545 Best case ldr will always win. However, the more ldr instructions
9546 we issue, the less likely we are to be able to schedule them well.
9547 Using ldr instructions also increases code size.
9549 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9550 for counts of 3 or 4 regs. */
9551 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9557 for (i = 0; i < count; i++)
9559 addr = plus_constant (from, i * 4 * sign);
9560 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9561 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9567 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9577 result = gen_rtx_PARALLEL (VOIDmode,
9578 rtvec_alloc (count + (write_back ? 1 : 0)));
9581 XVECEXP (result, 0, 0)
9582 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9587 for (j = 0; i < count; i++, j++)
9589 addr = plus_constant (from, j * 4 * sign);
9590 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9591 XVECEXP (result, 0, i)
9592 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
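/* A hypothetical (unused) call sketch, not from the original source:
   load r0..r3 from four consecutive words at the address in FROM
   without write-back, letting the running OFF track alias information
   for BASEMEM as the code above describes.  */

static void
toy_emit_ldm_r0_r3 (rtx from, rtx basemem)
{
  HOST_WIDE_INT off = 0;

  emit_insn (arm_gen_load_multiple (0, 4, from, TRUE, FALSE,
				    basemem, &off));
}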
9603 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9604 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9606 HOST_WIDE_INT offset = *offsetp;
9609 int sign = up ? 1 : -1;
9612 /* See arm_gen_load_multiple for discussion of
9613 the pros/cons of ldm/stm usage for XScale. */
9614 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9620 for (i = 0; i < count; i++)
9622 addr = plus_constant (to, i * 4 * sign);
9623 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9624 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9630 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9640 result = gen_rtx_PARALLEL (VOIDmode,
9641 rtvec_alloc (count + (write_back ? 1 : 0)));
9644 XVECEXP (result, 0, 0)
9645 = gen_rtx_SET (VOIDmode, to,
9646 plus_constant (to, count * 4 * sign));
9651 for (j = 0; i < count; i++, j++)
9653 addr = plus_constant (to, j * 4 * sign);
9654 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9655 XVECEXP (result, 0, i)
9656 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9667 arm_gen_movmemqi (rtx *operands)
9669 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9670 HOST_WIDE_INT srcoffset, dstoffset;
9672 rtx src, dst, srcbase, dstbase;
9673 rtx part_bytes_reg = NULL;
9676 if (GET_CODE (operands[2]) != CONST_INT
9677 || GET_CODE (operands[3]) != CONST_INT
9678 || INTVAL (operands[2]) > 64
9679 || INTVAL (operands[3]) & 3)
9682 dstbase = operands[0];
9683 srcbase = operands[1];
9685 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9686 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9688 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9689 out_words_to_go = INTVAL (operands[2]) / 4;
9690 last_bytes = INTVAL (operands[2]) & 3;
9691 dstoffset = srcoffset = 0;
9693 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9694 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9696 for (i = 0; in_words_to_go >= 2; i+=4)
9698 if (in_words_to_go > 4)
9699 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9700 srcbase, &srcoffset));
9702 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9703 FALSE, srcbase, &srcoffset));
9705 if (out_words_to_go)
9707 if (out_words_to_go > 4)
9708 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9709 dstbase, &dstoffset));
9710 else if (out_words_to_go != 1)
9711 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9715 dstbase, &dstoffset));
9718 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9719 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9720 if (last_bytes != 0)
9722 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9728 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9729 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9732 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9733 if (out_words_to_go)
9737 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9738 sreg = copy_to_reg (mem);
9740 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9741 emit_move_insn (mem, sreg);
9744 gcc_assert (!in_words_to_go); /* Sanity check */
9749 gcc_assert (in_words_to_go > 0);
9751 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9752 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9755 gcc_assert (!last_bytes || part_bytes_reg);
9757 if (BYTES_BIG_ENDIAN && last_bytes)
9759 rtx tmp = gen_reg_rtx (SImode);
9761 /* The bytes we want are in the top end of the word. */
9762 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9763 GEN_INT (8 * (4 - last_bytes))));
9764 part_bytes_reg = tmp;
9768 mem = adjust_automodify_address (dstbase, QImode,
9769 plus_constant (dst, last_bytes - 1),
9770 dstoffset + last_bytes - 1);
9771 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9775 tmp = gen_reg_rtx (SImode);
9776 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9777 part_bytes_reg = tmp;
9786 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9787 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9791 rtx tmp = gen_reg_rtx (SImode);
9792 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9793 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9794 part_bytes_reg = tmp;
9801 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9802 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
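/* The size bookkeeping above as a standalone sketch: for an 11-byte
   copy the load side is rounded up to whole words (as ARM_NUM_INTS
   does), while the store side is split into whole words plus trailing
   bytes.  Names are illustrative.  */

static void
toy_movmem_split (long nbytes, long *in_words, long *out_words, long *tail)
{
  *in_words = (nbytes + 3) / 4;		/* 11 -> 3 words to load  */
  *out_words = nbytes / 4;		/* 11 -> 2 words to store */
  *tail = nbytes & 3;			/* 11 -> 3 trailing bytes */
}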
9809 /* Select a dominance comparison mode if possible for a test of the general
9810 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9811 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9812 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9813 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9814 In all cases OP will be either EQ or NE, but we don't need to know which
9815 here. If we are unable to support a dominance comparison we return
9816 CC mode. This will then fail to match for the RTL expressions that
9817 generate this call. */
9819 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9821 enum rtx_code cond1, cond2;
9824 /* Currently we will probably get the wrong result if the individual
9825 comparisons are not simple. This also ensures that it is safe to
9826 reverse a comparison if necessary. */
9827 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9829 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9833 /* The if_then_else variant of this tests the second condition if the
9834 first passes, but is true if the first fails. Reverse the first
9835 condition to get a true "inclusive-or" expression. */
9836 if (cond_or == DOM_CC_NX_OR_Y)
9837 cond1 = reverse_condition (cond1);
9839 /* If the comparisons are not equal, and one doesn't dominate the other,
9840 then we can't do this. */
9842 && !comparison_dominates_p (cond1, cond2)
9843 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9848 enum rtx_code temp = cond1;
9856 if (cond_or == DOM_CC_X_AND_Y)
9861 case EQ: return CC_DEQmode;
9862 case LE: return CC_DLEmode;
9863 case LEU: return CC_DLEUmode;
9864 case GE: return CC_DGEmode;
9865 case GEU: return CC_DGEUmode;
9866 default: gcc_unreachable ();
9870 if (cond_or == DOM_CC_X_AND_Y)
9886 if (cond_or == DOM_CC_X_AND_Y)
9902 if (cond_or == DOM_CC_X_AND_Y)
9918 if (cond_or == DOM_CC_X_AND_Y)
9933 /* The remaining cases only occur when both comparisons are the
9936 gcc_assert (cond1 == cond2);
9940 gcc_assert (cond1 == cond2);
9944 gcc_assert (cond1 == cond2);
9948 gcc_assert (cond1 == cond2);
9952 gcc_assert (cond1 == cond2);
9961 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9963 /* All floating point compares return CCFP if it is an equality
9964 comparison, and CCFPE otherwise. */
9965 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9985 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9994 /* A compare with a shifted operand. Because of canonicalization, the
9995 comparison will have to be swapped when we emit the assembler. */
9996 if (GET_MODE (y) == SImode
9997 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9998 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9999 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10000 || GET_CODE (x) == ROTATERT))
10003 /* This operation is performed swapped, but since we only rely on the Z
10004 flag we don't need an additional mode. */
10005 if (GET_MODE (y) == SImode
10006 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10007 && GET_CODE (x) == NEG
10008 && (op == EQ || op == NE))
10011 /* This is a special case that is used by combine to allow a
10012 comparison of a shifted byte load to be split into a zero-extend
10013 followed by a comparison of the shifted integer (only valid for
10014 equalities and unsigned inequalities). */
10015 if (GET_MODE (x) == SImode
10016 && GET_CODE (x) == ASHIFT
10017 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10018 && GET_CODE (XEXP (x, 0)) == SUBREG
10019 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10020 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10021 && (op == EQ || op == NE
10022 || op == GEU || op == GTU || op == LTU || op == LEU)
10023 && GET_CODE (y) == CONST_INT)
10026 /* A construct for a conditional compare: if the false arm contains
10027 0, then both conditions must be true, otherwise either condition
10028 must be true. Not all conditions are possible, so CCmode is
10029 returned if it can't be done. */
10030 if (GET_CODE (x) == IF_THEN_ELSE
10031 && (XEXP (x, 2) == const0_rtx
10032 || XEXP (x, 2) == const1_rtx)
10033 && COMPARISON_P (XEXP (x, 0))
10034 && COMPARISON_P (XEXP (x, 1)))
10035 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10036 INTVAL (XEXP (x, 2)));
10038 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10039 if (GET_CODE (x) == AND
10040 && COMPARISON_P (XEXP (x, 0))
10041 && COMPARISON_P (XEXP (x, 1)))
10042 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10045 if (GET_CODE (x) == IOR
10046 && COMPARISON_P (XEXP (x, 0))
10047 && COMPARISON_P (XEXP (x, 1)))
10048 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10051 /* An operation (on Thumb) where we want to test for a single bit.
10052 This is done by shifting that bit up into the top bit of a
10053 scratch register; we can then branch on the sign bit. */
10055 && GET_MODE (x) == SImode
10056 && (op == EQ || op == NE)
10057 && GET_CODE (x) == ZERO_EXTRACT
10058 && XEXP (x, 1) == const1_rtx)
10061 /* For an operation that sets the condition codes as a side-effect, the
10062 V flag is not set correctly, so we can only use comparisons where
10063 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.) */
10065 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10066 if (GET_MODE (x) == SImode
10068 && (op == EQ || op == NE || op == LT || op == GE)
10069 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10070 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10071 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10072 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10073 || GET_CODE (x) == LSHIFTRT
10074 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10075 || GET_CODE (x) == ROTATERT
10076 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10077 return CC_NOOVmode;
10079 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10082 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10083 && GET_CODE (x) == PLUS
10084 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10087 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10089 /* To keep things simple, always use the Cirrus cfcmp64 if it is available. */
10091 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10098 /* A DImode comparison against zero can be implemented by
10099 or'ing the two halves together. */
10100 if (y == const0_rtx)
10103 /* We can do an equality test in three Thumb instructions. */
10113 /* DImode unsigned comparisons can be implemented by cmp +
10114 cmpeq without a scratch register. Not worth doing in Thumb-2. */
10125 /* DImode signed and unsigned comparisons can be implemented
10126 by cmp + sbcs with a scratch register, but that does not
10127 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10128 gcc_assert (op != EQ && op != NE);
10132 gcc_unreachable ();
10139 /* X and Y are two things to compare using CODE. Emit the compare insn and
10140 return the rtx for register 0 in the proper mode. FP means this is a
10141 floating point compare: I don't think that it is needed on the arm. */
10143 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10145 enum machine_mode mode;
10147 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10149 /* We might have X as a constant, Y as a register because of the predicates
10150 used for cmpdi. If so, force X to a register here. */
10151 if (dimode_comparison && !REG_P (x))
10152 x = force_reg (DImode, x);
10154 mode = SELECT_CC_MODE (code, x, y);
10155 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10157 if (dimode_comparison
10158 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10159 && mode != CC_CZmode)
10163 /* To compare two non-zero values for equality, XOR them and
10164 then compare against zero. Not used for ARM mode; there
10165 CC_CZmode is cheaper. */
10166 if (mode == CC_Zmode && y != const0_rtx)
10168 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10171 /* A scratch register is required. */
10172 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10173 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10174 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10177 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
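/* The XOR trick above in plain C (a sketch): two 64-bit values are
   equal exactly when the exclusive-or of the two is all-zero, so a
   DImode equality against Y becomes a cheaper comparison of X ^ Y
   against zero.  */

static int
toy_di_equal_p (unsigned long long x, unsigned long long y)
{
  unsigned long long t = x ^ y;		/* non-zero iff any bit differs */

  return t == 0;
}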
10182 /* Generate a sequence of insns that will generate the correct return
10183 address mask depending on the physical architecture that the program
10186 arm_gen_return_addr_mask (void)
10188 rtx reg = gen_reg_rtx (Pmode);
10190 emit_insn (gen_return_addr_mask (reg));
10195 arm_reload_in_hi (rtx *operands)
10197 rtx ref = operands[1];
10199 HOST_WIDE_INT offset = 0;
10201 if (GET_CODE (ref) == SUBREG)
10203 offset = SUBREG_BYTE (ref);
10204 ref = SUBREG_REG (ref);
10207 if (GET_CODE (ref) == REG)
10209 /* We have a pseudo which has been spilt onto the stack; there
10210 are two cases here: the first where there is a simple
10211 stack-slot replacement and a second where the stack-slot is
10212 out of range, or is used as a subreg. */
10213 if (reg_equiv_mem[REGNO (ref)])
10215 ref = reg_equiv_mem[REGNO (ref)];
10216 base = find_replacement (&XEXP (ref, 0));
10219 /* The slot is out of range, or was dressed up in a SUBREG. */
10220 base = reg_equiv_address[REGNO (ref)];
10223 base = find_replacement (&XEXP (ref, 0));
10225 /* Handle the case where the address is too complex to be offset by 1. */
10226 if (GET_CODE (base) == MINUS
10227 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10229 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10231 emit_set_insn (base_plus, base);
10234 else if (GET_CODE (base) == PLUS)
10236 /* The addend must be CONST_INT, or we would have dealt with it above. */
10237 HOST_WIDE_INT hi, lo;
10239 offset += INTVAL (XEXP (base, 1));
10240 base = XEXP (base, 0);
10242 /* Rework the address into a legal sequence of insns. */
10243 /* Valid range for lo is -4095 -> 4095 */
10246 : -((-offset) & 0xfff));
10248 /* Corner case, if lo is the max offset then we would be out of range
10249 once we have added the additional 1 below, so bump the msb into the
10250 pre-loading insn(s). */
10254 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10255 ^ (HOST_WIDE_INT) 0x80000000)
10256 - (HOST_WIDE_INT) 0x80000000);
10258 gcc_assert (hi + lo == offset);
10262 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10264 /* Get the base address; addsi3 knows how to handle constants
10265 that require more than one insn. */
10266 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10272 /* Operands[2] may overlap operands[0] (though it won't overlap
10273 operands[1]), that's why we asked for a DImode reg -- so we can
10274 use the bit that does not overlap. */
10275 if (REGNO (operands[2]) == REGNO (operands[0]))
10276 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10278 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10280 emit_insn (gen_zero_extendqisi2 (scratch,
10281 gen_rtx_MEM (QImode,
10282 plus_constant (base,
10284 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10285 gen_rtx_MEM (QImode,
10286 plus_constant (base,
10288 if (!BYTES_BIG_ENDIAN)
10289 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10290 gen_rtx_IOR (SImode,
10293 gen_rtx_SUBREG (SImode, operands[0], 0),
10297 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10298 gen_rtx_IOR (SImode,
10299 gen_rtx_ASHIFT (SImode, scratch,
10301 gen_rtx_SUBREG (SImode, operands[0], 0)));
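/* The offset splitting above as a standalone sketch.  LO keeps the part
   reachable by the +/-4095 addressing range (dropping down to 11 bits
   in the corner case, since a further +1 is applied later), and HI is
   the sign-extended remainder handed to addsi3.  For example, offset
   5000 splits into hi = 4096, lo = 904, with hi + lo == offset.  */

static void
toy_split_reload_offset (long long offset, long long *hi, long long *lo)
{
  *lo = (offset >= 0
	 ? (offset & 0xfff)
	 : -((-offset) & 0xfff));

  if (*lo == 4095)		/* the corner case described above */
    *lo &= 0x7ff;

  /* Sign-extend the remaining high part to 32 bits.  */
  *hi = ((((offset - *lo) & 0xffffffffLL) ^ 0x80000000LL) - 0x80000000LL);
}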
10304 /* Handle storing a half-word to memory during reload by synthesizing as two
10305 byte stores. Take care not to clobber the input values until after we
10306 have moved them somewhere safe. This code assumes that if the DImode
10307 scratch in operands[2] overlaps either the input value or output address
10308 in some way, then that value must die in this insn (we absolutely need
10309 two scratch registers for some corner cases). */
10311 arm_reload_out_hi (rtx *operands)
10313 rtx ref = operands[0];
10314 rtx outval = operands[1];
10316 HOST_WIDE_INT offset = 0;
10318 if (GET_CODE (ref) == SUBREG)
10320 offset = SUBREG_BYTE (ref);
10321 ref = SUBREG_REG (ref);
10324 if (GET_CODE (ref) == REG)
10326 /* We have a pseudo which has been spilt onto the stack; there
10327 are two cases here: the first where there is a simple
10328 stack-slot replacement and a second where the stack-slot is
10329 out of range, or is used as a subreg. */
10330 if (reg_equiv_mem[REGNO (ref)])
10332 ref = reg_equiv_mem[REGNO (ref)];
10333 base = find_replacement (&XEXP (ref, 0));
10336 /* The slot is out of range, or was dressed up in a SUBREG. */
10337 base = reg_equiv_address[REGNO (ref)];
10340 base = find_replacement (&XEXP (ref, 0));
10342 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10344 /* Handle the case where the address is too complex to be offset by 1. */
10345 if (GET_CODE (base) == MINUS
10346 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10348 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10350 /* Be careful not to destroy OUTVAL. */
10351 if (reg_overlap_mentioned_p (base_plus, outval))
10353 /* Updating base_plus might destroy outval, see if we can
10354 swap the scratch and base_plus. */
10355 if (!reg_overlap_mentioned_p (scratch, outval))
10358 scratch = base_plus;
10363 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10365 /* Be conservative and copy OUTVAL into the scratch now,
10366 this should only be necessary if outval is a subreg
10367 of something larger than a word. */
10368 /* XXX Might this clobber base? I can't see how it can,
10369 since scratch is known to overlap with OUTVAL, and
10370 must be wider than a word. */
10371 emit_insn (gen_movhi (scratch_hi, outval));
10372 outval = scratch_hi;
10376 emit_set_insn (base_plus, base);
10379 else if (GET_CODE (base) == PLUS)
10381 /* The addend must be CONST_INT, or we would have dealt with it above. */
10382 HOST_WIDE_INT hi, lo;
10384 offset += INTVAL (XEXP (base, 1));
10385 base = XEXP (base, 0);
10387 /* Rework the address into a legal sequence of insns. */
10388 /* Valid range for lo is -4095 -> 4095 */
10391 : -((-offset) & 0xfff));
10393 /* Corner case, if lo is the max offset then we would be out of range
10394 once we have added the additional 1 below, so bump the msb into the
10395 pre-loading insn(s). */
10399 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10400 ^ (HOST_WIDE_INT) 0x80000000)
10401 - (HOST_WIDE_INT) 0x80000000);
10403 gcc_assert (hi + lo == offset);
10407 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10409 /* Be careful not to destroy OUTVAL. */
10410 if (reg_overlap_mentioned_p (base_plus, outval))
10412 /* Updating base_plus might destroy outval, see if we
10413 can swap the scratch and base_plus. */
10414 if (!reg_overlap_mentioned_p (scratch, outval))
10417 scratch = base_plus;
10422 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10424 /* Be conservative and copy outval into scratch now,
10425 this should only be necessary if outval is a
10426 subreg of something larger than a word. */
10427 /* XXX Might this clobber base? I can't see how it
10428 can, since scratch is known to overlap with
10430 emit_insn (gen_movhi (scratch_hi, outval));
10431 outval = scratch_hi;
10435 /* Get the base address; addsi3 knows how to handle constants
10436 that require more than one insn. */
10437 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10443 if (BYTES_BIG_ENDIAN)
10445 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10446 plus_constant (base, offset + 1)),
10447 gen_lowpart (QImode, outval)));
10448 emit_insn (gen_lshrsi3 (scratch,
10449 gen_rtx_SUBREG (SImode, outval, 0),
10451 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10452 gen_lowpart (QImode, scratch)));
10456 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10457 gen_lowpart (QImode, outval)));
10458 emit_insn (gen_lshrsi3 (scratch,
10459 gen_rtx_SUBREG (SImode, outval, 0),
10461 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10462 plus_constant (base, offset + 1)),
10463 gen_lowpart (QImode, scratch)));
10467 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10468 (padded to the size of a word) should be passed in a register. */
10471 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10473 if (TARGET_AAPCS_BASED)
10474 return must_pass_in_stack_var_size (mode, type);
10476 return must_pass_in_stack_var_size_or_pad (mode, type);
10480 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10481 Return true if an argument passed on the stack should be padded upwards,
10482 i.e. if the least-significant byte has useful data.
10483 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10484 aggregate types are placed in the lowest memory address. */
10487 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10489 if (!TARGET_AAPCS_BASED)
10490 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10492 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10499 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10500 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10501 byte of the register has useful data, and return the opposite if the
10502 most significant byte does.
10503 For AAPCS, small aggregates and small complex types are always padded
10507 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10508 tree type, int first ATTRIBUTE_UNUSED)
10510 if (TARGET_AAPCS_BASED
10511 && BYTES_BIG_ENDIAN
10512 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10513 && int_size_in_bytes (type) <= 4)
10516 /* Otherwise, use default padding. */
10517 return !BYTES_BIG_ENDIAN;
10521 /* Print a symbolic form of X to the debug file, F. */
10523 arm_print_value (FILE *f, rtx x)
10525 switch (GET_CODE (x))
10528 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10532 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10540 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10542 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10543 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10551 fprintf (f, "\"%s\"", XSTR (x, 0));
10555 fprintf (f, "`%s'", XSTR (x, 0));
10559 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10563 arm_print_value (f, XEXP (x, 0));
10567 arm_print_value (f, XEXP (x, 0));
10569 arm_print_value (f, XEXP (x, 1));
10577 fprintf (f, "????");
10582 /* Routines for manipulation of the constant pool. */
10584 /* Arm instructions cannot load a large constant directly into a
10585 register; they have to come from a pc relative load. The constant
10586 must therefore be placed in the addressable range of the pc
10587 relative load. Depending on the precise pc relative load
10588 instruction the range is somewhere between 256 bytes and 4k. This
10589 means that we often have to dump a constant inside a function, and
10590 generate code to branch around it.
10592 It is important to minimize this, since the branches will slow
10593 things down and make the code larger.
10595 Normally we can hide the table after an existing unconditional
10596 branch so that there is no interruption of the flow, but in the
10597 worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...
10615 We fix this by performing a scan after scheduling, which notices
10616 which instructions need to have their operands fetched from the
10617 constant table and builds the table.
10619 The algorithm starts by building a table of all the constants that
10620 need fixing up and all the natural barriers in the function (places
10621 where a constant table can be dropped without breaking the flow).
10622 For each fixup we note how far the pc-relative replacement will be
10623 able to reach and the offset of the instruction into the function.
10625 Having built the table we then group the fixes together to form
10626 tables that are as large as possible (subject to addressing
10627 constraints) and emit each table of constants after the last
10628 barrier that is within range of all the instructions in the group.
10629 If a group does not contain a barrier, then we forcibly create one
10630 by inserting a jump instruction into the flow. Once the table has
10631 been inserted, the insns are then modified to reference the
10632 relevant entry in the pool.
10634 Possible enhancements to the algorithm (not implemented) are:
10636 1) For some processors and object formats, there may be benefit in
10637 aligning the pools to the start of cache lines; this alignment
10638 would need to be taken into account when calculating addressability of a pool. */
10641 /* These typedefs are located at the start of this file, so that
10642 they can be used in the prototypes there. This comment is to
10643 remind readers of that fact so that the following structures
10644 can be understood more easily.
10646 typedef struct minipool_node Mnode;
10647 typedef struct minipool_fixup Mfix; */
10649 struct minipool_node
10651 /* Doubly linked chain of entries. */
10654 /* The maximum offset into the code that this entry can be placed. While
10655 pushing fixes for forward references, all entries are sorted in order
10656 of increasing max_address. */
10657 HOST_WIDE_INT max_address;
10658 /* Similarly for an entry inserted for a backwards ref. */
10659 HOST_WIDE_INT min_address;
10660 /* The number of fixes referencing this entry. This can become zero
10661 if we "unpush" an entry. In this case we ignore the entry when we
10662 come to emit the code. */
10664 /* The offset from the start of the minipool. */
10665 HOST_WIDE_INT offset;
10666 /* The value in the table. */
10668 /* The mode of the value. */
10669 enum machine_mode mode;
10670 /* The size of the value. With iWMMXt enabled
10671 sizes > 4 also imply an alignment of 8-bytes. */
10675 struct minipool_fixup
10679 HOST_WIDE_INT address;
10681 enum machine_mode mode;
10685 HOST_WIDE_INT forwards;
10686 HOST_WIDE_INT backwards;
10689 /* Fixes less than a word need padding out to a word boundary. */
10690 #define MINIPOOL_FIX_SIZE(mode) \
10691 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
10693 static Mnode * minipool_vector_head;
10694 static Mnode * minipool_vector_tail;
10695 static rtx minipool_vector_label;
10696 static int minipool_pad;
10698 /* The linked list of all minipool fixes required for this function. */
10699 Mfix * minipool_fix_head;
10700 Mfix * minipool_fix_tail;
10701 /* The fix entry for the current minipool, once it has been placed. */
10702 Mfix * minipool_barrier;
10704 /* Determines if INSN is the start of a jump table. Returns the end
10705 of the TABLE or NULL_RTX. */
10707 is_jump_table (rtx insn)
10711 if (GET_CODE (insn) == JUMP_INSN
10712 && JUMP_LABEL (insn) != NULL
10713 && ((table = next_real_insn (JUMP_LABEL (insn)))
10714 == next_real_insn (insn))
10716 && GET_CODE (table) == JUMP_INSN
10717 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10718 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10724 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10725 #define JUMP_TABLES_IN_TEXT_SECTION 0
10728 static HOST_WIDE_INT
10729 get_jump_table_size (rtx insn)
10731 /* ADDR_VECs only take room if read-only data goes into the text section. */
10733 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10735 rtx body = PATTERN (insn);
10736 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10737 HOST_WIDE_INT size;
10738 HOST_WIDE_INT modesize;
10740 modesize = GET_MODE_SIZE (GET_MODE (body));
10741 size = modesize * XVECLEN (body, elt);
10745 /* Round up size of TBB table to a halfword boundary. */
10746 size = (size + 1) & ~(HOST_WIDE_INT)1;
10749 /* No padding necessary for TBH. */
10752 /* Add two bytes for alignment on Thumb. */
10757 gcc_unreachable ();
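/* The table-size arithmetic above with concrete numbers (a sketch;
   names illustrative): a TBB table (one-byte deltas) of 7 entries takes
   7 bytes, rounded up to 8 for halfword alignment; a word-sized table
   of 7 entries takes 28 bytes, plus 2 alignment bytes on Thumb.  */

static long
toy_jump_table_bytes (int is_thumb, long modesize, long nelts)
{
  long size = modesize * nelts;

  if (modesize == 1)
    size = (size + 1) & ~1L;	/* TBB: pad to a halfword boundary  */
  else if (modesize == 4 && is_thumb)
    size += 2;			/* word table alignment on Thumb  */

  return size;
}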
10765 /* Move a minipool fix MP from its current location to before MAX_MP.
10766 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10767 constraints may need updating. */
10769 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10770 HOST_WIDE_INT max_address)
10772 /* The code below assumes these are different. */
10773 gcc_assert (mp != max_mp);
10775 if (max_mp == NULL)
10777 if (max_address < mp->max_address)
10778 mp->max_address = max_address;
10782 if (max_address > max_mp->max_address - mp->fix_size)
10783 mp->max_address = max_mp->max_address - mp->fix_size;
10785 mp->max_address = max_address;
10787 /* Unlink MP from its current position. Since max_mp is non-null,
10788 mp->prev must be non-null. */
10789 mp->prev->next = mp->next;
10790 if (mp->next != NULL)
10791 mp->next->prev = mp->prev;
10793 minipool_vector_tail = mp->prev;
10795 /* Re-insert it before MAX_MP. */
10797 mp->prev = max_mp->prev;
10800 if (mp->prev != NULL)
10801 mp->prev->next = mp;
10803 minipool_vector_head = mp;
10806 /* Save the new entry. */
10809 /* Scan over the preceding entries and adjust their addresses as
10811 while (mp->prev != NULL
10812 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10814 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10821 /* Add a constant to the minipool for a forward reference. Returns the
10822 node added or NULL if the constant will not fit in this pool. */
10824 add_minipool_forward_ref (Mfix *fix)
10826 /* If set, max_mp is the first pool_entry that has a lower
10827 constraint than the one we are trying to add. */
10828 Mnode * max_mp = NULL;
10829 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10832 /* If the minipool starts before the end of FIX->INSN then this FIX
10833 cannot be placed into the current pool. Furthermore, adding the
10834 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
10836 if (minipool_vector_head &&
10837 (fix->address + get_attr_length (fix->insn)
10838 >= minipool_vector_head->max_address - fix->fix_size))
10841 /* Scan the pool to see if a constant with the same value has
10842 already been added. While we are doing this, also note the
10843 location where we must insert the constant if it doesn't already
10845 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10847 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10848 && fix->mode == mp->mode
10849 && (GET_CODE (fix->value) != CODE_LABEL
10850 || (CODE_LABEL_NUMBER (fix->value)
10851 == CODE_LABEL_NUMBER (mp->value)))
10852 && rtx_equal_p (fix->value, mp->value))
10854 /* More than one fix references this entry. */
10856 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10859 /* Note the insertion point if necessary. */
10861 && mp->max_address > max_address)
10864 /* If we are inserting an 8-byte-aligned quantity and
10865 we have not already found an insertion point, then
10866 make sure that all such 8-byte aligned quantities are
10867 placed at the start of the pool. */
10868 if (ARM_DOUBLEWORD_ALIGN
10870 && fix->fix_size >= 8
10871 && mp->fix_size < 8)
10874 max_address = mp->max_address;
10878 /* The value is not currently in the minipool, so we need to create
10879 a new entry for it. If MAX_MP is NULL, the entry will be put on
10880 the end of the list since the placement is less constrained than
10881 any existing entry. Otherwise, we insert the new fix before
10882 MAX_MP and, if necessary, adjust the constraints on the other
10885 mp->fix_size = fix->fix_size;
10886 mp->mode = fix->mode;
10887 mp->value = fix->value;
10889 /* Not yet required for a backwards ref. */
10890 mp->min_address = -65536;
10892 if (max_mp == NULL)
10894 mp->max_address = max_address;
10896 mp->prev = minipool_vector_tail;
10898 if (mp->prev == NULL)
10900 minipool_vector_head = mp;
10901 minipool_vector_label = gen_label_rtx ();
10904 mp->prev->next = mp;
10906 minipool_vector_tail = mp;
10910 if (max_address > max_mp->max_address - mp->fix_size)
10911 mp->max_address = max_mp->max_address - mp->fix_size;
10913 mp->max_address = max_address;
10916 mp->prev = max_mp->prev;
10918 if (mp->prev != NULL)
10919 mp->prev->next = mp;
10921 minipool_vector_head = mp;
10924 /* Save the new entry. */
10927 /* Scan over the preceding entries and adjust their addresses as
10929 while (mp->prev != NULL
10930 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10932 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10940 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10941 HOST_WIDE_INT min_address)
10943 HOST_WIDE_INT offset;
10945 /* The code below assumes these are different. */
10946 gcc_assert (mp != min_mp);
10948 if (min_mp == NULL)
10950 if (min_address > mp->min_address)
10951 mp->min_address = min_address;
10955 /* We will adjust this below if it is too loose. */
10956 mp->min_address = min_address;
10958 /* Unlink MP from its current position. Since min_mp is non-null,
10959 mp->next must be non-null. */
10960 mp->next->prev = mp->prev;
10961 if (mp->prev != NULL)
10962 mp->prev->next = mp->next;
10964 minipool_vector_head = mp->next;
10966 /* Reinsert it after MIN_MP. */
10968 mp->next = min_mp->next;
10970 if (mp->next != NULL)
10971 mp->next->prev = mp;
10973 minipool_vector_tail = mp;
10979 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10981 mp->offset = offset;
10982 if (mp->refcount > 0)
10983 offset += mp->fix_size;
10985 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10986 mp->next->min_address = mp->min_address + mp->fix_size;
10992 /* Add a constant to the minipool for a backward reference. Returns the
10993 node added or NULL if the constant will not fit in this pool.
10995 Note that the code for insertion for a backwards reference can be
10996 somewhat confusing because the calculated offsets for each fix do
10997 not take into account the size of the pool (which is still under construction.) */
11000 add_minipool_backward_ref (Mfix *fix)
11002 /* If set, min_mp is the last pool_entry that has a lower constraint
11003 than the one we are trying to add. */
11004 Mnode *min_mp = NULL;
11005 /* This can be negative, since it is only a constraint. */
11006 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11009 /* If we can't reach the current pool from this insn, or if we can't
11010 insert this entry at the end of the pool without pushing other
11011 fixes out of range, then we don't try. This ensures that we
11012 can't fail later on. */
11013 if (min_address >= minipool_barrier->address
11014 || (minipool_vector_tail->min_address + fix->fix_size
11015 >= minipool_barrier->address))
11018 /* Scan the pool to see if a constant with the same value has
11019 already been added. While we are doing this, also note the
11020 location where we must insert the constant if it doesn't already
11022 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11024 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11025 && fix->mode == mp->mode
11026 && (GET_CODE (fix->value) != CODE_LABEL
11027 || (CODE_LABEL_NUMBER (fix->value)
11028 == CODE_LABEL_NUMBER (mp->value)))
11029 && rtx_equal_p (fix->value, mp->value)
11030 /* Check that there is enough slack to move this entry to the
11031 end of the table (this is conservative). */
11032 && (mp->max_address
11033 > (minipool_barrier->address
11034 + minipool_vector_tail->offset
11035 + minipool_vector_tail->fix_size)))
11038 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11041 if (min_mp != NULL)
11042 mp->min_address += fix->fix_size;
11045 /* Note the insertion point if necessary. */
11046 if (mp->min_address < min_address)
11048 /* For now, we do not allow the insertion of nodes that require 8-byte
11049 alignment anywhere but at the start of the pool. */
11050 if (ARM_DOUBLEWORD_ALIGN
11051 && fix->fix_size >= 8 && mp->fix_size < 8)
11056 else if (mp->max_address
11057 < minipool_barrier->address + mp->offset + fix->fix_size)
11059 /* Inserting before this entry would push the fix beyond
11060 its maximum address (which can happen if we have
11061 re-located a forwards fix); force the new fix to come after it. */
11063 if (ARM_DOUBLEWORD_ALIGN
11064 && fix->fix_size >= 8 && mp->fix_size < 8)
11069 min_address = mp->min_address + fix->fix_size;
11072 /* Do not insert a non-8-byte aligned quantity before 8-byte
11073 aligned quantities. */
11074 else if (ARM_DOUBLEWORD_ALIGN
11075 && fix->fix_size < 8
11076 && mp->fix_size >= 8)
11079 min_address = mp->min_address + fix->fix_size;
11084 /* We need to create a new entry. */
11086 mp->fix_size = fix->fix_size;
11087 mp->mode = fix->mode;
11088 mp->value = fix->value;
11090 mp->max_address = minipool_barrier->address + 65536;
11092 mp->min_address = min_address;
11094 if (min_mp == NULL)
11097 mp->next = minipool_vector_head;
11099 if (mp->next == NULL)
11101 minipool_vector_tail = mp;
11102 minipool_vector_label = gen_label_rtx ();
11105 mp->next->prev = mp;
11107 minipool_vector_head = mp;
11111 mp->next = min_mp->next;
11115 if (mp->next != NULL)
11116 mp->next->prev = mp;
11118 minipool_vector_tail = mp;
11121 /* Save the new entry. */
11129 /* Scan over the following entries and adjust their offsets. */
11130 while (mp->next != NULL)
11132 if (mp->next->min_address < mp->min_address + mp->fix_size)
11133 mp->next->min_address = mp->min_address + mp->fix_size;
11136 mp->next->offset = mp->offset + mp->fix_size;
11138 mp->next->offset = mp->offset;
11147 assign_minipool_offsets (Mfix *barrier)
11149 HOST_WIDE_INT offset = 0;
11152 minipool_barrier = barrier;
11154 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11156 mp->offset = offset;
11158 if (mp->refcount > 0)
11159 offset += mp->fix_size;
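/* Offsets accumulate only over live entries.  With fix sizes {4, 8, 4}
   and refcounts {1, 0, 2}, the loop above assigns offsets {0, 4, 4}:
   the unreferenced middle entry occupies no space.  A standalone sketch
   with illustrative names:  */

static void
toy_assign_offsets (int n, const long *fix_size, const int *refcount,
		    long *offsets)
{
  long offset = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      offsets[i] = offset;
      if (refcount[i] > 0)	/* dead entries are skipped when emitting */
	offset += fix_size[i];
    }
}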
11163 /* Output the literal table */
11165 dump_minipool (rtx scan)
11171 if (ARM_DOUBLEWORD_ALIGN)
11172 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11173 if (mp->refcount > 0 && mp->fix_size >= 8)
11180 fprintf (dump_file,
11181 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11182 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11184 scan = emit_label_after (gen_label_rtx (), scan);
11185 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11186 scan = emit_label_after (minipool_vector_label, scan);
11188 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11190 if (mp->refcount > 0)
11194 fprintf (dump_file,
11195 ";; Offset %u, min %ld, max %ld ",
11196 (unsigned) mp->offset, (unsigned long) mp->min_address,
11197 (unsigned long) mp->max_address);
11198 arm_print_value (dump_file, mp->value);
11199 fputc ('\n', dump_file);
11202 switch (mp->fix_size)
11204 #ifdef HAVE_consttable_1
11206 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11210 #ifdef HAVE_consttable_2
11212 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11216 #ifdef HAVE_consttable_4
11218 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11222 #ifdef HAVE_consttable_8
11224 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11228 #ifdef HAVE_consttable_16
11230 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11235 gcc_unreachable ();
11243 minipool_vector_head = minipool_vector_tail = NULL;
11244 scan = emit_insn_after (gen_consttable_end (), scan);
11245 scan = emit_barrier_after (scan);
11248 /* Return the cost of forcibly inserting a barrier after INSN. */
11250 arm_barrier_cost (rtx insn)
11252 /* Basing the location of the pool on the loop depth is preferable,
11253 but at the moment, the basic block information seems to be
11254 corrupt by this stage of the compilation. */
11255 int base_cost = 50;
11256 rtx next = next_nonnote_insn (insn);
11258 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11261 switch (GET_CODE (insn))
11264 /* It will always be better to place the table before the label, rather than after it. */
11273 return base_cost - 10;
11276 return base_cost + 10;
11280 /* Find the best place in the insn stream in the range
11281 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11282 Create the barrier by inserting a jump and add a new fix entry for
11285 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11287 HOST_WIDE_INT count = 0;
11289 rtx from = fix->insn;
11290 /* The instruction after which we will insert the jump. */
11291 rtx selected = NULL;
11293 /* The address at which the jump instruction will be placed. */
11294 HOST_WIDE_INT selected_address;
11296 HOST_WIDE_INT max_count = max_address - fix->address;
11297 rtx label = gen_label_rtx ();
11299 selected_cost = arm_barrier_cost (from);
11300 selected_address = fix->address;
11302 while (from && count < max_count)
11307 /* This code shouldn't have been called if there was a natural barrier
11309 gcc_assert (GET_CODE (from) != BARRIER);
11311 /* Count the length of this insn. */
11312 count += get_attr_length (from);
11314 /* If there is a jump table, add its length. */
11315 tmp = is_jump_table (from);
11318 count += get_jump_table_size (tmp);
11320 /* Jump tables aren't in a basic block, so base the cost on
11321 the dispatch insn. If we select this location, we will
11322 still put the pool after the table. */
11323 new_cost = arm_barrier_cost (from);
11325 if (count < max_count
11326 && (!selected || new_cost <= selected_cost))
11329 selected_cost = new_cost;
11330 selected_address = fix->address + count;
11333 /* Continue after the dispatch table. */
11334 from = NEXT_INSN (tmp);
11338 new_cost = arm_barrier_cost (from);
11340 if (count < max_count
11341 && (!selected || new_cost <= selected_cost))
11344 selected_cost = new_cost;
11345 selected_address = fix->address + count;
11348 from = NEXT_INSN (from);
11351 /* Make sure that we found a place to insert the jump. */
11352 gcc_assert (selected);
11354 /* Create a new JUMP_INSN that branches around a barrier. */
11355 from = emit_jump_insn_after (gen_jump (label), selected);
11356 JUMP_LABEL (from) = label;
11357 barrier = emit_barrier_after (from);
11358 emit_label_after (label, barrier);
11360 /* Create a minipool barrier entry for the new barrier. */
11361 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11362 new_fix->insn = barrier;
11363 new_fix->address = selected_address;
11364 new_fix->next = fix->next;
11365 fix->next = new_fix;
11370 /* Record that there is a natural barrier in the insn stream at
11373 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11375 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11378 fix->address = address;
11381 if (minipool_fix_head != NULL)
11382 minipool_fix_tail->next = fix;
11384 minipool_fix_head = fix;
11386 minipool_fix_tail = fix;
11389 /* Record INSN, which will need fixing up to load a value from the
11390 minipool. ADDRESS is the offset of the insn since the start of the
11391 function; LOC is a pointer to the part of the insn which requires
11392 fixing; VALUE is the constant that must be loaded, which is of type
11395 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11396 enum machine_mode mode, rtx value)
11398 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11401 fix->address = address;
11404 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11405 fix->value = value;
11406 fix->forwards = get_attr_pool_range (insn);
11407 fix->backwards = get_attr_neg_pool_range (insn);
11408 fix->minipool = NULL;
11410 /* If an insn doesn't have a range defined for it, then it isn't
11411 expecting to be reworked by this code. Better to stop now than
11412 to generate duff assembly code. */
11413 gcc_assert (fix->forwards || fix->backwards);
11415 /* If an entry requires 8-byte alignment then assume all constant pools
11416 require 4 bytes of padding. Trying to do this later on a per-pool
11417 basis is awkward because existing pool entries have to be modified. */
11418 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11423 fprintf (dump_file,
11424 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11425 GET_MODE_NAME (mode),
11426 INSN_UID (insn), (unsigned long) address,
11427 -1 * (long)fix->backwards, (long)fix->forwards);
11428 arm_print_value (dump_file, fix->value);
11429 fprintf (dump_file, "\n");
11432 /* Add it to the chain of fixes. */
11435 if (minipool_fix_head != NULL)
11436 minipool_fix_tail->next = fix;
11438 minipool_fix_head = fix;
11440 minipool_fix_tail = fix;
11443 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11444 Returns the number of insns needed, or 99 if we don't know how to sequence it. */
11447 arm_const_double_inline_cost (rtx val)
11449 rtx lowpart, highpart;
11450 enum machine_mode mode;
11452 mode = GET_MODE (val);
11454 if (mode == VOIDmode)
11457 gcc_assert (GET_MODE_SIZE (mode) == 8);
11459 lowpart = gen_lowpart (SImode, val);
11460 highpart = gen_highpart_mode (SImode, mode, val);
11462 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11463 gcc_assert (GET_CODE (highpart) == CONST_INT);
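/* Illustrative example: for VAL = 0x000000ff000000ff, both 32-bit
   halves are 0xff, a valid rotated 8-bit immediate, so the total
   computed below would be 1 + 1 = 2 instructions. */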
11465 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11466 NULL_RTX, NULL_RTX, 0, 0)
11467 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11468 NULL_RTX, NULL_RTX, 0, 0));
11471 /* Return true if it is worthwhile to split a 64-bit constant into two
11472 32-bit operations. This is the case if optimizing for size, or
11473 if we have load delay slots, or if one 32-bit part can be done with
11474 a single data operation. */
11476 arm_const_double_by_parts (rtx val)
11478 enum machine_mode mode = GET_MODE (val);
11481 if (optimize_size || arm_ld_sched)
11484 if (mode == VOIDmode)
11487 part = gen_highpart_mode (SImode, mode, val);
11489 gcc_assert (GET_CODE (part) == CONST_INT);
11491 if (const_ok_for_arm (INTVAL (part))
11492 || const_ok_for_arm (~INTVAL (part)))
11495 part = gen_lowpart (SImode, val);
11497 gcc_assert (GET_CODE (part) == CONST_INT);
11499 if (const_ok_for_arm (INTVAL (part))
11500 || const_ok_for_arm (~INTVAL (part)))
11506 /* Return true if it is possible to inline both the high and low parts
11507 of a 64-bit constant into 32-bit data processing instructions. */
11509 arm_const_double_by_immediates (rtx val)
11511 enum machine_mode mode = GET_MODE (val);
11514 if (mode == VOIDmode)
11517 part = gen_highpart_mode (SImode, mode, val);
11519 gcc_assert (GET_CODE (part) == CONST_INT);
11521 if (!const_ok_for_arm (INTVAL (part)))
11524 part = gen_lowpart (SImode, val);
11526 gcc_assert (GET_CODE (part) == CONST_INT);
11528 if (!const_ok_for_arm (INTVAL (part)))
11534 /* Scan INSN and note any of its operands that need fixing.
11535 If DO_PUSHES is false we do not actually push any of the fixups
11536 needed. The function returns TRUE if any fixups were needed/pushed.
11537 This is used by arm_memory_load_p() which needs to know about loads
11538 of constants that will be converted into minipool loads. */
11540 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11542 bool result = false;
11545 extract_insn (insn);
11547 if (!constrain_operands (1))
11548 fatal_insn_not_found (insn);
11550 if (recog_data.n_alternatives == 0)
11553 /* Fill in recog_op_alt with information about the constraints of this insn. */
11555 preprocess_constraints ();
11557 for (opno = 0; opno < recog_data.n_operands; opno++)
11559 /* Things we need to fix can only occur in inputs. */
11560 if (recog_data.operand_type[opno] != OP_IN)
11563 /* If this alternative is a memory reference, then any mention
11564 of constants in this alternative is really to fool reload
11565 into allowing us to accept one there. We need to fix them up
11566 now so that we output the right code. */
11567 if (recog_op_alt[opno][which_alternative].memory_ok)
11569 rtx op = recog_data.operand[opno];
11571 if (CONSTANT_P (op))
11574 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11575 recog_data.operand_mode[opno], op);
11578 else if (GET_CODE (op) == MEM
11579 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11580 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11584 rtx cop = avoid_constant_pool_reference (op);
11586 /* Casting the address of something to a mode narrower
11587 than a word can cause avoid_constant_pool_reference()
11588 to return the pool reference itself. That's no good to
11589 us here. Let's just hope that we can use the
11590 constant pool value directly. */
11592 cop = get_pool_constant (XEXP (op, 0));
11594 push_minipool_fix (insn, address,
11595 recog_data.operand_loc[opno],
11596 recog_data.operand_mode[opno], cop);
11607 /* Convert instructions to their cc-clobbering variant if possible, since
11608 that allows us to use smaller encodings. */
11611 thumb2_reorg (void)
11616 INIT_REG_SET (&live);
11618 /* We are freeing block_for_insn in the toplev to keep compatibility
11619 with old MDEP_REORGS that are not CFG based. Recompute it now. */
11620 compute_bb_for_insn ();
11626 COPY_REG_SET (&live, DF_LR_OUT (bb));
11627 df_simulate_initialize_backwards (bb, &live);
11628 FOR_BB_INSNS_REVERSE (bb, insn)
11630 if (NONJUMP_INSN_P (insn)
11631 && !REGNO_REG_SET_P (&live, CC_REGNUM))
11633 rtx pat = PATTERN (insn);
11634 if (GET_CODE (pat) == SET
11635 && low_register_operand (XEXP (pat, 0), SImode)
11636 && thumb_16bit_operator (XEXP (pat, 1), SImode)
11637 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
11638 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
11640 rtx dst = XEXP (pat, 0);
11641 rtx src = XEXP (pat, 1);
11642 rtx op0 = XEXP (src, 0);
11643 if (rtx_equal_p (dst, op0)
11644 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
11646 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
11647 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
11648 rtvec vec = gen_rtvec (2, pat, clobber);
11649 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
11650 INSN_CODE (insn) = -1;
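/* Illustrative example: a 32-bit "add r0, r0, r1" rewritten with a
   CC clobber like this can later be emitted as the 16-bit
   flag-setting encoding "adds r0, r0, r1". */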
11654 if (NONDEBUG_INSN_P (insn))
11655 df_simulate_one_insn_backwards (bb, insn, &live);
11658 CLEAR_REG_SET (&live);
11661 /* GCC puts the pool in the wrong place for ARM, since we can only
11662 load addresses a limited distance around the pc. We do some
11663 special munging to move the constant pool values to the correct
11664 point in the code. */
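/* In outline: the pass first scans every insn, recording natural
   barriers and the operands that must become minipool loads; it then
   groups the recorded fixes into pools, reusing a natural barrier
   when one is in range and otherwise creating one with
   create_fix_barrier, and finally emits each pool after its barrier
   via dump_minipool. */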
11669 HOST_WIDE_INT address = 0;
11675 minipool_fix_head = minipool_fix_tail = NULL;
11677 /* The first insn must always be a note, or the code below won't
11678 scan it properly. */
11679 insn = get_insns ();
11680 gcc_assert (GET_CODE (insn) == NOTE);
11683 /* Scan all the insns and record the operands that will need fixing. */
11684 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11686 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11687 && (arm_cirrus_insn_p (insn)
11688 || GET_CODE (insn) == JUMP_INSN
11689 || arm_memory_load_p (insn)))
11690 cirrus_reorg (insn);
11692 if (GET_CODE (insn) == BARRIER)
11693 push_minipool_barrier (insn, address);
11694 else if (INSN_P (insn))
11698 note_invalid_constants (insn, address, true);
11699 address += get_attr_length (insn);
11701 /* If the insn is a vector jump, add the size of the table
11702 and skip the table. */
11703 if ((table = is_jump_table (insn)) != NULL)
11705 address += get_jump_table_size (table);
11711 fix = minipool_fix_head;
11713 /* Now scan the fixups and perform the required changes. */
11718 Mfix * last_added_fix;
11719 Mfix * last_barrier = NULL;
11722 /* Skip any further barriers before the next fix. */
11723 while (fix && GET_CODE (fix->insn) == BARRIER)
11726 /* No more fixes. */
11730 last_added_fix = NULL;
11732 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11734 if (GET_CODE (ftmp->insn) == BARRIER)
11736 if (ftmp->address >= minipool_vector_head->max_address)
11739 last_barrier = ftmp;
11741 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11744 last_added_fix = ftmp; /* Keep track of the last fix added. */
11747 /* If we found a barrier, drop back to that; any fixes that we
11748 could have reached but come after the barrier will now go in
11749 the next mini-pool. */
11750 if (last_barrier != NULL)
11752 /* Reduce the refcount for those fixes that won't go into this pool after all. */
11754 for (fdel = last_barrier->next;
11755 fdel && fdel != ftmp;
11758 fdel->minipool->refcount--;
11759 fdel->minipool = NULL;
11762 ftmp = last_barrier;
11766 /* ftmp is the first fix that we can't fit into this pool and
11767 there are no natural barriers that we could use. Insert a
11768 new barrier in the code somewhere between the previous
11769 fix and this one, and arrange to jump around it. */
11770 HOST_WIDE_INT max_address;
11772 /* The last item on the list of fixes must be a barrier, so
11773 we can never run off the end of the list of fixes without
11774 last_barrier being set. */
11777 max_address = minipool_vector_head->max_address;
11778 /* Check that there isn't another fix that is in range that
11779 we couldn't fit into this pool because the pool was
11780 already too large: we need to put the pool before such an
11781 instruction. The pool itself may come just after the
11782 fix because create_fix_barrier also allows space for a
11783 jump instruction. */
11784 if (ftmp->address < max_address)
11785 max_address = ftmp->address + 1;
11787 last_barrier = create_fix_barrier (last_added_fix, max_address);
11790 assign_minipool_offsets (last_barrier);
11794 if (GET_CODE (ftmp->insn) != BARRIER
11795 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11802 /* Scan over the fixes we have identified for this pool, fixing them
11803 up and adding the constants to the pool itself. */
11804 for (this_fix = fix; this_fix && ftmp != this_fix;
11805 this_fix = this_fix->next)
11806 if (GET_CODE (this_fix->insn) != BARRIER)
11809 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11810 minipool_vector_label),
11811 this_fix->minipool->offset);
11812 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11815 dump_minipool (last_barrier->insn);
11819 /* From now on we must synthesize any constants that we can't handle
11820 directly. This can happen if the RTL gets split during final
11821 instruction generation. */
11822 after_arm_reorg = 1;
11824 /* Free the minipool memory. */
11825 obstack_free (&minipool_obstack, minipool_startobj);
11828 /* Routines to output assembly language. */
11830 /* If the rtx is the correct value then return the string of the number.
11831 In this way we can ensure that valid double constants are generated even
11832 when cross compiling. */
11834 fp_immediate_constant (rtx x)
11839 if (!fp_consts_inited)
11842 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11843 for (i = 0; i < 8; i++)
11844 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11845 return strings_fp[i];
11847 gcc_unreachable ();
11850 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11851 static const char *
11852 fp_const_from_val (REAL_VALUE_TYPE *r)
11856 if (!fp_consts_inited)
11859 for (i = 0; i < 8; i++)
11860 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11861 return strings_fp[i];
11863 gcc_unreachable ();
11866 /* Output the operands of a LDM/STM instruction to STREAM.
11867 MASK is the ARM register set mask of which only bits 0-15 are important.
11868 REG is the base register, either the frame pointer or the stack pointer,
11869 INSTR is the possibly suffixed load or store instruction.
11870 RFE is nonzero if the instruction should also copy spsr to cpsr. */
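/* For example (illustrative): INSTR "ldmfd\t%r!, " with REG = SP and
   a MASK selecting r4, r5 and PC would print
   "ldmfd sp!, {r4, r5, pc}". */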
11873 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11874 unsigned long mask, int rfe)
11877 bool not_first = FALSE;
11879 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11880 fputc ('\t', stream);
11881 asm_fprintf (stream, instr, reg);
11882 fputc ('{', stream);
11884 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11885 if (mask & (1 << i))
11888 fprintf (stream, ", ");
11890 asm_fprintf (stream, "%r", i);
11895 fprintf (stream, "}^\n");
11897 fprintf (stream, "}\n");
11901 /* Output a FLDMD instruction to STREAM.
11902 BASE is the register containing the address.
11903 REG and COUNT specify the register range.
11904 Extra registers may be added to avoid hardware bugs.
11906 We output FLDMD even for ARMv5 VFP implementations. Although
11907 FLDMD is technically not supported until ARMv6, it is believed
11908 that all VFP implementations support its use in this context. */
11911 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11915 /* Workaround ARM10 VFPr1 bug. */
11916 if (count == 2 && !arm_arch6)
11923 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11924 load into multiple parts if we have to handle more than 16 registers. */
11927 vfp_output_fldmd (stream, base, reg, 16);
11928 vfp_output_fldmd (stream, base, reg + 16, count - 16);
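/* Illustrative example: REG = 0 with COUNT = 20 emits one FLDMD of
   d0-d15 followed by a second FLDMD of d16-d19. */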
11932 fputc ('\t', stream);
11933 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11935 for (i = reg; i < reg + count; i++)
11938 fputs (", ", stream);
11939 asm_fprintf (stream, "d%d", i);
11941 fputs ("}\n", stream);
11946 /* Output the assembly for a store multiple. */
11949 vfp_output_fstmd (rtx * operands)
11956 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11957 p = strlen (pattern);
11959 gcc_assert (GET_CODE (operands[1]) == REG);
11961 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11962 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11964 p += sprintf (&pattern[p], ", d%d", base + i);
11966 strcpy (&pattern[p], "}");
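/* At this point the buffer holds, e.g.,
   "fstmfdd\t%m0!, {%P1, d9, d10}" for a store of d8-d10
   (illustrative). */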
11968 output_asm_insn (pattern, operands);
11973 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11974 number of bytes pushed. */
11977 vfp_emit_fstmd (int base_reg, int count)
11984 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11985 register pairs are stored by a store multiple insn. We avoid this
11986 by pushing an extra pair. */
11987 if (count == 2 && !arm_arch6)
11989 if (base_reg == LAST_VFP_REGNUM - 3)
11994 /* FSTMD may not store more than 16 doubleword registers at once. Split
11995 larger stores into multiple parts (up to a maximum of two, in practice). */
12000 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
12002 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12003 saved += vfp_emit_fstmd (base_reg, 16);
12007 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12008 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
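/* PAR will become the real store-multiple insn (expressed with
   UNSPEC_PUSH_MULT below), while DWARF describes the same effect for
   the unwinder as an explicit stack-pointer adjustment followed by
   one store per register. */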
12010 reg = gen_rtx_REG (DFmode, base_reg);
12013 XVECEXP (par, 0, 0)
12014 = gen_rtx_SET (VOIDmode,
12017 gen_rtx_PRE_MODIFY (Pmode,
12020 (stack_pointer_rtx,
12023 gen_rtx_UNSPEC (BLKmode,
12024 gen_rtvec (1, reg),
12025 UNSPEC_PUSH_MULT));
12027 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12028 plus_constant (stack_pointer_rtx, -(count * 8)));
12029 RTX_FRAME_RELATED_P (tmp) = 1;
12030 XVECEXP (dwarf, 0, 0) = tmp;
12032 tmp = gen_rtx_SET (VOIDmode,
12033 gen_frame_mem (DFmode, stack_pointer_rtx),
12035 RTX_FRAME_RELATED_P (tmp) = 1;
12036 XVECEXP (dwarf, 0, 1) = tmp;
12038 for (i = 1; i < count; i++)
12040 reg = gen_rtx_REG (DFmode, base_reg);
12042 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12044 tmp = gen_rtx_SET (VOIDmode,
12045 gen_frame_mem (DFmode,
12046 plus_constant (stack_pointer_rtx,
12049 RTX_FRAME_RELATED_P (tmp) = 1;
12050 XVECEXP (dwarf, 0, i + 1) = tmp;
12053 par = emit_insn (par);
12054 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12055 RTX_FRAME_RELATED_P (par) = 1;
12060 /* Emit a call instruction with pattern PAT. ADDR is the address of
12061 the call target. */
12064 arm_emit_call_insn (rtx pat, rtx addr)
12068 insn = emit_call_insn (pat);
12070 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12071 If the call might use such an entry, add a use of the PIC register
12072 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12073 if (TARGET_VXWORKS_RTP
12075 && GET_CODE (addr) == SYMBOL_REF
12076 && (SYMBOL_REF_DECL (addr)
12077 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12078 : !SYMBOL_REF_LOCAL_P (addr)))
12080 require_pic_register ();
12081 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12085 /* Output a 'call' insn. */
12087 output_call (rtx *operands)
12089 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12091 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12092 if (REGNO (operands[0]) == LR_REGNUM)
12094 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12095 output_asm_insn ("mov%?\t%0, %|lr", operands);
12098 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
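/* In ARM state the PC reads as the address of the current insn plus
   8, so the "mov lr, pc" above leaves LR pointing just past the
   branch emitted below, giving the correct return address. */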
12100 if (TARGET_INTERWORK || arm_arch4t)
12101 output_asm_insn ("bx%?\t%0", operands);
12103 output_asm_insn ("mov%?\t%|pc, %0", operands);
12108 /* Output a 'call' insn that is a reference in memory. This is
12109 disabled for ARMv5 and we prefer a blx instead because otherwise
12110 there's a significant performance overhead. */
12112 output_call_mem (rtx *operands)
12114 gcc_assert (!arm_arch5);
12115 if (TARGET_INTERWORK)
12117 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12118 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12119 output_asm_insn ("bx%?\t%|ip", operands);
12121 else if (regno_use_in (LR_REGNUM, operands[0]))
12123 /* LR is used in the memory address. We load the address in the
12124 first instruction. It's safe to use IP as the target of the
12125 load since the call will kill it anyway. */
12126 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12127 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12129 output_asm_insn ("bx%?\t%|ip", operands);
12131 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12135 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12136 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12143 /* Output a move from arm registers to an fpa register.
12144 OPERANDS[0] is an fpa register.
12145 OPERANDS[1] is the first register of an arm register pair. */
12147 output_mov_long_double_fpa_from_arm (rtx *operands)
12149 int arm_reg0 = REGNO (operands[1]);
12152 gcc_assert (arm_reg0 != IP_REGNUM);
12154 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12155 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12156 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12158 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12159 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12164 /* Output a move from an fpa register to arm registers.
12165 OPERANDS[0] is the first register of an arm register pair.
12166 OPERANDS[1] is an fpa register. */
12168 output_mov_long_double_arm_from_fpa (rtx *operands)
12170 int arm_reg0 = REGNO (operands[0]);
12173 gcc_assert (arm_reg0 != IP_REGNUM);
12175 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12176 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12177 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12179 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12180 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12184 /* Output a move from arm registers to arm registers of a long double.
12185 OPERANDS[0] is the destination.
12186 OPERANDS[1] is the source. */
12188 output_mov_long_double_arm_from_arm (rtx *operands)
12190 /* We have to be careful here because the two might overlap. */
12191 int dest_start = REGNO (operands[0]);
12192 int src_start = REGNO (operands[1]);
12196 if (dest_start < src_start)
12198 for (i = 0; i < 3; i++)
12200 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12201 ops[1] = gen_rtx_REG (SImode, src_start + i);
12202 output_asm_insn ("mov%?\t%0, %1", ops);
12207 for (i = 2; i >= 0; i--)
12209 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12210 ops[1] = gen_rtx_REG (SImode, src_start + i);
12211 output_asm_insn ("mov%?\t%0, %1", ops);
12219 arm_emit_movpair (rtx dest, rtx src)
12221 /* If the src is an immediate, simplify it. */
12222 if (CONST_INT_P (src))
12224 HOST_WIDE_INT val = INTVAL (src);
12225 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12226 if ((val >> 16) & 0x0000ffff)
12227 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12229 GEN_INT ((val >> 16) & 0x0000ffff));
12232 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12233 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
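/* Both paths above emit the value in two halves: for immediates a
   low-16 set followed by a ZERO_EXTRACT of the top 16 bits, and for
   symbols a HIGH/LO_SUM pair; either form corresponds to a
   movw/movt style two-instruction sequence on targets that have
   those instructions. */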
12236 /* Output a move from arm registers to an fpa register.
12237 OPERANDS[0] is an fpa register.
12238 OPERANDS[1] is the first register of an arm register pair. */
12240 output_mov_double_fpa_from_arm (rtx *operands)
12242 int arm_reg0 = REGNO (operands[1]);
12245 gcc_assert (arm_reg0 != IP_REGNUM);
12247 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12248 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12249 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12250 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12254 /* Output a move from an fpa register to arm registers.
12255 OPERANDS[0] is the first register of an arm register pair.
12256 OPERANDS[1] is an fpa register. */
12258 output_mov_double_arm_from_fpa (rtx *operands)
12260 int arm_reg0 = REGNO (operands[0]);
12263 gcc_assert (arm_reg0 != IP_REGNUM);
12265 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12266 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12267 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12268 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12272 /* Output a move between double words.
12273 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12274 or MEM<-REG and all MEMs must be offsettable addresses. */
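/* The body below dispatches on the address form of the MEM operand:
   plain REG, PRE/POST increment and decrement, PRE_MODIFY and
   POST_MODIFY, LABEL_REF/CONST, and register-plus-offset, choosing
   between LDRD/STRD, LDM/STM variants, or a pair of single-word
   transfers as the addressing mode and target allow. */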
12276 output_move_double (rtx *operands)
12278 enum rtx_code code0 = GET_CODE (operands[0]);
12279 enum rtx_code code1 = GET_CODE (operands[1]);
12284 unsigned int reg0 = REGNO (operands[0]);
12286 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12288 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12290 switch (GET_CODE (XEXP (operands[1], 0)))
12294 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12295 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12297 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12301 gcc_assert (TARGET_LDRD);
12302 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12307 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12309 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12314 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12316 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12320 gcc_assert (TARGET_LDRD);
12321 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12326 /* Autoincrement addressing modes should never have overlapping
12327 base and destination registers, and overlapping index registers
12328 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
12330 otherops[0] = operands[0];
12331 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12332 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12334 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12336 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12338 /* Registers overlap so split out the increment. */
12339 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12340 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12344 /* Use a single insn if we can.
12345 FIXME: IWMMXT allows offsets larger than ldrd can
12346 handle, fix these up with a pair of ldr. */
12348 || GET_CODE (otherops[2]) != CONST_INT
12349 || (INTVAL (otherops[2]) > -256
12350 && INTVAL (otherops[2]) < 256))
12351 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12354 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12355 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12361 /* Use a single insn if we can.
12362 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12363 fix these up with a pair of ldr. */
12365 || GET_CODE (otherops[2]) != CONST_INT
12366 || (INTVAL (otherops[2]) > -256
12367 && INTVAL (otherops[2]) < 256))
12368 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12371 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12372 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12379 /* We might be able to use ldrd %0, %1 here. However the range is
12380 different to ldr/adr, and it is broken on some ARMv7-M
12381 implementations. */
12382 /* Use the second register of the pair to avoid problematic overlap. */
12384 otherops[1] = operands[1];
12385 output_asm_insn ("adr%?\t%0, %1", otherops);
12386 operands[1] = otherops[0];
12388 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12390 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12393 /* ??? This needs checking for thumb2. */
12395 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12396 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12398 otherops[0] = operands[0];
12399 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12400 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12402 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12404 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12406 switch ((int) INTVAL (otherops[2]))
12409 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12414 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12419 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12423 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12424 operands[1] = otherops[0];
12426 && (GET_CODE (otherops[2]) == REG
12428 || (GET_CODE (otherops[2]) == CONST_INT
12429 && INTVAL (otherops[2]) > -256
12430 && INTVAL (otherops[2]) < 256)))
12432 if (reg_overlap_mentioned_p (operands[0],
12436 /* Swap base and index registers over to
12437 avoid a conflict. */
12439 otherops[1] = otherops[2];
12442 /* If both registers conflict, it will usually
12443 have been fixed by a splitter. */
12444 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12445 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12447 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12448 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12452 otherops[0] = operands[0];
12453 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12458 if (GET_CODE (otherops[2]) == CONST_INT)
12460 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12461 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12463 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12466 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12469 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12472 return "ldr%(d%)\t%0, [%1]";
12474 return "ldm%(ia%)\t%1, %M0";
12478 otherops[1] = adjust_address (operands[1], SImode, 4);
12479 /* Take care of overlapping base/data reg. */
12480 if (reg_mentioned_p (operands[0], operands[1]))
12482 output_asm_insn ("ldr%?\t%0, %1", otherops);
12483 output_asm_insn ("ldr%?\t%0, %1", operands);
12487 output_asm_insn ("ldr%?\t%0, %1", operands);
12488 output_asm_insn ("ldr%?\t%0, %1", otherops);
12495 /* Constraints should ensure this. */
12496 gcc_assert (code0 == MEM && code1 == REG);
12497 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12499 switch (GET_CODE (XEXP (operands[0], 0)))
12503 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12505 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12509 gcc_assert (TARGET_LDRD);
12510 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12515 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12517 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12522 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12524 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12528 gcc_assert (TARGET_LDRD);
12529 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12534 otherops[0] = operands[1];
12535 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12536 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12538 /* IWMMXT allows offsets larger than ldrd can handle,
12539 fix these up with a pair of ldr. */
12541 && GET_CODE (otherops[2]) == CONST_INT
12542 && (INTVAL(otherops[2]) <= -256
12543 || INTVAL(otherops[2]) >= 256))
12545 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12547 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12548 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12552 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12553 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12556 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12557 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12559 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12563 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12564 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12566 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12569 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12575 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12581 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12586 && (GET_CODE (otherops[2]) == REG
12588 || (GET_CODE (otherops[2]) == CONST_INT
12589 && INTVAL (otherops[2]) > -256
12590 && INTVAL (otherops[2]) < 256)))
12592 otherops[0] = operands[1];
12593 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12594 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12600 otherops[0] = adjust_address (operands[0], SImode, 4);
12601 otherops[1] = operands[1];
12602 output_asm_insn ("str%?\t%1, %0", operands);
12603 output_asm_insn ("str%?\t%H1, %0", otherops);
12610 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12611 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12614 output_move_quad (rtx *operands)
12616 if (REG_P (operands[0]))
12618 /* Load, or reg->reg move. */
12620 if (MEM_P (operands[1]))
12622 switch (GET_CODE (XEXP (operands[1], 0)))
12625 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12630 output_asm_insn ("adr%?\t%0, %1", operands);
12631 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12635 gcc_unreachable ();
12643 gcc_assert (REG_P (operands[1]));
12645 dest = REGNO (operands[0]);
12646 src = REGNO (operands[1]);
12648 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
12651 for (i = 0; i < 4; i++)
12653 ops[0] = gen_rtx_REG (SImode, dest + i);
12654 ops[1] = gen_rtx_REG (SImode, src + i);
12655 output_asm_insn ("mov%?\t%0, %1", ops);
12658 for (i = 3; i >= 0; i--)
12660 ops[0] = gen_rtx_REG (SImode, dest + i);
12661 ops[1] = gen_rtx_REG (SImode, src + i);
12662 output_asm_insn ("mov%?\t%0, %1", ops);
12668 gcc_assert (MEM_P (operands[0]));
12669 gcc_assert (REG_P (operands[1]));
12670 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12672 switch (GET_CODE (XEXP (operands[0], 0)))
12675 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12679 gcc_unreachable ();
12686 /* Output a VFP load or store instruction. */
12689 output_move_vfp (rtx *operands)
12691 rtx reg, mem, addr, ops[2];
12692 int load = REG_P (operands[0]);
12693 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12694 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12697 enum machine_mode mode;
12699 reg = operands[!load];
12700 mem = operands[load];
12702 mode = GET_MODE (reg);
12704 gcc_assert (REG_P (reg));
12705 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12706 gcc_assert (mode == SFmode
12710 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12711 gcc_assert (MEM_P (mem));
12713 addr = XEXP (mem, 0);
12715 switch (GET_CODE (addr))
12718 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12719 ops[0] = XEXP (addr, 0);
12724 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12725 ops[0] = XEXP (addr, 0);
12730 templ = "f%s%c%%?\t%%%s0, %%1%s";
12736 sprintf (buff, templ,
12737 load ? "ld" : "st",
12740 integer_p ? "\t%@ int" : "");
12741 output_asm_insn (buff, ops);
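/* For example (illustrative): a DFmode load from a plain register
   address expands the template above to "fldd%?\t%P0, %1", while an
   SFmode store becomes "fsts%?\t%0, %1". */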
12746 /* Output a Neon quad-word load or store, or a load or store for
12747 larger structure modes.
12749 WARNING: The ordering of elements is weird in big-endian mode,
12750 because we use VSTM, as required by the EABI. GCC RTL defines
12751 element ordering based on in-memory order. This can differ
12752 from the architectural ordering of elements within a NEON register.
12753 The intrinsics defined in arm_neon.h use the NEON register element
12754 ordering, not the GCC RTL element ordering.
12756 For example, the in-memory ordering of a big-endian quadword
12757 vector with 16-bit elements when stored from register pair {d0,d1}
12758 will be (lowest address first, d0[N] is NEON register element N):
12760 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12762 When necessary, quadword registers (dN, dN+1) are moved to ARM
12763 registers from rN in the order:
12765 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12767 So that STM/LDM can be used on vectors in ARM registers, and the
12768 same memory layout will result as if VSTM/VLDM were used. */
12771 output_move_neon (rtx *operands)
12773 rtx reg, mem, addr, ops[2];
12774 int regno, load = REG_P (operands[0]);
12777 enum machine_mode mode;
12779 reg = operands[!load];
12780 mem = operands[load];
12782 mode = GET_MODE (reg);
12784 gcc_assert (REG_P (reg));
12785 regno = REGNO (reg);
12786 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12787 || NEON_REGNO_OK_FOR_QUAD (regno));
12788 gcc_assert (VALID_NEON_DREG_MODE (mode)
12789 || VALID_NEON_QREG_MODE (mode)
12790 || VALID_NEON_STRUCT_MODE (mode));
12791 gcc_assert (MEM_P (mem));
12793 addr = XEXP (mem, 0);
12795 /* Strip off const from addresses like (const (plus (...))). */
12796 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12797 addr = XEXP (addr, 0);
12799 switch (GET_CODE (addr))
12802 templ = "v%smia%%?\t%%0!, %%h1";
12803 ops[0] = XEXP (addr, 0);
12808 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12809 templ = "v%smdb%%?\t%%0!, %%h1";
12810 ops[0] = XEXP (addr, 0);
12815 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12816 gcc_unreachable ();
12821 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12824 for (i = 0; i < nregs; i++)
12826 /* We're only using DImode here because it's a convenient size. */
12827 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12828 ops[1] = adjust_address (mem, DImode, 8 * i);
12829 if (reg_overlap_mentioned_p (ops[0], mem))
12831 gcc_assert (overlap == -1);
12836 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12837 output_asm_insn (buff, ops);
12842 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12843 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12844 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12845 output_asm_insn (buff, ops);
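/* The overlapping register was skipped in the loop above and is
   transferred last here, so the base address register is not
   clobbered before the other transfers have used it. */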
12852 templ = "v%smia%%?\t%%m0, %%h1";
12857 sprintf (buff, templ, load ? "ld" : "st");
12858 output_asm_insn (buff, ops);
12863 /* Compute and return the length of neon_mov<mode>, where <mode> is
12864 one of VSTRUCT modes: EI, OI, CI or XI. */
12866 arm_attr_length_move_neon (rtx insn)
12868 rtx reg, mem, addr;
12870 enum machine_mode mode;
12872 extract_insn_cached (insn);
12874 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
12876 mode = GET_MODE (recog_data.operand[0]);
12887 gcc_unreachable ();
12891 load = REG_P (recog_data.operand[0]);
12892 reg = recog_data.operand[!load];
12893 mem = recog_data.operand[load];
12895 gcc_assert (MEM_P (mem));
12897 mode = GET_MODE (reg);
12898 addr = XEXP (mem, 0);
12900 /* Strip off const from addresses like (const (plus (...))). */
12901 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12902 addr = XEXP (addr, 0);
12904 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
12906 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12913 /* Output an ADD r, s, #n where n may be too big for one instruction.
12914 If adding zero to one register, output nothing. */
12916 output_add_immediate (rtx *operands)
12918 HOST_WIDE_INT n = INTVAL (operands[2]);
12920 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12923 output_multi_immediate (operands,
12924 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12927 output_multi_immediate (operands,
12928 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12935 /* Output a multiple immediate operation.
12936 OPERANDS is the vector of operands referred to in the output patterns.
12937 INSTR1 is the output pattern to use for the first constant.
12938 INSTR2 is the output pattern to use for subsequent constants.
12939 IMMED_OP is the index of the constant slot in OPERANDS.
12940 N is the constant value. */
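/* For example (illustrative): N = 0x100001 emits INSTR1 with #1 and
   then INSTR2 with #0x100000, each chunk being an 8-bit value that
   the rotated-immediate encoding can represent. */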
12941 static const char *
12942 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12943 int immed_op, HOST_WIDE_INT n)
12945 #if HOST_BITS_PER_WIDE_INT > 32
12951 /* Quick and easy output. */
12952 operands[immed_op] = const0_rtx;
12953 output_asm_insn (instr1, operands);
12958 const char * instr = instr1;
12960 /* Note that n is never zero here (which would give no output). */
12961 for (i = 0; i < 32; i += 2)
12965 operands[immed_op] = GEN_INT (n & (255 << i));
12966 output_asm_insn (instr, operands);
12976 /* Return the name of a shifter operation. */
12977 static const char *
12978 arm_shift_nmem(enum rtx_code code)
12983 return ARM_LSL_NAME;
12999 /* Return the appropriate ARM instruction for the operation code.
13000 The returned result should not be overwritten. OP is the rtx of the
13001 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator was shifted. */
13004 arithmetic_instr (rtx op, int shift_first_arg)
13006 switch (GET_CODE (op))
13012 return shift_first_arg ? "rsb" : "sub";
13027 return arm_shift_nmem(GET_CODE(op));
13030 gcc_unreachable ();
13034 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13035 for the operation code. The returned result should not be overwritten.
13036 OP is the rtx code of the shift.
13037 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant shift amount otherwise. */
13039 static const char *
13040 shift_op (rtx op, HOST_WIDE_INT *amountp)
13043 enum rtx_code code = GET_CODE (op);
13045 switch (GET_CODE (XEXP (op, 1)))
13053 *amountp = INTVAL (XEXP (op, 1));
13057 gcc_unreachable ();
13063 gcc_assert (*amountp != -1);
13064 *amountp = 32 - *amountp;
13067 /* Fall through. */
13073 mnem = arm_shift_nmem(code);
13077 /* We never have to worry about the amount being other than a
13078 power of 2, since this case can never be reloaded from a reg. */
13079 gcc_assert (*amountp != -1);
13080 *amountp = int_log2 (*amountp);
13081 return ARM_LSL_NAME;
13084 gcc_unreachable ();
13087 if (*amountp != -1)
13089 /* This is not 100% correct, but follows from the desire to merge
13090 multiplication by a power of 2 with the recognizer for a
13091 shift. >=32 is not a valid shift for "lsl", so we must try and
13092 output a shift that produces the correct arithmetical result.
13093 Using lsr #32 is identical except for the fact that the carry bit
13094 is not set correctly if we set the flags; but we never use the
13095 carry bit from such an operation, so we can ignore that. */
13096 if (code == ROTATERT)
13097 /* Rotate is just modulo 32. */
13099 else if (*amountp != (*amountp & 31))
13101 if (code == ASHIFT)
13106 /* Shifts of 0 are no-ops. */
13114 /* Obtain the shift from the POWER of two. */
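/* For example, int_log2 (8) returns 3 and int_log2 (1) returns 0;
   POWER is assumed to be a power of two (illustrative). */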
13116 static HOST_WIDE_INT
13117 int_log2 (HOST_WIDE_INT power)
13119 HOST_WIDE_INT shift = 0;
13121 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13123 gcc_assert (shift <= 31);
13130 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13131 because /bin/as is horribly restrictive. The judgement about
13132 whether or not each character is 'printable' (and can be output as
13133 is) or not (and must be printed with an octal escape) must be made
13134 with reference to the *host* character set -- the situation is
13135 similar to that discussed in the comments above pp_c_char in
13136 c-pretty-print.c. */
13138 #define MAX_ASCII_LEN 51
13141 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13144 int len_so_far = 0;
13146 fputs ("\t.ascii\t\"", stream);
13148 for (i = 0; i < len; i++)
13152 if (len_so_far >= MAX_ASCII_LEN)
13154 fputs ("\"\n\t.ascii\t\"", stream);
13160 if (c == '\\' || c == '\"')
13162 putc ('\\', stream);
13170 fprintf (stream, "\\%03o", c);
13175 fputs ("\"\n", stream);
13178 /* Compute the register save mask for registers 0 through 12
13179 inclusive. This code is used by arm_compute_save_reg_mask. */
13181 static unsigned long
13182 arm_compute_save_reg0_reg12_mask (void)
13184 unsigned long func_type = arm_current_func_type ();
13185 unsigned long save_reg_mask = 0;
13188 if (IS_INTERRUPT (func_type))
13190 unsigned int max_reg;
13191 /* Interrupt functions must not corrupt any registers,
13192 even call clobbered ones. If this is a leaf function
13193 we can just examine the registers used by the RTL, but
13194 otherwise we have to assume that whatever function is
13195 called might clobber anything, and so we have to save
13196 all the call-clobbered registers as well. */
13197 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13198 /* FIQ handlers have registers r8 - r12 banked, so
13199 we only need to check r0 - r7. Normal ISRs only
13200 bank r14 and r15, so we must check up to r12.
13201 r13 is the stack pointer which is always preserved,
13202 so we do not need to consider it here. */
13207 for (reg = 0; reg <= max_reg; reg++)
13208 if (df_regs_ever_live_p (reg)
13209 || (! current_function_is_leaf && call_used_regs[reg]))
13210 save_reg_mask |= (1 << reg);
13212 /* Also save the pic base register if necessary. */
13214 && !TARGET_SINGLE_PIC_BASE
13215 && arm_pic_register != INVALID_REGNUM
13216 && crtl->uses_pic_offset_table)
13217 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13219 else if (IS_VOLATILE(func_type))
13221 /* For noreturn functions we historically omitted register saves
13222 altogether. However this really messes up debugging. As a
13223 compromise save just the frame pointers. Combined with the link
13224 register saved elsewhere this should be sufficient to get a backtrace. */
13226 if (frame_pointer_needed)
13227 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13228 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13229 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13230 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13231 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13235 /* In the normal case we only need to save those registers
13236 which are call saved and which are used by this function. */
13237 for (reg = 0; reg <= 11; reg++)
13238 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13239 save_reg_mask |= (1 << reg);
13241 /* Handle the frame pointer as a special case. */
13242 if (frame_pointer_needed)
13243 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13245 /* If we aren't loading the PIC register,
13246 don't stack it even though it may be live. */
13248 && !TARGET_SINGLE_PIC_BASE
13249 && arm_pic_register != INVALID_REGNUM
13250 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13251 || crtl->uses_pic_offset_table))
13252 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13254 /* The prologue will copy SP into R0, so save it. */
13255 if (IS_STACKALIGN (func_type))
13256 save_reg_mask |= 1;
13259 /* Save registers so the exception handler can modify them. */
13260 if (crtl->calls_eh_return)
13266 reg = EH_RETURN_DATA_REGNO (i);
13267 if (reg == INVALID_REGNUM)
13269 save_reg_mask |= 1 << reg;
13273 return save_reg_mask;
13277 /* Compute the number of bytes used to store the static chain register on the
13278 stack, above the stack frame. We need to know this accurately to get the
13279 alignment of the rest of the stack frame correct. */
13281 static int
arm_compute_static_chain_stack_bytes (void)
13283 unsigned long func_type = arm_current_func_type ();
13284 int static_chain_stack_bytes = 0;
13286 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13287 && IS_NESTED (func_type)
13288 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13289 static_chain_stack_bytes = 4;
13291 return static_chain_stack_bytes;
13295 /* Compute a bit mask of which registers need to be
13296 saved on the stack for the current function.
13297 This is used by arm_get_frame_offsets, which may add extra registers. */
13299 static unsigned long
13300 arm_compute_save_reg_mask (void)
13302 unsigned int save_reg_mask = 0;
13303 unsigned long func_type = arm_current_func_type ();
13306 if (IS_NAKED (func_type))
13307 /* This should never really happen. */
13310 /* If we are creating a stack frame, then we must save the frame pointer,
13311 IP (which will hold the old stack pointer), LR and the PC. */
13312 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13314 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13317 | (1 << PC_REGNUM);
13319 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13321 /* Decide if we need to save the link register.
13322 Interrupt routines have their own banked link register,
13323 so they never need to save it.
13324 Otherwise if we do not use the link register we do not need to save
13325 it. If we are pushing other registers onto the stack however, we
13326 can save an instruction in the epilogue by pushing the link register
13327 now and then popping it back into the PC. This incurs extra memory
13328 accesses though, so we only do it when optimizing for size, and only
13329 if we know that we will not need a fancy return sequence. */
13330 if (df_regs_ever_live_p (LR_REGNUM)
13333 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13334 && !crtl->calls_eh_return))
13335 save_reg_mask |= 1 << LR_REGNUM;
13337 if (cfun->machine->lr_save_eliminated)
13338 save_reg_mask &= ~ (1 << LR_REGNUM);
13340 if (TARGET_REALLY_IWMMXT
13341 && ((bit_count (save_reg_mask)
13342 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13343 arm_compute_static_chain_stack_bytes())
13346 /* The total number of registers that are going to be pushed
13347 onto the stack is odd. We need to ensure that the stack
13348 is 64-bit aligned before we start to save iWMMXt registers,
13349 and also before we start to create locals. (A local variable
13350 might be a double or long long which we will load/store using
13351 an iWMMXt instruction). Therefore we need to push another
13352 ARM register, so that the stack will be 64-bit aligned. We
13353 try to avoid using the arg registers (r0 - r3) as they might be
13354 used to pass values in a tail call. */
13355 for (reg = 4; reg <= 12; reg++)
13356 if ((save_reg_mask & (1 << reg)) == 0)
13360 save_reg_mask |= (1 << reg);
13363 cfun->machine->sibcall_blocked = 1;
13364 save_reg_mask |= (1 << 3);
13368 /* We may need to push an additional register for use initializing the
13369 PIC base register. */
13370 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13371 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13373 reg = thumb_find_work_register (1 << 4);
13374 if (!call_used_regs[reg])
13375 save_reg_mask |= (1 << reg);
13378 return save_reg_mask;
13382 /* Compute a bit mask of which registers need to be
13383 saved on the stack for the current function. */
13384 static unsigned long
13385 thumb1_compute_save_reg_mask (void)
13387 unsigned long mask;
13391 for (reg = 0; reg < 12; reg ++)
13392 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13396 && !TARGET_SINGLE_PIC_BASE
13397 && arm_pic_register != INVALID_REGNUM
13398 && crtl->uses_pic_offset_table)
13399 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13401 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13402 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13403 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13405 /* LR will also be pushed if any lo regs are pushed. */
13406 if (mask & 0xff || thumb_force_lr_save ())
13407 mask |= (1 << LR_REGNUM);
13409 /* Make sure we have a low work register if we need one.
13410 We will need one if we are going to push a high register,
13411 but we are not currently intending to push a low register. */
13412 if ((mask & 0xff) == 0
13413 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13415 /* Use thumb_find_work_register to choose which register
13416 we will use. If the register is live then we will
13417 have to push it. Use LAST_LO_REGNUM as our fallback
13418 choice for the register to select. */
13419 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13420 /* Make sure the register returned by thumb_find_work_register is
13421 not part of the return value. */
13422 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13423 reg = LAST_LO_REGNUM;
13425 if (! call_used_regs[reg])
13429 /* The 504 below is 8 bytes less than 512 because there are two possible
13430 alignment words. We can't tell here if they will be present or not so we
13431 have to play it safe and assume that they are. */
13432 if ((CALLER_INTERWORKING_SLOT_SIZE +
13433 ROUND_UP_WORD (get_frame_size ()) +
13434 crtl->outgoing_args_size) >= 504)
13436 /* This is the same as the code in thumb1_expand_prologue() which
13437 determines which register to use for stack decrement. */
13438 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13439 if (mask & (1 << reg))
13442 if (reg > LAST_LO_REGNUM)
13444 /* Make sure we have a register available for stack decrement. */
13445 mask |= 1 << LAST_LO_REGNUM;
13453 /* Return the number of bytes required to save VFP registers. */
13455 arm_get_vfp_saved_size (void)
13457 unsigned int regno;
13462 /* Space for saved VFP registers. */
13463 if (TARGET_HARD_FLOAT && TARGET_VFP)
13466 for (regno = FIRST_VFP_REGNUM;
13467 regno < LAST_VFP_REGNUM;
13470 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13471 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13475 /* Workaround ARM10 VFPr1 bug. */
13476 if (count == 2 && !arm_arch6)
13478 saved += count * 8;
13487 if (count == 2 && !arm_arch6)
13489 saved += count * 8;
13496 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13497 everything bar the final return instruction. */
13499 output_return_instruction (rtx operand, int really_return, int reverse)
13501 char conditional[10];
13504 unsigned long live_regs_mask;
13505 unsigned long func_type;
13506 arm_stack_offsets *offsets;
13508 func_type = arm_current_func_type ();
13510 if (IS_NAKED (func_type))
13513 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13515 /* If this function was declared non-returning, and we have
13516 found a tail call, then we have to trust that the called
13517 function won't return. */
13522 /* Otherwise, trap an attempted return by aborting. */
13524 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13526 assemble_external_libcall (ops[1]);
13527 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13533 gcc_assert (!cfun->calls_alloca || really_return);
13535 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13537 cfun->machine->return_used_this_function = 1;
13539 offsets = arm_get_frame_offsets ();
13540 live_regs_mask = offsets->saved_regs_mask;
13542 if (live_regs_mask)
13544 const char * return_reg;
13546 /* If we do not have any special requirements for function exit
13547 (e.g. interworking) then we can load the return address
13548 directly into the PC. Otherwise we must load it into LR. */
13550 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13551 return_reg = reg_names[PC_REGNUM];
13553 return_reg = reg_names[LR_REGNUM];
13555 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13557 /* There are three possible reasons for the IP register
13558 being saved. 1) a stack frame was created, in which case
13559 IP contains the old stack pointer, or 2) an ISR routine
13560 corrupted it, or 3) it was saved to align the stack on
13561 iWMMXt. In case 1, restore IP into SP, otherwise just restore IP. */
13563 if (frame_pointer_needed)
13565 live_regs_mask &= ~ (1 << IP_REGNUM);
13566 live_regs_mask |= (1 << SP_REGNUM);
13569 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13572 /* On some ARM architectures it is faster to use LDR rather than
13573 LDM to load a single register. On other architectures, the
13574 cost is the same. In 26 bit mode, or for exception handlers,
13575 we have to use LDM to load the PC so that the CPSR is also
13577 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13578 if (live_regs_mask == (1U << reg))
13581 if (reg <= LAST_ARM_REGNUM
13582 && (reg != LR_REGNUM
13584 || ! IS_INTERRUPT (func_type)))
13586 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13587 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13594 /* Generate the load multiple instruction to restore the
13595 registers. Note we can get here, even if
13596 frame_pointer_needed is true, but only if sp already
13597 points to the base of the saved core registers. */
13598 if (live_regs_mask & (1 << SP_REGNUM))
13600 unsigned HOST_WIDE_INT stack_adjust;
13602 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13603 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13605 if (stack_adjust && arm_arch5 && TARGET_ARM)
13606 if (TARGET_UNIFIED_ASM)
13607 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13609 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13612 /* If we can't use ldmib (SA110 bug),
13613 then try to pop r3 instead. */
13615 live_regs_mask |= 1 << 3;
13617 if (TARGET_UNIFIED_ASM)
13618 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13620 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13624 if (TARGET_UNIFIED_ASM)
13625 sprintf (instr, "pop%s\t{", conditional);
13627 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13629 p = instr + strlen (instr);
13631 for (reg = 0; reg <= SP_REGNUM; reg++)
13632 if (live_regs_mask & (1 << reg))
13634 int l = strlen (reg_names[reg]);
13640 memcpy (p, ", ", 2);
13644 memcpy (p, "%|", 2);
13645 memcpy (p + 2, reg_names[reg], l);
13649 if (live_regs_mask & (1 << LR_REGNUM))
13651 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13652 /* If returning from an interrupt, restore the CPSR. */
13653 if (IS_INTERRUPT (func_type))
13660 output_asm_insn (instr, & operand);
13662 /* See if we need to generate an extra instruction to
13663 perform the actual function return. */
13665 && func_type != ARM_FT_INTERWORKED
13666 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13668 /* The return has already been handled
13669 by loading the LR into the PC. */
13676 switch ((int) ARM_FUNC_TYPE (func_type))
13680 /* ??? This is wrong for unified assembly syntax. */
13681 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13684 case ARM_FT_INTERWORKED:
13685 sprintf (instr, "bx%s\t%%|lr", conditional);
13688 case ARM_FT_EXCEPTION:
13689 /* ??? This is wrong for unified assembly syntax. */
13690 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13694 /* Use bx if it's available. */
13695 if (arm_arch5 || arm_arch4t)
13696 sprintf (instr, "bx%s\t%%|lr", conditional);
13698 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13702 output_asm_insn (instr, & operand);
13708 /* Write the function name into the code section, directly preceding
13709 the function prologue.
13711 Code will be output similar to this:
13713 .ascii "arm_poke_function_name", 0
13716 .word 0xff000000 + (t1 - t0)
13717 arm_poke_function_name
13719 stmfd sp!, {fp, ip, lr, pc}
13722 When performing a stack backtrace, code can inspect the value
13723 of 'pc' stored at 'fp' + 0. If the trace function then looks
13724 at location pc - 12 and the top 8 bits are set, then we know
13725 that there is a function name embedded immediately preceding this
13726 location, and that its length is ((pc[-3]) & ~0xff000000).
13728 We assume that pc is declared as a pointer to an unsigned long.
13730 It is of no benefit to output the function name if we are assembling
13731 a leaf function. These function types will not contain a stack
13732 backtrace structure; therefore it is not possible to determine the function name. */
13735 arm_poke_function_name (FILE *stream, const char *name)
13737 unsigned long alignlength;
13738 unsigned long length;
13741 length = strlen (name) + 1;
13742 alignlength = ROUND_UP_WORD (length);
13744 ASM_OUTPUT_ASCII (stream, name, length);
13745 ASM_OUTPUT_ALIGN (stream, 2);
13746 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13747 assemble_aligned_integer (UNITS_PER_WORD, x);
13750 /* Place some comments into the assembler stream
13751 describing the current function. */
13753 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13755 unsigned long func_type;
13759 thumb1_output_function_prologue (f, frame_size);
13763 /* Sanity check. */
13764 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13766 func_type = arm_current_func_type ();
13768 switch ((int) ARM_FUNC_TYPE (func_type))
13771 case ARM_FT_NORMAL:
13773 case ARM_FT_INTERWORKED:
13774 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13777 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13780 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13782 case ARM_FT_EXCEPTION:
13783 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13787 if (IS_NAKED (func_type))
13788 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13790 if (IS_VOLATILE (func_type))
13791 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13793 if (IS_NESTED (func_type))
13794 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13795 if (IS_STACKALIGN (func_type))
13796 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13798 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13800 crtl->args.pretend_args_size, frame_size);
13802 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13803 frame_pointer_needed,
13804 cfun->machine->uses_anonymous_args);
13806 if (cfun->machine->lr_save_eliminated)
13807 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13809 if (crtl->calls_eh_return)
13810 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13815 arm_output_epilogue (rtx sibling)
13818 unsigned long saved_regs_mask;
13819 unsigned long func_type;
13820 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13821 frame that is $fp + 4 for a non-variadic function. */
13822 int floats_offset = 0;
13824 FILE * f = asm_out_file;
13825 unsigned int lrm_count = 0;
13826 int really_return = (sibling == NULL);
13828 arm_stack_offsets *offsets;
13830 /* If we have already generated the return instruction
13831 then it is futile to generate anything else. */
13832 if (use_return_insn (FALSE, sibling) &&
13833 (cfun->machine->return_used_this_function != 0))
13836 func_type = arm_current_func_type ();
13838 if (IS_NAKED (func_type))
13839 /* Naked functions don't have epilogues. */
13842 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13846 /* A volatile function should never return. Call abort. */
13847 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13848 assemble_external_libcall (op);
13849 output_asm_insn ("bl\t%a0", &op);
13854 /* If we are throwing an exception, then we really must be doing a
13855 return, so we can't tail-call. */
13856 gcc_assert (!crtl->calls_eh_return || really_return);
13858 offsets = arm_get_frame_offsets ();
13859 saved_regs_mask = offsets->saved_regs_mask;
13862 lrm_count = bit_count (saved_regs_mask);
13864 floats_offset = offsets->saved_args;
13865 /* Compute how far away the floats will be. */
13866 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13867 if (saved_regs_mask & (1 << reg))
13868 floats_offset += 4;
13870 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13872 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13873 int vfp_offset = offsets->frame;
13875 if (TARGET_FPA_EMU2)
13877 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13878 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13880 floats_offset += 12;
13881 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13882 reg, FP_REGNUM, floats_offset - vfp_offset);
13887 start_reg = LAST_FPA_REGNUM;
13889 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13891 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13893 floats_offset += 12;
13895 /* We can't unstack more than four registers at once. */
13896 if (start_reg - reg == 3)
13898 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13899 reg, FP_REGNUM, floats_offset - vfp_offset);
13900 start_reg = reg - 1;
13905 if (reg != start_reg)
13906 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13907 reg + 1, start_reg - reg,
13908 FP_REGNUM, floats_offset - vfp_offset);
13909 start_reg = reg - 1;
13913 /* Just in case the last register checked also needs unstacking. */
13914 if (reg != start_reg)
13915 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13916 reg + 1, start_reg - reg,
13917 FP_REGNUM, floats_offset - vfp_offset);
13920 if (TARGET_HARD_FLOAT && TARGET_VFP)
13924 /* The fldmd insns do not have base+offset addressing
13925 modes, so we use IP to hold the address. */
13926 saved_size = arm_get_vfp_saved_size ();
13928 if (saved_size > 0)
13930 floats_offset += saved_size;
13931 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13932 FP_REGNUM, floats_offset - vfp_offset);
13934 start_reg = FIRST_VFP_REGNUM;
13935 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13937 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13938 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13940 if (start_reg != reg)
13941 vfp_output_fldmd (f, IP_REGNUM,
13942 (start_reg - FIRST_VFP_REGNUM) / 2,
13943 (reg - start_reg) / 2);
13944 start_reg = reg + 2;
13947 if (start_reg != reg)
13948 vfp_output_fldmd (f, IP_REGNUM,
13949 (start_reg - FIRST_VFP_REGNUM) / 2,
13950 (reg - start_reg) / 2);
13955 /* The frame pointer is guaranteed to be non-double-word aligned.
13956 This is because it is set to (old_stack_pointer - 4) and the
13957 old_stack_pointer was double word aligned. Thus the offset to
13958 the iWMMXt registers to be loaded must also be non-double-word
13959 sized, so that the resultant address *is* double-word aligned.
13960 We can ignore floats_offset since that was already included in
13961 the live_regs_mask. */
13962 lrm_count += (lrm_count % 2 ? 2 : 1);
13964 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13965 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13967 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13968 reg, FP_REGNUM, lrm_count * 4);
13973 /* saved_regs_mask should contain the IP, which at the time of stack
13974 frame generation actually contains the old stack pointer. So a
13975 quick way to unwind the stack is just pop the IP register directly
13976 into the stack pointer. */
13977 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13978 saved_regs_mask &= ~ (1 << IP_REGNUM);
13979 saved_regs_mask |= (1 << SP_REGNUM);
13981 /* There are two registers left in saved_regs_mask - LR and PC. We
13982 only need to restore the LR register (the return address), but to
13983 save time we can load it directly into the PC, unless we need a
13984 special function exit sequence, or we are not really returning. */
13986 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13987 && !crtl->calls_eh_return)
13988 /* Delete the LR from the register mask, so that the LR on
13989 the stack is loaded into the PC in the register mask. */
13990 saved_regs_mask &= ~ (1 << LR_REGNUM);
13992 saved_regs_mask &= ~ (1 << PC_REGNUM);
13994 /* We must use SP as the base register, because SP is one of the
13995 registers being restored. If an interrupt or page fault
13996 happens in the ldm instruction, the SP might or might not
13997 have been restored. That would be bad, as then SP will no
13998 longer indicate the safe area of stack, and we can get stack
13999 corruption. Using SP as the base register means that it will
14000 be reset correctly to the original value, should an interrupt
14001 occur. If the stack pointer already points at the right
14002 place, then omit the subtraction. */
14003 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14004 || cfun->calls_alloca)
14005 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14006 4 * bit_count (saved_regs_mask));
14007 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14009 if (IS_INTERRUPT (func_type))
14010 /* Interrupt handlers will have pushed the
14011 IP onto the stack, so restore it now. */
14012 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
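/* A worked example (illustrative only): if the prologue executed
   "mov ip, sp; stmfd sp!, {r4, fp, ip, lr, pc}", the mask rewriting
   above turns {r4, fp, ip, lr, pc} into {r4, fp, sp, pc} and the
   epilogue becomes

	sub	sp, fp, #16
	ldmfd	sp, {r4, fp, sp, pc}

   so SP is restored from the saved IP slot (the old stack pointer) by
   the same instruction that reloads the other registers.  */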
14016 /* This branch is executed for ARM mode (non-apcs frames) and
14017 Thumb-2 mode. Frame layout is essentially the same for those
14018 cases, except that in ARM mode frame pointer points to the
14019 first saved register, while in Thumb-2 mode the frame pointer points
14020 to the last saved register.
14022 It is possible to make frame pointer point to last saved
14023 register in both cases, and remove some conditionals below.
14024 That means that fp setup in prologue would be just "mov fp, sp"
14025 and sp restore in epilogue would be just "mov sp, fp", whereas
14026 now we have to use add/sub in those cases. However, the value
14027 of that would be marginal, as both mov and add/sub are 32-bit
14028 in ARM mode, and it would require extra conditionals
14029 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14030 (where the frame pointer is required to point at the first register)
14031 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
14032 until a real need arises. */
14033 unsigned HOST_WIDE_INT amount;
14035 /* Restore stack pointer if necessary. */
14036 if (TARGET_ARM && frame_pointer_needed)
14038 operands[0] = stack_pointer_rtx;
14039 operands[1] = hard_frame_pointer_rtx;
14041 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14042 output_add_immediate (operands);
14046 if (frame_pointer_needed)
14048 /* For Thumb-2 restore sp from the frame pointer.
14049 Operand restrictions mean we have to increment FP, then copy
14050 it to SP. */
14051 amount = offsets->locals_base - offsets->saved_regs;
14052 operands[0] = hard_frame_pointer_rtx;
14056 unsigned long count;
14057 operands[0] = stack_pointer_rtx;
14058 amount = offsets->outgoing_args - offsets->saved_regs;
14059 /* Pop call-clobbered registers if it avoids a
14060 separate stack adjustment. */
14061 count = offsets->saved_regs - offsets->saved_args;
14064 && !crtl->calls_eh_return
14065 && bit_count(saved_regs_mask) * 4 == count
14066 && !IS_INTERRUPT (func_type)
14067 && !crtl->tail_call_emit)
14069 unsigned long mask;
14070 mask = (1 << (arm_size_return_regs() / 4)) - 1;
14072 mask &= ~saved_regs_mask;
14074 while (bit_count (mask) * 4 > amount)
14076 while ((mask & (1 << reg)) == 0)
14078 mask &= ~(1 << reg);
14080 if (bit_count (mask) * 4 == amount) {
14082 saved_regs_mask |= mask;
14089 operands[1] = operands[0];
14090 operands[2] = GEN_INT (amount);
14091 output_add_immediate (operands);
14093 if (frame_pointer_needed)
14094 asm_fprintf (f, "\tmov\t%r, %r\n",
14095 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14098 if (TARGET_FPA_EMU2)
14100 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14101 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14102 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14107 start_reg = FIRST_FPA_REGNUM;
14109 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14111 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14113 if (reg - start_reg == 3)
14115 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14116 start_reg, SP_REGNUM);
14117 start_reg = reg + 1;
14122 if (reg != start_reg)
14123 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14124 start_reg, reg - start_reg,
14127 start_reg = reg + 1;
14131 /* Just in case the last register checked also needs unstacking. */
14132 if (reg != start_reg)
14133 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14134 start_reg, reg - start_reg, SP_REGNUM);
14137 if (TARGET_HARD_FLOAT && TARGET_VFP)
14139 int end_reg = LAST_VFP_REGNUM + 1;
14141 /* Scan the registers in reverse order. We need to match
14142 any groupings made in the prologue and generate matching
14143 pop operations. */
14144 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14146 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14147 && (!df_regs_ever_live_p (reg + 1)
14148 || call_used_regs[reg + 1]))
14150 if (end_reg > reg + 2)
14151 vfp_output_fldmd (f, SP_REGNUM,
14152 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14153 (end_reg - (reg + 2)) / 2);
14157 if (end_reg > reg + 2)
14158 vfp_output_fldmd (f, SP_REGNUM, 0,
14159 (end_reg - (reg + 2)) / 2);
14163 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14164 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14165 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14167 /* If we can, restore the LR into the PC. */
14168 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14169 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14170 && !IS_STACKALIGN (func_type)
14172 && crtl->args.pretend_args_size == 0
14173 && saved_regs_mask & (1 << LR_REGNUM)
14174 && !crtl->calls_eh_return)
14176 saved_regs_mask &= ~ (1 << LR_REGNUM);
14177 saved_regs_mask |= (1 << PC_REGNUM);
14178 rfe = IS_INTERRUPT (func_type);
14183 /* Load the registers off the stack. If we only have one register
14184 to load use the LDR instruction - it is faster. For Thumb-2
14185 always use pop and the assembler will pick the best instruction. */
14186 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14187 && !IS_INTERRUPT(func_type))
14189 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14191 else if (saved_regs_mask)
14193 if (saved_regs_mask & (1 << SP_REGNUM))
14194 /* Note - write back to the stack register is not enabled
14195 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14196 in the list of registers and if we add writeback the
14197 instruction becomes UNPREDICTABLE. */
14198 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14200 else if (TARGET_ARM)
14201 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14204 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14207 if (crtl->args.pretend_args_size)
14209 /* Unwind the pre-pushed regs. */
14210 operands[0] = operands[1] = stack_pointer_rtx;
14211 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14212 output_add_immediate (operands);
14216 /* We may have already restored PC directly from the stack. */
14217 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14220 /* Stack adjustment for exception handler. */
14221 if (crtl->calls_eh_return)
14222 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14223 ARM_EH_STACKADJ_REGNUM);
14225 /* Generate the return instruction. */
14226 switch ((int) ARM_FUNC_TYPE (func_type))
14230 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14233 case ARM_FT_EXCEPTION:
14234 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14237 case ARM_FT_INTERWORKED:
14238 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14242 if (IS_STACKALIGN (func_type))
14244 /* See comment in arm_expand_prologue. */
14245 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14247 if (arm_arch5 || arm_arch4t)
14248 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14250 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14258 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14259 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14261 arm_stack_offsets *offsets;
14267 /* Emit any call-via-reg trampolines that are needed for v4t support
14268 of call_reg and call_value_reg type insns. */
14269 for (regno = 0; regno < LR_REGNUM; regno++)
14271 rtx label = cfun->machine->call_via[regno];
14275 switch_to_section (function_section (current_function_decl));
14276 targetm.asm_out.internal_label (asm_out_file, "L",
14277 CODE_LABEL_NUMBER (label));
14278 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14282 /* ??? Probably not safe to set this here, since it assumes that a
14283 function will be emitted as assembly immediately after we generate
14284 RTL for it. This does not happen for inline functions. */
14285 cfun->machine->return_used_this_function = 0;
14287 else /* TARGET_32BIT */
14289 /* We need to take into account any stack-frame rounding. */
14290 offsets = arm_get_frame_offsets ();
14292 gcc_assert (!use_return_insn (FALSE, NULL)
14293 || (cfun->machine->return_used_this_function != 0)
14294 || offsets->saved_regs == offsets->outgoing_args
14295 || frame_pointer_needed);
14297 /* Reset the ARM-specific per-function variables. */
14298 after_arm_reorg = 0;
14302 /* Generate and emit an insn that we will recognize as a push_multi.
14303 Unfortunately, since this insn does not reflect very well the actual
14304 semantics of the operation, we need to annotate the insn for the benefit
14305 of DWARF2 frame unwind information. */
14307 emit_multi_reg_push (unsigned long mask)
14310 int num_dwarf_regs;
14314 int dwarf_par_index;
14317 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14318 if (mask & (1 << i))
14321 gcc_assert (num_regs && num_regs <= 16);
14323 /* We don't record the PC in the dwarf frame information. */
14324 num_dwarf_regs = num_regs;
14325 if (mask & (1 << PC_REGNUM))
14328 /* For the body of the insn we are going to generate an UNSPEC in
14329 parallel with several USEs. This allows the insn to be recognized
14330 by the push_multi pattern in the arm.md file.
14332 The body of the insn looks something like this:
14333
14334   (parallel [
14335       (set (mem:BLK (pre_modify:SI (reg:SI sp)
14336                                    (const_int:SI <num>)))
14337            (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14338       (use (reg:SI XX))
14339       (use (reg:SI YY))
14340       ...
14341    ])
14343 For the frame note however, we try to be more explicit and actually
14344 show each register being stored into the stack frame, plus a (single)
14345 decrement of the stack pointer. We do it this way in order to be
14346 friendly to the stack unwinding code, which only wants to see a single
14347 stack decrement per instruction. The RTL we generate for the note looks
14348 something like this:
14349
14350   (sequence [
14351       (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14352       (set (mem:SI (reg:SI sp)) (reg:SI r4))
14353       (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14354       (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14355       ...
14356    ])
14358 FIXME: In an ideal world the PRE_MODIFY would not exist and
14359 instead we'd have a parallel expression detailing all
14360 the stores to the various memory addresses so that debug
14361 information is more up-to-date. Remember however while writing
14362 this to take care of the constraints with the push instruction.
14364 Note also that this has to be taken care of for the VFP registers.
14366 For more see PR43399. */
14368 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14369 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14370 dwarf_par_index = 1;
14372 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14374 if (mask & (1 << i))
14376 reg = gen_rtx_REG (SImode, i);
14378 XVECEXP (par, 0, 0)
14379 = gen_rtx_SET (VOIDmode,
14382 gen_rtx_PRE_MODIFY (Pmode,
14385 (stack_pointer_rtx,
14388 gen_rtx_UNSPEC (BLKmode,
14389 gen_rtvec (1, reg),
14390 UNSPEC_PUSH_MULT));
14392 if (i != PC_REGNUM)
14394 tmp = gen_rtx_SET (VOIDmode,
14395 gen_frame_mem (SImode, stack_pointer_rtx),
14397 RTX_FRAME_RELATED_P (tmp) = 1;
14398 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14406 for (j = 1, i++; j < num_regs; i++)
14408 if (mask & (1 << i))
14410 reg = gen_rtx_REG (SImode, i);
14412 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14414 if (i != PC_REGNUM)
14417 = gen_rtx_SET (VOIDmode,
14420 plus_constant (stack_pointer_rtx,
14423 RTX_FRAME_RELATED_P (tmp) = 1;
14424 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14431 par = emit_insn (par);
14433 tmp = gen_rtx_SET (VOIDmode,
14435 plus_constant (stack_pointer_rtx, -4 * num_regs));
14436 RTX_FRAME_RELATED_P (tmp) = 1;
14437 XVECEXP (dwarf, 0, 0) = tmp;
14439 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
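/* Illustrative example (assumed mask): emit_multi_reg_push
   ((1 << 4) | (1 << 5) | (1 << LR_REGNUM)) builds RTL that the
   push_multi pattern emits as

	stmfd	sp!, {r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes one
   12-byte stack decrement followed by the three individual word
   stores.  */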
14444 /* Calculate the size of the return value that is passed in registers. */
14446 arm_size_return_regs (void)
14448 enum machine_mode mode;
14450 if (crtl->return_rtx != 0)
14451 mode = GET_MODE (crtl->return_rtx);
14453 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14455 return GET_MODE_SIZE (mode);
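/* For example (illustrative): a function returning "long long" has a
   DImode return value, so this returns 8 (the register pair r0/r1);
   a function returning "int" yields 4, just r0.  */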
14459 emit_sfm (int base_reg, int count)
14466 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14467 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14469 reg = gen_rtx_REG (XFmode, base_reg++);
14471 XVECEXP (par, 0, 0)
14472 = gen_rtx_SET (VOIDmode,
14475 gen_rtx_PRE_MODIFY (Pmode,
14478 (stack_pointer_rtx,
14481 gen_rtx_UNSPEC (BLKmode,
14482 gen_rtvec (1, reg),
14483 UNSPEC_PUSH_MULT));
14484 tmp = gen_rtx_SET (VOIDmode,
14485 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14486 RTX_FRAME_RELATED_P (tmp) = 1;
14487 XVECEXP (dwarf, 0, 1) = tmp;
14489 for (i = 1; i < count; i++)
14491 reg = gen_rtx_REG (XFmode, base_reg++);
14492 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14494 tmp = gen_rtx_SET (VOIDmode,
14495 gen_frame_mem (XFmode,
14496 plus_constant (stack_pointer_rtx,
14499 RTX_FRAME_RELATED_P (tmp) = 1;
14500 XVECEXP (dwarf, 0, i + 1) = tmp;
14503 tmp = gen_rtx_SET (VOIDmode,
14505 plus_constant (stack_pointer_rtx, -12 * count));
14507 RTX_FRAME_RELATED_P (tmp) = 1;
14508 XVECEXP (dwarf, 0, 0) = tmp;
14510 par = emit_insn (par);
14511 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
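/* Illustrative sketch: emit_sfm (FIRST_FPA_REGNUM, 2) describes two
   XFmode FPA registers being pushed; the unwind note records a single
   24-byte stack decrement (two 12-byte slots) plus both stores.  */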
14517 /* Return true if the current function needs to save/restore LR. */
14520 thumb_force_lr_save (void)
14522 return !cfun->machine->lr_save_eliminated
14523 && (!leaf_function_p ()
14524 || thumb_far_jump_used_p ()
14525 || df_regs_ever_live_p (LR_REGNUM));
14529 /* Compute the distance from register FROM to register TO.
14530 These can be the arg pointer (26), the soft frame pointer (25),
14531 the stack pointer (13) or the hard frame pointer (11).
14532 In thumb mode r7 is used as the soft frame pointer, if needed.
14533 Typical stack layout looks like this:
14535 old stack pointer ->     |    |
14536                           ----
14537                          |    | \
14538                          |    |   saved arguments for
14539                          |    |   vararg functions
14540                          |    | /
14541                           ----
14542 hard FP & arg pointer -> |    | \
14543                          |    |   stack
14544                          |    |   frame
14545                          |    | /
14546                           ----
14547                          |    | \
14548                          |    |   call saved
14549                          |    |   registers
14550 soft frame pointer ->    |    | /
14551                           ----
14552                          |    | \
14553                          |    |   local
14554                          |    |   variables
14555 locals base pointer ->   |    | /
14556                           ----
14557                          |    | \
14558                          |    |   outgoing
14559                          |    |   arguments
14560 current stack pointer -> |    | /
14561                           ----
14563 For a given function some or all of these stack components
14564 may not be needed, giving rise to the possibility of
14565 eliminating some of the registers.
14567 The values returned by this function must reflect the behavior
14568 of arm_expand_prologue() and arm_compute_save_reg_mask().
14570 The sign of the number returned reflects the direction of stack
14571 growth, so the values are positive for all eliminations except
14572 from the soft frame pointer to the hard frame pointer.
14574 SFP may point just inside the local variables block to ensure correct
14575 alignment. */
14578 /* Calculate stack offsets. These are used to calculate register elimination
14579 offsets and in prologue/epilogue code. Also calculates which registers
14580 should be saved. */
14582 static arm_stack_offsets *
14583 arm_get_frame_offsets (void)
14585 struct arm_stack_offsets *offsets;
14586 unsigned long func_type;
14590 HOST_WIDE_INT frame_size;
14593 offsets = &cfun->machine->stack_offsets;
14595 /* We need to know if we are a leaf function. Unfortunately, it
14596 is possible to be called after start_sequence has been called,
14597 which causes get_insns to return the insns for the sequence,
14598 not the function, which will cause leaf_function_p to return
14599 the incorrect result.
14600 To work around this we cache the result; we only need
14601 to know about leaf functions once reload has completed, and the
14602 frame size cannot be changed after that time, so we can safely
14603 use the cached value. */
14605 if (reload_completed)
14608 /* Initially this is the size of the local variables. It will be translated
14609 into an offset once we have determined the size of preceding data. */
14610 frame_size = ROUND_UP_WORD (get_frame_size ());
14612 leaf = leaf_function_p ();
14614 /* Space for variadic functions. */
14615 offsets->saved_args = crtl->args.pretend_args_size;
14617 /* In Thumb mode this is incorrect, but never used. */
14618 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14619 arm_compute_static_chain_stack_bytes();
14623 unsigned int regno;
14625 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14626 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14627 saved = core_saved;
14629 /* We know that SP will be doubleword aligned on entry, and we must
14630 preserve that condition at any subroutine call. We also require the
14631 soft frame pointer to be doubleword aligned. */
14633 if (TARGET_REALLY_IWMMXT)
14635 /* Check for the call-saved iWMMXt registers. */
14636 for (regno = FIRST_IWMMXT_REGNUM;
14637 regno <= LAST_IWMMXT_REGNUM;
14639 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14643 func_type = arm_current_func_type ();
14644 if (! IS_VOLATILE (func_type))
14646 /* Space for saved FPA registers. */
14647 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14648 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14651 /* Space for saved VFP registers. */
14652 if (TARGET_HARD_FLOAT && TARGET_VFP)
14653 saved += arm_get_vfp_saved_size ();
14656 else /* TARGET_THUMB1 */
14658 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14659 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14660 saved = core_saved;
14661 if (TARGET_BACKTRACE)
14665 /* Saved registers include the stack frame. */
14666 offsets->saved_regs = offsets->saved_args + saved +
14667 arm_compute_static_chain_stack_bytes();
14668 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14669 /* A leaf function does not need any stack alignment if it has nothing
14670 on the stack. */
14671 if (leaf && frame_size == 0)
14673 offsets->outgoing_args = offsets->soft_frame;
14674 offsets->locals_base = offsets->soft_frame;
14678 /* Ensure SFP has the correct alignment. */
14679 if (ARM_DOUBLEWORD_ALIGN
14680 && (offsets->soft_frame & 7))
14682 offsets->soft_frame += 4;
14683 /* Try to align stack by pushing an extra reg. Don't bother doing this
14684 when there is a stack frame as the alignment will be rolled into
14685 the normal stack adjustment. */
14686 if (frame_size + crtl->outgoing_args_size == 0)
14690 /* If it is safe to use r3, then do so. This sometimes
14691 generates better code on Thumb-2 by avoiding the need to
14692 use 32-bit push/pop instructions. */
14693 if (!crtl->tail_call_emit
14694 && arm_size_return_regs () <= 12)
14699 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14701 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14710 offsets->saved_regs += 4;
14711 offsets->saved_regs_mask |= (1 << reg);
14716 offsets->locals_base = offsets->soft_frame + frame_size;
14717 offsets->outgoing_args = (offsets->locals_base
14718 + crtl->outgoing_args_size);
14720 if (ARM_DOUBLEWORD_ALIGN)
14722 /* Ensure SP remains doubleword aligned. */
14723 if (offsets->outgoing_args & 7)
14724 offsets->outgoing_args += 4;
14725 gcc_assert (!(offsets->outgoing_args & 7));
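/* A worked example (assumed figures, with
   CALLER_INTERWORKING_SLOT_SIZE == 0): an ARM function with 8 bytes
   of locals, no outgoing arguments and saved_regs_mask covering
   {r4, r5, lr} gets saved_args = 0, saved_regs = 12, soft_frame = 12
   padded to 16 for doubleword alignment, locals_base = 24 and
   outgoing_args = 24, which is already 8-byte aligned.  */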
14732 /* Calculate the relative offsets for the different stack pointers. Positive
14733 offsets are in the direction of stack growth. */
14736 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14738 arm_stack_offsets *offsets;
14740 offsets = arm_get_frame_offsets ();
14742 /* OK, now we have enough information to compute the distances.
14743 There must be an entry in these switch tables for each pair
14744 of registers in ELIMINABLE_REGS, even if some of the entries
14745 seem to be redundant or useless. */
14748 case ARG_POINTER_REGNUM:
14751 case THUMB_HARD_FRAME_POINTER_REGNUM:
14754 case FRAME_POINTER_REGNUM:
14755 /* This is the reverse of the soft frame pointer
14756 to hard frame pointer elimination below. */
14757 return offsets->soft_frame - offsets->saved_args;
14759 case ARM_HARD_FRAME_POINTER_REGNUM:
14760 /* This is only non-zero in the case where the static chain register
14761 is stored above the frame. */
14762 return offsets->frame - offsets->saved_args - 4;
14764 case STACK_POINTER_REGNUM:
14765 /* If nothing has been pushed on the stack at all
14766 then this will return -4. This *is* correct! */
14767 return offsets->outgoing_args - (offsets->saved_args + 4);
14770 gcc_unreachable ();
14772 gcc_unreachable ();
14774 case FRAME_POINTER_REGNUM:
14777 case THUMB_HARD_FRAME_POINTER_REGNUM:
14780 case ARM_HARD_FRAME_POINTER_REGNUM:
14781 /* The hard frame pointer points to the top entry in the
14782 stack frame. The soft frame pointer points to the bottom entry
14783 in the stack frame. If there is no stack frame at all,
14784 then they are identical. */
14786 return offsets->frame - offsets->soft_frame;
14788 case STACK_POINTER_REGNUM:
14789 return offsets->outgoing_args - offsets->soft_frame;
14792 gcc_unreachable ();
14794 gcc_unreachable ();
14797 /* You cannot eliminate from the stack pointer.
14798 In theory you could eliminate from the hard frame
14799 pointer to the stack pointer, but this will never
14800 happen, since if a stack frame is not needed the
14801 hard frame pointer will never be used. */
14802 gcc_unreachable ();
14806 /* Given FROM and TO register numbers, say whether this elimination is
14807 allowed. Frame pointer elimination is automatically handled.
14809 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14810 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14811 pointer, we must eliminate FRAME_POINTER_REGNUM into
14812 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14813 ARG_POINTER_REGNUM. */
14816 arm_can_eliminate (const int from, const int to)
14818 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14819 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14820 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14821 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14825 /* Emit RTL to save coprocessor registers on function entry. Returns the
14826 number of bytes pushed. */
14829 arm_save_coproc_regs(void)
14831 int saved_size = 0;
14833 unsigned start_reg;
14836 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14837 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14839 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14840 insn = gen_rtx_MEM (V2SImode, insn);
14841 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14842 RTX_FRAME_RELATED_P (insn) = 1;
14846 /* Save any floating point call-saved registers used by this
14847 function. */
14848 if (TARGET_FPA_EMU2)
14850 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14851 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14853 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14854 insn = gen_rtx_MEM (XFmode, insn);
14855 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14856 RTX_FRAME_RELATED_P (insn) = 1;
14862 start_reg = LAST_FPA_REGNUM;
14864 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14866 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14868 if (start_reg - reg == 3)
14870 insn = emit_sfm (reg, 4);
14871 RTX_FRAME_RELATED_P (insn) = 1;
14873 start_reg = reg - 1;
14878 if (start_reg != reg)
14880 insn = emit_sfm (reg + 1, start_reg - reg);
14881 RTX_FRAME_RELATED_P (insn) = 1;
14882 saved_size += (start_reg - reg) * 12;
14884 start_reg = reg - 1;
14888 if (start_reg != reg)
14890 insn = emit_sfm (reg + 1, start_reg - reg);
14891 saved_size += (start_reg - reg) * 12;
14892 RTX_FRAME_RELATED_P (insn) = 1;
14895 if (TARGET_HARD_FLOAT && TARGET_VFP)
14897 start_reg = FIRST_VFP_REGNUM;
14899 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14901 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14902 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14904 if (start_reg != reg)
14905 saved_size += vfp_emit_fstmd (start_reg,
14906 (reg - start_reg) / 2);
14907 start_reg = reg + 2;
14910 if (start_reg != reg)
14911 saved_size += vfp_emit_fstmd (start_reg,
14912 (reg - start_reg) / 2);
14918 /* Set the Thumb frame pointer from the stack pointer. */
14921 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14923 HOST_WIDE_INT amount;
14926 amount = offsets->outgoing_args - offsets->locals_base;
14928 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14929 stack_pointer_rtx, GEN_INT (amount)));
14932 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14933 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14934 expects the first two operands to be the same. */
14937 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14939 hard_frame_pointer_rtx));
14943 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14944 hard_frame_pointer_rtx,
14945 stack_pointer_rtx));
14947 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14948 plus_constant (stack_pointer_rtx, amount));
14949 RTX_FRAME_RELATED_P (dwarf) = 1;
14950 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14953 RTX_FRAME_RELATED_P (insn) = 1;
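/* Illustrative example: if the frame pointer sits 8 bytes above the
   final SP, the simple path above emits the equivalent of
   "add r7, sp, #8"; the large-offset path materializes the amount in
   the frame pointer first and attaches an explicit fp = sp + amount
   note for the unwinder.  */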
14956 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14957 function. */
14958 void
14959 arm_expand_prologue (void)
14964 unsigned long live_regs_mask;
14965 unsigned long func_type;
14967 int saved_pretend_args = 0;
14968 int saved_regs = 0;
14969 unsigned HOST_WIDE_INT args_to_push;
14970 arm_stack_offsets *offsets;
14972 func_type = arm_current_func_type ();
14974 /* Naked functions don't have prologues. */
14975 if (IS_NAKED (func_type))
14978 /* Make a copy of c_f_p_a_s (crtl->args.pretend_args_size) as we may need to modify it locally. */
14979 args_to_push = crtl->args.pretend_args_size;
14981 /* Compute which registers we will have to save onto the stack. */
14982 offsets = arm_get_frame_offsets ();
14983 live_regs_mask = offsets->saved_regs_mask;
14985 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14987 if (IS_STACKALIGN (func_type))
14992 /* Handle a word-aligned stack pointer. We generate the following:
14993
14994 mov r0, sp
14995 bic r1, r0, #7
14996 mov sp, r1
14997 <save and restore r0 in normal prologue/epilogue>
14998 mov sp, r0
14999 bx lr
15000
15001 The unwinder doesn't need to know about the stack realignment.
15002 Just tell it we saved SP in r0. */
15003 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15005 r0 = gen_rtx_REG (SImode, 0);
15006 r1 = gen_rtx_REG (SImode, 1);
15007 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15008 compiler won't choke. */
15009 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15010 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15011 insn = gen_movsi (r0, stack_pointer_rtx);
15012 RTX_FRAME_RELATED_P (insn) = 1;
15013 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15015 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15016 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15019 /* For APCS frames, if IP register is clobbered
15020 when creating the frame, save that register in a special
15021 way. */
15022 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15024 if (IS_INTERRUPT (func_type))
15026 /* Interrupt functions must not corrupt any registers.
15027 Creating a frame pointer however, corrupts the IP
15028 register, so we must push it first. */
15029 insn = emit_multi_reg_push (1 << IP_REGNUM);
15031 /* Do not set RTX_FRAME_RELATED_P on this insn.
15032 The dwarf stack unwinding code only wants to see one
15033 stack decrement per function, and this is not it. If
15034 this instruction is labeled as being part of the frame
15035 creation sequence then dwarf2out_frame_debug_expr will
15036 die when it encounters the assignment of IP to FP
15037 later on, since the use of SP here establishes SP as
15038 the CFA register and not IP.
15040 Anyway this instruction is not really part of the stack
15041 frame creation although it is part of the prologue. */
15043 else if (IS_NESTED (func_type))
15045 /* The Static chain register is the same as the IP register
15046 used as a scratch register during stack frame creation.
15047 To get around this need to find somewhere to store IP
15048 whilst the frame is being created. We try the following
15051 1. The last argument register.
15052 2. A slot on the stack above the frame. (This only
15053 works if the function is not a varargs function).
15054 3. Register r3, after pushing the argument registers
15055 onto the stack.
15057 Note - we only need to tell the dwarf2 backend about the SP
15058 adjustment in the second variant; the static chain register
15059 doesn't need to be unwound, as it doesn't contain a value
15060 inherited from the caller. */
15062 if (df_regs_ever_live_p (3) == false)
15063 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15064 else if (args_to_push == 0)
15068 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15071 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15072 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15075 /* Just tell the dwarf backend that we adjusted SP. */
15076 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15077 plus_constant (stack_pointer_rtx,
15079 RTX_FRAME_RELATED_P (insn) = 1;
15080 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15084 /* Store the args on the stack. */
15085 if (cfun->machine->uses_anonymous_args)
15086 insn = emit_multi_reg_push
15087 ((0xf0 >> (args_to_push / 4)) & 0xf);
15090 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15091 GEN_INT (- args_to_push)));
15093 RTX_FRAME_RELATED_P (insn) = 1;
15095 saved_pretend_args = 1;
15096 fp_offset = args_to_push;
15099 /* Now reuse r3 to preserve IP. */
15100 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15104 insn = emit_set_insn (ip_rtx,
15105 plus_constant (stack_pointer_rtx, fp_offset));
15106 RTX_FRAME_RELATED_P (insn) = 1;
15111 /* Push the argument registers, or reserve space for them. */
15112 if (cfun->machine->uses_anonymous_args)
15113 insn = emit_multi_reg_push
15114 ((0xf0 >> (args_to_push / 4)) & 0xf);
15117 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15118 GEN_INT (- args_to_push)));
15119 RTX_FRAME_RELATED_P (insn) = 1;
15122 /* If this is an interrupt service routine, and the link register
15123 is going to be pushed, and we're not generating extra
15124 push of IP (needed when a frame is needed and the frame layout is APCS),
15125 subtracting four from LR now will mean that the function return
15126 can be done with a single instruction. */
15127 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15128 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15129 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15132 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15134 emit_set_insn (lr, plus_constant (lr, -4));
15137 if (live_regs_mask)
15139 saved_regs += bit_count (live_regs_mask) * 4;
15140 if (optimize_size && !frame_pointer_needed
15141 && saved_regs == offsets->saved_regs - offsets->saved_args)
15143 /* If no coprocessor registers are being pushed and we don't have
15144 to worry about a frame pointer then push extra registers to
15145 create the stack frame. This is done in a way that does not
15146 alter the frame layout, so it is independent of the epilogue. */
15150 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15152 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15153 if (frame && n * 4 >= frame)
15156 live_regs_mask |= (1 << n) - 1;
15157 saved_regs += frame;
15160 insn = emit_multi_reg_push (live_regs_mask);
15161 RTX_FRAME_RELATED_P (insn) = 1;
15164 if (! IS_VOLATILE (func_type))
15165 saved_regs += arm_save_coproc_regs ();
15167 if (frame_pointer_needed && TARGET_ARM)
15169 /* Create the new frame pointer. */
15170 if (TARGET_APCS_FRAME)
15172 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15173 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15174 RTX_FRAME_RELATED_P (insn) = 1;
15176 if (IS_NESTED (func_type))
15178 /* Recover the static chain register. */
15179 if (!df_regs_ever_live_p (3)
15180 || saved_pretend_args)
15181 insn = gen_rtx_REG (SImode, 3);
15182 else /* if (crtl->args.pretend_args_size == 0) */
15184 insn = plus_constant (hard_frame_pointer_rtx, 4);
15185 insn = gen_frame_mem (SImode, insn);
15187 emit_set_insn (ip_rtx, insn);
15188 /* Add a USE to stop propagate_one_insn() from barfing. */
15189 emit_insn (gen_prologue_use (ip_rtx));
15194 insn = GEN_INT (saved_regs - 4);
15195 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15196 stack_pointer_rtx, insn));
15197 RTX_FRAME_RELATED_P (insn) = 1;
15201 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15203 /* This add can produce multiple insns for a large constant, so we
15204 need to get tricky. */
15205 rtx last = get_last_insn ();
15207 amount = GEN_INT (offsets->saved_args + saved_regs
15208 - offsets->outgoing_args);
15210 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15214 last = last ? NEXT_INSN (last) : get_insns ();
15215 RTX_FRAME_RELATED_P (last) = 1;
15217 while (last != insn);
15219 /* If the frame pointer is needed, emit a special barrier that
15220 will prevent the scheduler from moving stores to the frame
15221 before the stack adjustment. */
15222 if (frame_pointer_needed)
15223 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15224 hard_frame_pointer_rtx));
15228 if (frame_pointer_needed && TARGET_THUMB2)
15229 thumb_set_frame_pointer (offsets);
15231 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15233 unsigned long mask;
15235 mask = live_regs_mask;
15236 mask &= THUMB2_WORK_REGS;
15237 if (!IS_NESTED (func_type))
15238 mask |= (1 << IP_REGNUM);
15239 arm_load_pic_register (mask);
15242 /* If we are profiling, make sure no instructions are scheduled before
15243 the call to mcount. Similarly if the user has requested no
15244 scheduling in the prolog. Similarly if we want non-call exceptions
15245 using the EABI unwinder, to prevent faulting instructions from being
15246 swapped with a stack adjustment. */
15247 if (crtl->profile || !TARGET_SCHED_PROLOG
15248 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
15249 emit_insn (gen_blockage ());
15251 /* If the link register is being kept alive, with the return address in it,
15252 then make sure that it does not get reused by the ce2 pass. */
15253 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15254 cfun->machine->lr_save_eliminated = 1;
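/* For orientation (illustrative; the exact registers depend on
   live_regs_mask): a typical APCS-frame prologue built by the code
   above has the shape

	mov	ip, sp
	stmfd	sp!, {r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals>

   which pairs with the unwinding sequences in arm_output_epilogue.  */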
15257 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15259 arm_print_condition (FILE *stream)
15261 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15263 /* Branch conversion is not implemented for Thumb-2. */
15266 output_operand_lossage ("predicated Thumb instruction");
15269 if (current_insn_predicate != NULL)
15271 output_operand_lossage
15272 ("predicated instruction in conditional sequence");
15276 fputs (arm_condition_codes[arm_current_cc], stream);
15278 else if (current_insn_predicate)
15280 enum arm_cond_code code;
15284 output_operand_lossage ("predicated Thumb instruction");
15288 code = get_arm_condition_code (current_insn_predicate);
15289 fputs (arm_condition_codes[code], stream);
15294 /* If CODE is 'd', then the X is a condition operand and the instruction
15295 should only be executed if the condition is true.
15296 if CODE is 'D', then the X is a condition operand and the instruction
15297 should only be executed if the condition is false: however, if the mode
15298 of the comparison is CCFPEmode, then always execute the instruction -- we
15299 do this because in these circumstances !GE does not necessarily imply LT;
15300 in these cases the instruction pattern will take care to make sure that
15301 an instruction containing %d will follow, thereby undoing the effects of
15302 doing this instruction unconditionally.
15303 If CODE is 'N' then X is a floating point operand that must be negated
15304 before output.
15305 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15306 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15308 arm_print_operand (FILE *stream, rtx x, int code)
15313 fputs (ASM_COMMENT_START, stream);
15317 fputs (user_label_prefix, stream);
15321 fputs (REGISTER_PREFIX, stream);
15325 arm_print_condition (stream);
15329 /* Nothing in unified syntax, otherwise the current condition code. */
15330 if (!TARGET_UNIFIED_ASM)
15331 arm_print_condition (stream);
15335 /* The current condition code in unified syntax, otherwise nothing. */
15336 if (TARGET_UNIFIED_ASM)
15337 arm_print_condition (stream);
15341 /* The current condition code for a condition code setting instruction.
15342 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15343 if (TARGET_UNIFIED_ASM)
15345 fputc('s', stream);
15346 arm_print_condition (stream);
15350 arm_print_condition (stream);
15351 fputc('s', stream);
15356 /* If the instruction is conditionally executed then print
15357 the current condition code, otherwise print 's'. */
15358 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15359 if (current_insn_predicate)
15360 arm_print_condition (stream);
15362 fputc('s', stream);
15365 /* %# is a "break" sequence. It doesn't output anything, but is used to
15366 separate e.g. operand numbers from following text, if that text consists
15367 of further digits which we don't want to be part of the operand
15368 number. */
15375 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15376 r = real_value_negate (&r);
15377 fprintf (stream, "%s", fp_const_from_val (&r));
15381 /* An integer or symbol address without a preceding # sign. */
15383 switch (GET_CODE (x))
15386 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15390 output_addr_const (stream, x);
15394 gcc_unreachable ();
15399 if (GET_CODE (x) == CONST_INT)
15402 val = ARM_SIGN_EXTEND (~INTVAL (x));
15403 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15407 putc ('~', stream);
15408 output_addr_const (stream, x);
15413 /* The low 16 bits of an immediate constant. */
15414 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15418 fprintf (stream, "%s", arithmetic_instr (x, 1));
15421 /* Truncate Cirrus shift counts. */
15423 if (GET_CODE (x) == CONST_INT)
15425 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15428 arm_print_operand (stream, x, 0);
15432 fprintf (stream, "%s", arithmetic_instr (x, 0));
15440 if (!shift_operator (x, SImode))
15442 output_operand_lossage ("invalid shift operand");
15446 shift = shift_op (x, &val);
15450 fprintf (stream, ", %s ", shift);
15452 arm_print_operand (stream, XEXP (x, 1), 0);
15454 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15459 /* An explanation of the 'Q', 'R' and 'H' register operands:
15461 In a pair of registers containing a DI or DF value the 'Q'
15462 operand returns the register number of the register containing
15463 the least significant part of the value. The 'R' operand returns
15464 the register number of the register containing the most
15465 significant part of the value.
15467 The 'H' operand returns the higher of the two register numbers.
15468 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15469 same as the 'Q' operand, since the most significant part of the
15470 value is held in the lower number register. The reverse is true
15471 on systems where WORDS_BIG_ENDIAN is false.
15473 The purpose of these operands is to distinguish between cases
15474 where the endian-ness of the values is important (for example
15475 when they are added together), and cases where the endian-ness
15476 is irrelevant, but the order of register operations is important.
15477 For example when loading a value from memory into a register
15478 pair, the endian-ness does not matter. Provided that the value
15479 from the lower memory address is put into the lower numbered
15480 register, and the value from the higher address is put into the
15481 higher numbered register, the load will work regardless of whether
15482 the value being loaded is big-wordian or little-wordian. The
15483 order of the two register loads can matter however, if the address
15484 of the memory location is actually held in one of the registers
15485 being overwritten by the load.
15487 The 'Q' and 'R' constraints are also available for 64-bit
15488 constants. */
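/* Example (illustrative): for a DImode value held in r0/r1 on a
   little-endian target, '%Q' prints r0 (least significant word),
   '%R' prints r1 (most significant word) and '%H' prints r1, the
   higher-numbered register, regardless of endianness.  */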
15490 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15492 rtx part = gen_lowpart (SImode, x);
15493 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15497 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15499 output_operand_lossage ("invalid operand for code '%c'", code);
15503 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15507 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15509 enum machine_mode mode = GET_MODE (x);
15512 if (mode == VOIDmode)
15514 part = gen_highpart_mode (SImode, mode, x);
15515 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15519 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15521 output_operand_lossage ("invalid operand for code '%c'", code);
15525 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15529 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15531 output_operand_lossage ("invalid operand for code '%c'", code);
15535 asm_fprintf (stream, "%r", REGNO (x) + 1);
15539 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15541 output_operand_lossage ("invalid operand for code '%c'", code);
15545 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15549 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15551 output_operand_lossage ("invalid operand for code '%c'", code);
15555 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15559 asm_fprintf (stream, "%r",
15560 GET_CODE (XEXP (x, 0)) == REG
15561 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15565 asm_fprintf (stream, "{%r-%r}",
15567 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15570 /* Like 'M', but writing doubleword vector registers, for use by Neon
15571 insns. */
15574 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15575 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15577 asm_fprintf (stream, "{d%d}", regno);
15579 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15584 /* CONST_TRUE_RTX means always -- that's the default. */
15585 if (x == const_true_rtx)
15588 if (!COMPARISON_P (x))
15590 output_operand_lossage ("invalid operand for code '%c'", code);
15594 fputs (arm_condition_codes[get_arm_condition_code (x)],
15599 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15600 want to do that. */
15601 if (x == const_true_rtx)
15603 output_operand_lossage ("instruction never executed");
15606 if (!COMPARISON_P (x))
15608 output_operand_lossage ("invalid operand for code '%c'", code);
15612 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15613 (get_arm_condition_code (x))],
15617 /* Cirrus registers can be accessed in a variety of ways:
15618 single floating point (f)
15619 double floating point (d)
15620 32bit integer (fx)
15621 64bit integer (dx). */
15622 case 'W': /* Cirrus register in F mode. */
15623 case 'X': /* Cirrus register in D mode. */
15624 case 'Y': /* Cirrus register in FX mode. */
15625 case 'Z': /* Cirrus register in DX mode. */
15626 gcc_assert (GET_CODE (x) == REG
15627 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15629 fprintf (stream, "mv%s%s",
15631 : code == 'X' ? "d"
15632 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15636 /* Print cirrus register in the mode specified by the register's mode. */
15639 int mode = GET_MODE (x);
15641 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15643 output_operand_lossage ("invalid operand for code '%c'", code);
15647 fprintf (stream, "mv%s%s",
15648 mode == DFmode ? "d"
15649 : mode == SImode ? "fx"
15650 : mode == DImode ? "dx"
15651 : "f", reg_names[REGNO (x)] + 2);
15657 if (GET_CODE (x) != REG
15658 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15659 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15660 /* Bad value for wCG register number. */
15662 output_operand_lossage ("invalid operand for code '%c'", code);
15667 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15670 /* Print an iWMMXt control register name. */
15672 if (GET_CODE (x) != CONST_INT
15674 || INTVAL (x) >= 16)
15675 /* Bad value for wC register number. */
15677 output_operand_lossage ("invalid operand for code '%c'", code);
15683 static const char * wc_reg_names [16] =
15685 "wCID", "wCon", "wCSSF", "wCASF",
15686 "wC4", "wC5", "wC6", "wC7",
15687 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15688 "wC12", "wC13", "wC14", "wC15"
15691 fprintf (stream, wc_reg_names [INTVAL (x)]);
15695 /* Print the high single-precision register of a VFP double-precision
15696 register. */
15699 int mode = GET_MODE (x);
15702 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15704 output_operand_lossage ("invalid operand for code '%c'", code);
15709 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15711 output_operand_lossage ("invalid operand for code '%c'", code);
15715 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15719 /* Print a VFP/Neon double precision or quad precision register name. */
15723 int mode = GET_MODE (x);
15724 int is_quad = (code == 'q');
15727 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15729 output_operand_lossage ("invalid operand for code '%c'", code);
15733 if (GET_CODE (x) != REG
15734 || !IS_VFP_REGNUM (REGNO (x)))
15736 output_operand_lossage ("invalid operand for code '%c'", code);
15741 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15742 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15744 output_operand_lossage ("invalid operand for code '%c'", code);
15748 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15749 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15753 /* These two codes print the low/high doubleword register of a Neon quad
15754 register, respectively. For pair-structure types, can also print
15755 low/high quadword registers. */
15759 int mode = GET_MODE (x);
15762 if ((GET_MODE_SIZE (mode) != 16
15763 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15765 output_operand_lossage ("invalid operand for code '%c'", code);
15770 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15772 output_operand_lossage ("invalid operand for code '%c'", code);
15776 if (GET_MODE_SIZE (mode) == 16)
15777 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15778 + (code == 'f' ? 1 : 0));
15780 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15781 + (code == 'f' ? 1 : 0));
15785 /* Print a VFPv3 floating-point constant, represented as an integer
15786 index. */
15789 int index = vfp3_const_double_index (x);
15790 gcc_assert (index != -1);
15791 fprintf (stream, "%d", index);
15795 /* Print bits representing opcode features for Neon.
15797 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15798 and polynomials as unsigned.
15800 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15802 Bit 2 is 1 for rounding functions, 0 otherwise. */
15804 /* Identify the type as 's', 'u', 'p' or 'f'. */
15807 HOST_WIDE_INT bits = INTVAL (x);
15808 fputc ("uspf"[bits & 3], stream);
15812 /* Likewise, but signed and unsigned integers are both 'i'. */
15815 HOST_WIDE_INT bits = INTVAL (x);
15816 fputc ("iipf"[bits & 3], stream);
15820 /* As for 'T', but emit 'u' instead of 'p'. */
15823 HOST_WIDE_INT bits = INTVAL (x);
15824 fputc ("usuf"[bits & 3], stream);
15828 /* Bit 2: rounding (vs none). */
15831 HOST_WIDE_INT bits = INTVAL (x);
15832 fputs ((bits & 4) != 0 ? "r" : "", stream);
15836 /* Memory operand for vld1/vst1 instruction. */
15840 bool postinc = FALSE;
15841 gcc_assert (GET_CODE (x) == MEM);
15842 addr = XEXP (x, 0);
15843 if (GET_CODE (addr) == POST_INC)
15846 addr = XEXP (addr, 0);
15848 asm_fprintf (stream, "[%r]", REGNO (addr));
15850 fputs("!", stream);
15854 /* Translate an S register number into a D register number and element index. */
15857 int mode = GET_MODE (x);
15860 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15862 output_operand_lossage ("invalid operand for code '%c'", code);
15867 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15869 output_operand_lossage ("invalid operand for code '%c'", code);
15873 regno = regno - FIRST_VFP_REGNUM;
15874 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
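/* e.g. (illustrative) s5 maps to d2[1]: s5 is the odd half of d2, so
   regno 5 yields 5 / 2 == 2 and 5 % 2 == 1.  */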
15878 /* Register specifier for vld1.16/vst1.16. Translate the S register
15879 number into a D register number and element index. */
15882 int mode = GET_MODE (x);
15885 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15887 output_operand_lossage ("invalid operand for code '%c'", code);
15892 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15894 output_operand_lossage ("invalid operand for code '%c'", code);
15898 regno = regno - FIRST_VFP_REGNUM;
15899 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
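/* e.g. (illustrative) for 16-bit elements s5 maps to d2[2]: the odd
   S register aliases element 2 of the containing D register.  */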
15906 output_operand_lossage ("missing operand");
15910 switch (GET_CODE (x))
15913 asm_fprintf (stream, "%r", REGNO (x));
15917 output_memory_reference_mode = GET_MODE (x);
15918 output_address (XEXP (x, 0));
15925 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15926 sizeof (fpstr), 0, 1);
15927 fprintf (stream, "#%s", fpstr);
15930 fprintf (stream, "#%s", fp_immediate_constant (x));
15934 gcc_assert (GET_CODE (x) != NEG);
15935 fputc ('#', stream);
15936 if (GET_CODE (x) == HIGH)
15938 fputs (":lower16:", stream);
15942 output_addr_const (stream, x);
15948 /* Target hook for printing a memory address. */
15950 arm_print_operand_address (FILE *stream, rtx x)
15954 int is_minus = GET_CODE (x) == MINUS;
15956 if (GET_CODE (x) == REG)
15957 asm_fprintf (stream, "[%r, #0]", REGNO (x));
15958 else if (GET_CODE (x) == PLUS || is_minus)
15960 rtx base = XEXP (x, 0);
15961 rtx index = XEXP (x, 1);
15962 HOST_WIDE_INT offset = 0;
15963 if (GET_CODE (base) != REG
15964 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
15966 /* Ensure that BASE is a register. */
15967 /* (one of them must be). */
15968 /* Also ensure the SP is not used as an index register. */
15973 switch (GET_CODE (index))
15976 offset = INTVAL (index);
15979 asm_fprintf (stream, "[%r, #%wd]",
15980 REGNO (base), offset);
15984 asm_fprintf (stream, "[%r, %s%r]",
15985 REGNO (base), is_minus ? "-" : "",
15995 asm_fprintf (stream, "[%r, %s%r",
15996 REGNO (base), is_minus ? "-" : "",
15997 REGNO (XEXP (index, 0)));
15998 arm_print_operand (stream, index, 'S');
15999 fputs ("]", stream);
16004 gcc_unreachable ();
16007 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16008 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16010 extern enum machine_mode output_memory_reference_mode;
16012 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16014 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16015 asm_fprintf (stream, "[%r, #%s%d]!",
16016 REGNO (XEXP (x, 0)),
16017 GET_CODE (x) == PRE_DEC ? "-" : "",
16018 GET_MODE_SIZE (output_memory_reference_mode));
16020 asm_fprintf (stream, "[%r], #%s%d",
16021 REGNO (XEXP (x, 0)),
16022 GET_CODE (x) == POST_DEC ? "-" : "",
16023 GET_MODE_SIZE (output_memory_reference_mode));
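/* Illustrative output for the autoincrement cases above, assuming an
   SImode access on r2: PRE_INC prints "[r2, #4]!" and POST_DEC prints
   "[r2], #-4".  */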
16025 else if (GET_CODE (x) == PRE_MODIFY)
16027 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16028 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16029 asm_fprintf (stream, "#%wd]!",
16030 INTVAL (XEXP (XEXP (x, 1), 1)));
16032 asm_fprintf (stream, "%r]!",
16033 REGNO (XEXP (XEXP (x, 1), 1)));
16035 else if (GET_CODE (x) == POST_MODIFY)
16037 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16038 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16039 asm_fprintf (stream, "#%wd",
16040 INTVAL (XEXP (XEXP (x, 1), 1)));
16042 asm_fprintf (stream, "%r",
16043 REGNO (XEXP (XEXP (x, 1), 1)));
16045 else output_addr_const (stream, x);
16049 if (GET_CODE (x) == REG)
16050 asm_fprintf (stream, "[%r]", REGNO (x));
16051 else if (GET_CODE (x) == POST_INC)
16052 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16053 else if (GET_CODE (x) == PLUS)
16055 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16056 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16057 asm_fprintf (stream, "[%r, #%wd]",
16058 REGNO (XEXP (x, 0)),
16059 INTVAL (XEXP (x, 1)));
16061 asm_fprintf (stream, "[%r, %r]",
16062 REGNO (XEXP (x, 0)),
16063 REGNO (XEXP (x, 1)));
16066 output_addr_const (stream, x);
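/* Thumb addresses handled above therefore print as, for example,
   "[r3]" (plain register), "r3!" (post-increment), "[r3, #4]"
   (register plus constant) or "[r3, r2]" (register plus register).  */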
16070 /* Target hook for indicating whether a punctuation character for
16071 TARGET_PRINT_OPERAND is valid. */
16073 arm_print_operand_punct_valid_p (unsigned char code)
16075 return (code == '@' || code == '|' || code == '.'
16076 || code == '(' || code == ')' || code == '#'
16077 || (TARGET_32BIT && (code == '?'))
16078 || (TARGET_THUMB2 && (code == '!'))
16079 || (TARGET_THUMB && (code == '_')));
16082 /* Target hook for assembling integer objects. The ARM version needs to
16083 handle word-sized values specially. */
16085 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16087 enum machine_mode mode;
16089 if (size == UNITS_PER_WORD && aligned_p)
16091 fputs ("\t.word\t", asm_out_file);
16092 output_addr_const (asm_out_file, x);
16094 /* Mark symbols as position independent. We only do this in the
16095 .text segment, not in the .data segment. */
16096 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16097 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16099 /* See legitimize_pic_address for an explanation of the
16100 TARGET_VXWORKS_RTP check. */
16101 if (TARGET_VXWORKS_RTP
16102 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16103 fputs ("(GOT)", asm_out_file);
16105 fputs ("(GOTOFF)", asm_out_file);
16107 fputc ('\n', asm_out_file);
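/* Illustrative .word output from the code above for a PIC constant
   table: a non-local symbol "foo" would be emitted as
	.word	foo(GOT)
   while a local symbol gets the (GOTOFF) form.  */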
16111 mode = GET_MODE (x);
16113 if (arm_vector_mode_supported_p (mode))
16117 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16119 units = CONST_VECTOR_NUNITS (x);
16120 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16122 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16123 for (i = 0; i < units; i++)
16125 rtx elt = CONST_VECTOR_ELT (x, i);
16127 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16130 for (i = 0; i < units; i++)
16132 rtx elt = CONST_VECTOR_ELT (x, i);
16133 REAL_VALUE_TYPE rval;
16135 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16138 (rval, GET_MODE_INNER (mode),
16139 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16145 return default_assemble_integer (x, size, aligned_p);
16149 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16153 if (!TARGET_AAPCS_BASED)
16156 default_named_section_asm_out_constructor
16157 : default_named_section_asm_out_destructor) (symbol, priority);
16161 /* Put these in the .init_array section, using a special relocation. */
16162 if (priority != DEFAULT_INIT_PRIORITY)
16165 sprintf (buf, "%s.%.5u",
16166 is_ctor ? ".init_array" : ".fini_array",
16168 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16175 switch_to_section (s);
16176 assemble_align (POINTER_SIZE);
16177 fputs ("\t.word\t", asm_out_file);
16178 output_addr_const (asm_out_file, symbol);
16179 fputs ("(target1)\n", asm_out_file);
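/* Illustrative output for a constructor with (non-default) priority
   65: the section name built above is ".init_array.00065" and the
   entry is emitted as
	.word	fn(target1)
   where fn stands for the constructor's symbol.  */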
16182 /* Add a function to the list of static constructors. */
16185 arm_elf_asm_constructor (rtx symbol, int priority)
16187 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16190 /* Add a function to the list of static destructors. */
16193 arm_elf_asm_destructor (rtx symbol, int priority)
16195 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16198 /* A finite state machine takes care of noticing whether or not instructions
16199 can be conditionally executed, thereby decreasing execution time and code
16200 size by deleting branch instructions. The fsm is controlled by
16201 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16203 /* The states of the fsm controlling condition codes are:
16204 0: normal, do nothing special
16205 1: make ASM_OUTPUT_OPCODE not output this instruction
16206 2: make ASM_OUTPUT_OPCODE not output this instruction
16207 3: make instructions conditional
16208 4: make instructions conditional
16210 State transitions (state->state by whom under condition):
16211 0 -> 1 final_prescan_insn if the `target' is a label
16212 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16213 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16214 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16215 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16216 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16217 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16218 (the target insn is arm_target_insn).
16220 If the jump clobbers the conditions then we use states 2 and 4.
16222 A similar thing can be done with conditional return insns.
16224 XXX In case the `target' is an unconditional branch, this conditionalising
16225 of the instructions always reduces code size, but not always execution
16226 time. But then, I want to reduce the code size to somewhere near what
16227 /bin/cc produces. */
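/* An illustrative, hand-written example of the transformation this
   fsm enables (not taken from actual compiler output):

	cmp	r0, #0			cmp	r0, #0
	beq	.L1		==>	addne	r1, r1, #1
	add	r1, r1, #1
   .L1:

   The conditional branch is deleted and the skipped instruction is
   executed under the inverse condition instead.  */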
16229 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16230 instructions. When a COND_EXEC instruction is seen the subsequent
16231 instructions are scanned so that multiple conditional instructions can be
16232 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16233 specify the length and true/false mask for the IT block. These will be
16234 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
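/* For example (illustrative): an IT block of two instructions, the
   first executed when the condition holds and the second when it does
   not, has arm_condexec_masklen == 2 and arm_condexec_mask == 0x1, so
   thumb2_asm_output_opcode prints "ite <cond>" before the first
   instruction.  */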
16236 /* Returns the index of the ARM condition code string in
16237 `arm_condition_codes'. COMPARISON should be an rtx like
16238 `(eq (...) (...))'. */
16239 static enum arm_cond_code
16240 get_arm_condition_code (rtx comparison)
16242 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16243 enum arm_cond_code code;
16244 enum rtx_code comp_code = GET_CODE (comparison);
16246 if (GET_MODE_CLASS (mode) != MODE_CC)
16247 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16248 XEXP (comparison, 1));
16252 case CC_DNEmode: code = ARM_NE; goto dominance;
16253 case CC_DEQmode: code = ARM_EQ; goto dominance;
16254 case CC_DGEmode: code = ARM_GE; goto dominance;
16255 case CC_DGTmode: code = ARM_GT; goto dominance;
16256 case CC_DLEmode: code = ARM_LE; goto dominance;
16257 case CC_DLTmode: code = ARM_LT; goto dominance;
16258 case CC_DGEUmode: code = ARM_CS; goto dominance;
16259 case CC_DGTUmode: code = ARM_HI; goto dominance;
16260 case CC_DLEUmode: code = ARM_LS; goto dominance;
16261 case CC_DLTUmode: code = ARM_CC;
16264 gcc_assert (comp_code == EQ || comp_code == NE);
16266 if (comp_code == EQ)
16267 return ARM_INVERSE_CONDITION_CODE (code);
16273 case NE: return ARM_NE;
16274 case EQ: return ARM_EQ;
16275 case GE: return ARM_PL;
16276 case LT: return ARM_MI;
16277 default: gcc_unreachable ();
16283 case NE: return ARM_NE;
16284 case EQ: return ARM_EQ;
16285 default: gcc_unreachable ();
16291 case NE: return ARM_MI;
16292 case EQ: return ARM_PL;
16293 default: gcc_unreachable ();
16298 /* These encodings assume that AC=1 in the FPA system control
16299 byte. This allows us to handle all cases except UNEQ and LTGT. */
16303 case GE: return ARM_GE;
16304 case GT: return ARM_GT;
16305 case LE: return ARM_LS;
16306 case LT: return ARM_MI;
16307 case NE: return ARM_NE;
16308 case EQ: return ARM_EQ;
16309 case ORDERED: return ARM_VC;
16310 case UNORDERED: return ARM_VS;
16311 case UNLT: return ARM_LT;
16312 case UNLE: return ARM_LE;
16313 case UNGT: return ARM_HI;
16314 case UNGE: return ARM_PL;
16315 /* UNEQ and LTGT do not have a representation. */
16316 case UNEQ: /* Fall through. */
16317 case LTGT: /* Fall through. */
16318 default: gcc_unreachable ();
16324 case NE: return ARM_NE;
16325 case EQ: return ARM_EQ;
16326 case GE: return ARM_LE;
16327 case GT: return ARM_LT;
16328 case LE: return ARM_GE;
16329 case LT: return ARM_GT;
16330 case GEU: return ARM_LS;
16331 case GTU: return ARM_CC;
16332 case LEU: return ARM_CS;
16333 case LTU: return ARM_HI;
16334 default: gcc_unreachable ();
16340 case LTU: return ARM_CS;
16341 case GEU: return ARM_CC;
16342 default: gcc_unreachable ();
16348 case NE: return ARM_NE;
16349 case EQ: return ARM_EQ;
16350 case GEU: return ARM_CS;
16351 case GTU: return ARM_HI;
16352 case LEU: return ARM_LS;
16353 case LTU: return ARM_CC;
16354 default: gcc_unreachable ();
16360 case GE: return ARM_GE;
16361 case LT: return ARM_LT;
16362 case GEU: return ARM_CS;
16363 case LTU: return ARM_CC;
16364 default: gcc_unreachable ();
16370 case NE: return ARM_NE;
16371 case EQ: return ARM_EQ;
16372 case GE: return ARM_GE;
16373 case GT: return ARM_GT;
16374 case LE: return ARM_LE;
16375 case LT: return ARM_LT;
16376 case GEU: return ARM_CS;
16377 case GTU: return ARM_HI;
16378 case LEU: return ARM_LS;
16379 case LTU: return ARM_CC;
16380 default: gcc_unreachable ();
16383 default: gcc_unreachable ();
16387 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions. */
16390 thumb2_final_prescan_insn (rtx insn)
16392 rtx first_insn = insn;
16393 rtx body = PATTERN (insn);
16395 enum arm_cond_code code;
16399 /* Remove the previous insn from the count of insns to be output. */
16400 if (arm_condexec_count)
16401 arm_condexec_count--;
16403 /* Nothing to do if we are already inside a conditional block. */
16404 if (arm_condexec_count)
16407 if (GET_CODE (body) != COND_EXEC)
16410 /* Conditional jumps are implemented directly. */
16411 if (GET_CODE (insn) == JUMP_INSN)
16414 predicate = COND_EXEC_TEST (body);
16415 arm_current_cc = get_arm_condition_code (predicate);
16417 n = get_attr_ce_count (insn);
16418 arm_condexec_count = 1;
16419 arm_condexec_mask = (1 << n) - 1;
16420 arm_condexec_masklen = n;
16421 /* See if subsequent instructions can be combined into the same block. */
16424 insn = next_nonnote_insn (insn);
16426 /* Jumping into the middle of an IT block is illegal, so a label or
16427 barrier terminates the block. */
16428 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
16431 body = PATTERN (insn);
16432 /* USE and CLOBBER aren't really insns, so just skip them. */
16433 if (GET_CODE (body) == USE
16434 || GET_CODE (body) == CLOBBER)
16437 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16438 if (GET_CODE (body) != COND_EXEC)
16440 /* Allow up to 4 conditionally executed instructions in a block. */
16441 n = get_attr_ce_count (insn);
16442 if (arm_condexec_masklen + n > 4)
16445 predicate = COND_EXEC_TEST (body);
16446 code = get_arm_condition_code (predicate);
16447 mask = (1 << n) - 1;
16448 if (arm_current_cc == code)
16449 arm_condexec_mask |= (mask << arm_condexec_masklen);
16450 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
16453 arm_condexec_count++;
16454 arm_condexec_masklen += n;
16456 /* A jump must be the last instruction in a conditional block. */
16457 if (GET_CODE (insn) == JUMP_INSN)
16460 /* Restore recog_data (getting the attributes of other insns can
16461 destroy this array, but final.c assumes that it remains intact
16462 across this call). */
16463 extract_constrain_insn_cached (first_insn);
16467 arm_final_prescan_insn (rtx insn)
16469 /* BODY will hold the body of INSN. */
16470 rtx body = PATTERN (insn);
16472 /* This will be 1 if trying to repeat the trick, and things need to be
16473 reversed if it appears to fail. */
16476 /* If we start with a return insn, we only succeed if we find another one. */
16477 int seeking_return = 0;
16479 /* START_INSN will hold the insn from where we start looking. This is the
16480 first insn after the following code_label if REVERSE is true. */
16481 rtx start_insn = insn;
16483 /* If in state 4, check if the target branch is reached, in order to
16484 change back to state 0. */
16485 if (arm_ccfsm_state == 4)
16487 if (insn == arm_target_insn)
16489 arm_target_insn = NULL;
16490 arm_ccfsm_state = 0;
16495 /* If in state 3, it is possible to repeat the trick, if this insn is an
16496 unconditional branch to a label, and immediately following this branch
16497 is the previous target label which is only used once, and the label this
16498 branch jumps to is not too far off. */
16499 if (arm_ccfsm_state == 3)
16501 if (simplejump_p (insn))
16503 start_insn = next_nonnote_insn (start_insn);
16504 if (GET_CODE (start_insn) == BARRIER)
16506 /* XXX Isn't this always a barrier? */
16507 start_insn = next_nonnote_insn (start_insn);
16509 if (GET_CODE (start_insn) == CODE_LABEL
16510 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16511 && LABEL_NUSES (start_insn) == 1)
16516 else if (GET_CODE (body) == RETURN)
16518 start_insn = next_nonnote_insn (start_insn);
16519 if (GET_CODE (start_insn) == BARRIER)
16520 start_insn = next_nonnote_insn (start_insn);
16521 if (GET_CODE (start_insn) == CODE_LABEL
16522 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16523 && LABEL_NUSES (start_insn) == 1)
16526 seeking_return = 1;
16535 gcc_assert (!arm_ccfsm_state || reverse);
16536 if (GET_CODE (insn) != JUMP_INSN)
16539 /* This jump might be paralleled with a clobber of the condition codes;
16540 the jump should always come first. */
16541 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16542 body = XVECEXP (body, 0, 0);
16545 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16546 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16549 int fail = FALSE, succeed = FALSE;
16550 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16551 int then_not_else = TRUE;
16552 rtx this_insn = start_insn, label = 0;
16554 /* Register the insn jumped to. */
16557 if (!seeking_return)
16558 label = XEXP (SET_SRC (body), 0);
16560 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16561 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16562 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16564 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16565 then_not_else = FALSE;
16567 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16568 seeking_return = 1;
16569 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16571 seeking_return = 1;
16572 then_not_else = FALSE;
16575 gcc_unreachable ();
16577 /* See how many insns this branch skips, and what kind of insns. If all
16578 insns are okay, and the label or unconditional branch to the same
16579 label is not too far away, succeed. */
16580 for (insns_skipped = 0;
16581 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16585 this_insn = next_nonnote_insn (this_insn);
16589 switch (GET_CODE (this_insn))
16592 /* Succeed if it is the target label, otherwise fail since
16593 control falls in from somewhere else. */
16594 if (this_insn == label)
16596 arm_ccfsm_state = 1;
16604 /* Succeed if the following insn is the target label.
16606 If return insns are used then the last insn in a function
16607 will be a barrier. */
16608 this_insn = next_nonnote_insn (this_insn);
16609 if (this_insn && this_insn == label)
16611 arm_ccfsm_state = 1;
16619 /* The AAPCS says that conditional calls should not be
16620 used since they make interworking inefficient (the
16621 linker can't transform BL<cond> into BLX). That's
16622 only a problem if the machine has BLX. */
16629 /* Succeed if the following insn is the target label, or
16630 if the following two insns are a barrier and the target label. */
16632 this_insn = next_nonnote_insn (this_insn);
16633 if (this_insn && GET_CODE (this_insn) == BARRIER)
16634 this_insn = next_nonnote_insn (this_insn);
16636 if (this_insn && this_insn == label
16637 && insns_skipped < max_insns_skipped)
16639 arm_ccfsm_state = 1;
16647 /* If this is an unconditional branch to the same label, succeed.
16648 If it is to another label, do nothing. If it is conditional, fail. */
16650 /* XXX Probably, the tests for SET and the PC are unnecessary. */
16653 scanbody = PATTERN (this_insn);
16654 if (GET_CODE (scanbody) == SET
16655 && GET_CODE (SET_DEST (scanbody)) == PC)
16657 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16658 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16660 arm_ccfsm_state = 2;
16663 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16666 /* Fail if a conditional return is undesirable (e.g. on a
16667 StrongARM), but still allow this if optimizing for size. */
16668 else if (GET_CODE (scanbody) == RETURN
16669 && !use_return_insn (TRUE, NULL)
16672 else if (GET_CODE (scanbody) == RETURN
16675 arm_ccfsm_state = 2;
16678 else if (GET_CODE (scanbody) == PARALLEL)
16680 switch (get_attr_conds (this_insn))
16690 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16695 /* Instructions using or affecting the condition codes make it fail. */
16697 scanbody = PATTERN (this_insn);
16698 if (!(GET_CODE (scanbody) == SET
16699 || GET_CODE (scanbody) == PARALLEL)
16700 || get_attr_conds (this_insn) != CONDS_NOCOND)
16703 /* A conditional Cirrus instruction must be followed by
16704 a non-Cirrus instruction. However, since this
16705 function conditionalizes instructions, and since by
16706 the time we get here we cannot add instructions
16707 (nops) because shorten_branches() has already been
16708 called, we disable conditionalizing Cirrus
16709 instructions to be safe. */
16710 if (GET_CODE (scanbody) != USE
16711 && GET_CODE (scanbody) != CLOBBER
16712 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16722 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16723 arm_target_label = CODE_LABEL_NUMBER (label);
16726 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16728 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16730 this_insn = next_nonnote_insn (this_insn);
16731 gcc_assert (!this_insn
16732 || (GET_CODE (this_insn) != BARRIER
16733 && GET_CODE (this_insn) != CODE_LABEL));
16737 /* Oh dear! We ran off the end; give up. */
16738 extract_constrain_insn_cached (insn);
16739 arm_ccfsm_state = 0;
16740 arm_target_insn = NULL;
16743 arm_target_insn = this_insn;
16746 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from what it was. */
16749 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16751 if (reverse || then_not_else)
16752 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16755 /* Restore recog_data (getting the attributes of other insns can
16756 destroy this array, but final.c assumes that it remains intact
16757 across this call). */
16758 extract_constrain_insn_cached (insn);
16762 /* Output IT instructions. */
16764 thumb2_asm_output_opcode (FILE * stream)
16769 if (arm_condexec_mask)
16771 for (n = 0; n < arm_condexec_masklen; n++)
16772 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16774 asm_fprintf (stream, "i%s\t%s\n\t", buff,
16775 arm_condition_codes[arm_current_cc]);
16776 arm_condexec_mask = 0;
16780 /* Returns true if REGNO is a valid register
16781 for holding a quantity of type MODE. */
16783 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16785 if (GET_MODE_CLASS (mode) == MODE_CC)
16786 return (regno == CC_REGNUM
16787 || (TARGET_HARD_FLOAT && TARGET_VFP
16788 && regno == VFPCC_REGNUM));
16791 /* For the Thumb we only allow values bigger than SImode in
16792 registers 0 - 6, so that there is always a second low
16793 register available to hold the upper part of the value.
16794 We probably ought to ensure that the register is the
16795 start of an even numbered register pair. */
16796 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16798 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16799 && IS_CIRRUS_REGNUM (regno))
16800 /* We have outlawed SI values in Cirrus registers because they
16801 reside in the lower 32 bits, but SF values reside in the
16802 upper 32 bits. This causes gcc all sorts of grief. We can't
16803 even split the registers into pairs because Cirrus SI values
16804 get sign-extended to 64 bits -- aldyh. */
16805 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16807 if (TARGET_HARD_FLOAT && TARGET_VFP
16808 && IS_VFP_REGNUM (regno))
16810 if (mode == SFmode || mode == SImode)
16811 return VFP_REGNO_OK_FOR_SINGLE (regno);
16813 if (mode == DFmode)
16814 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16816 /* VFP registers can hold HFmode values, but there is no point in
16817 putting them there unless we have hardware conversion insns. */
16818 if (mode == HFmode)
16819 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16822 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16823 || (VALID_NEON_QREG_MODE (mode)
16824 && NEON_REGNO_OK_FOR_QUAD (regno))
16825 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16826 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16827 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16828 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16829 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16834 if (TARGET_REALLY_IWMMXT)
16836 if (IS_IWMMXT_GR_REGNUM (regno))
16837 return mode == SImode;
16839 if (IS_IWMMXT_REGNUM (regno))
16840 return VALID_IWMMXT_REG_MODE (mode);
16843 /* We allow almost any value to be stored in the general registers.
16844 Restrict doubleword quantities to even register pairs so that we can
16845 use ldrd. Do not allow very large Neon structure opaque modes in
16846 general registers; they would use too many. */
16847 if (regno <= LAST_ARM_REGNUM)
16848 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16849 && ARM_NUM_REGS (mode) <= 4;
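/* For example, with TARGET_LDRD a DImode value may live in {r0, r1}
   but not in {r1, r2}, since ldrd/strd require an even-numbered base
   register.  */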
16851 if (regno == FRAME_POINTER_REGNUM
16852 || regno == ARG_POINTER_REGNUM)
16853 /* We only allow integers in the fake hard registers. */
16854 return GET_MODE_CLASS (mode) == MODE_INT;
16856 /* The only registers left are the FPA registers
16857 which we only allow to hold FP values. */
16858 return (TARGET_HARD_FLOAT && TARGET_FPA
16859 && GET_MODE_CLASS (mode) == MODE_FLOAT
16860 && regno >= FIRST_FPA_REGNUM
16861 && regno <= LAST_FPA_REGNUM);
16864 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16865 not used in arm mode. */
16868 arm_regno_class (int regno)
16872 if (regno == STACK_POINTER_REGNUM)
16874 if (regno == CC_REGNUM)
16881 if (TARGET_THUMB2 && regno < 8)
16884 if ( regno <= LAST_ARM_REGNUM
16885 || regno == FRAME_POINTER_REGNUM
16886 || regno == ARG_POINTER_REGNUM)
16887 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16889 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16890 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16892 if (IS_CIRRUS_REGNUM (regno))
16893 return CIRRUS_REGS;
16895 if (IS_VFP_REGNUM (regno))
16897 if (regno <= D7_VFP_REGNUM)
16898 return VFP_D0_D7_REGS;
16899 else if (regno <= LAST_LO_VFP_REGNUM)
16900 return VFP_LO_REGS;
16902 return VFP_HI_REGS;
16905 if (IS_IWMMXT_REGNUM (regno))
16906 return IWMMXT_REGS;
16908 if (IS_IWMMXT_GR_REGNUM (regno))
16909 return IWMMXT_GR_REGS;
16914 /* Handle a special case when computing the offset
16915 of an argument from the frame pointer. */
16917 arm_debugger_arg_offset (int value, rtx addr)
16921 /* We are only interested if dbxout_parms() failed to compute the offset. */
16925 /* We can only cope with the case where the address is held in a register. */
16926 if (GET_CODE (addr) != REG)
16929 /* If we are using the frame pointer to point at the argument, then
16930 an offset of 0 is correct. */
16931 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16934 /* If we are using the stack pointer to point at the
16935 argument, then an offset of 0 is correct. */
16936 /* ??? Check this is consistent with thumb2 frame layout. */
16937 if ((TARGET_THUMB || !frame_pointer_needed)
16938 && REGNO (addr) == SP_REGNUM)
16941 /* Oh dear. The argument is pointed to by a register rather
16942 than being held in a register, or being stored at a known
16943 offset from the frame pointer. Since GDB only understands
16944 those two kinds of argument we must translate the address
16945 held in the register into an offset from the frame pointer.
16946 We do this by searching through the insns for the function
16947 looking to see where this register gets its value. If the
16948 register is initialized from the frame pointer plus an offset
16949 then we are in luck and we can continue, otherwise we give up.
16951 This code is exercised by producing debugging information
16952 for a function with arguments like this:
16954 double func (double a, double b, int c, double d) {return d;}
16956 Without this code the stab for parameter 'd' will be set to
16957 an offset of 0 from the frame pointer, rather than 8. */
16959 /* The if() statement says:
16961 If the insn is a normal instruction
16962 and if the insn is setting the value in a register
16963 and if the register being set is the register holding the address of the argument
16964 and if the address is computed by an addition
16965 that involves adding to a register
16966 which is the frame pointer
a constant integer

then... */
16971 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16973 if ( GET_CODE (insn) == INSN
16974 && GET_CODE (PATTERN (insn)) == SET
16975 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16976 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16977 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16978 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16979 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16982 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16991 warning (0, "unable to compute real location of stacked parameter");
16992 value = 8; /* XXX magic hack */
16998 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17001 if ((MASK) & insn_flags) \
17002 add_builtin_function ((NAME), (TYPE), (CODE), \
17003 BUILT_IN_MD, NULL, NULL_TREE); \
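/* For example, the call
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);
   further down registers __builtin_arm_wzero only when FL_IWMMXT is
   set in insn_flags.  */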
17007 struct builtin_description
17009 const unsigned int mask;
17010 const enum insn_code icode;
17011 const char * const name;
17012 const enum arm_builtins code;
17013 const enum rtx_code comparison;
17014 const unsigned int flag;
17017 static const struct builtin_description bdesc_2arg[] =
17019 #define IWMMXT_BUILTIN(code, string, builtin) \
17020 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17021 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
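/* For example, the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */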
17023 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17024 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17025 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17026 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17027 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17028 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17029 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17030 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17031 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17032 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17033 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17034 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17035 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17036 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17037 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17038 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17039 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17040 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17041 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17042 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17043 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17044 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17045 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17046 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17047 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17048 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17049 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17050 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17051 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17052 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17053 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17054 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17055 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17056 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17057 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17058 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17059 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17060 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17061 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17062 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17063 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17064 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17065 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17066 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17067 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17068 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17069 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17070 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17071 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17072 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17073 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17074 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17075 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17076 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17077 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17078 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17079 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17080 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17082 #define IWMMXT_BUILTIN2(code, builtin) \
17083 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17085 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17086 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17087 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17088 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17089 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17090 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17091 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17092 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17093 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17094 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17095 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17096 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17097 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17098 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17099 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17100 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17101 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17102 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17103 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17104 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17105 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17106 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17107 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17108 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17109 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17110 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17111 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17112 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17113 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17114 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17115 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17116 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17119 static const struct builtin_description bdesc_1arg[] =
17121 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17122 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17123 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17124 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17125 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17126 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17127 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17128 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17129 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17130 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17131 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17132 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17133 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17134 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17135 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17136 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17137 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17138 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17141 /* Set up all the iWMMXt builtins. This is
17142 not called if TARGET_IWMMXT is zero. */
17145 arm_init_iwmmxt_builtins (void)
17147 const struct builtin_description * d;
17149 tree endlink = void_list_node;
17151 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17152 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17153 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17156 = build_function_type (integer_type_node,
17157 tree_cons (NULL_TREE, integer_type_node, endlink));
17158 tree v8qi_ftype_v8qi_v8qi_int
17159 = build_function_type (V8QI_type_node,
17160 tree_cons (NULL_TREE, V8QI_type_node,
17161 tree_cons (NULL_TREE, V8QI_type_node,
17162 tree_cons (NULL_TREE,
17165 tree v4hi_ftype_v4hi_int
17166 = build_function_type (V4HI_type_node,
17167 tree_cons (NULL_TREE, V4HI_type_node,
17168 tree_cons (NULL_TREE, integer_type_node,
17170 tree v2si_ftype_v2si_int
17171 = build_function_type (V2SI_type_node,
17172 tree_cons (NULL_TREE, V2SI_type_node,
17173 tree_cons (NULL_TREE, integer_type_node,
17175 tree v2si_ftype_di_di
17176 = build_function_type (V2SI_type_node,
17177 tree_cons (NULL_TREE, long_long_integer_type_node,
17178 tree_cons (NULL_TREE, long_long_integer_type_node,
17180 tree di_ftype_di_int
17181 = build_function_type (long_long_integer_type_node,
17182 tree_cons (NULL_TREE, long_long_integer_type_node,
17183 tree_cons (NULL_TREE, integer_type_node,
17185 tree di_ftype_di_int_int
17186 = build_function_type (long_long_integer_type_node,
17187 tree_cons (NULL_TREE, long_long_integer_type_node,
17188 tree_cons (NULL_TREE, integer_type_node,
17189 tree_cons (NULL_TREE,
17192 tree int_ftype_v8qi
17193 = build_function_type (integer_type_node,
17194 tree_cons (NULL_TREE, V8QI_type_node,
17196 tree int_ftype_v4hi
17197 = build_function_type (integer_type_node,
17198 tree_cons (NULL_TREE, V4HI_type_node,
17200 tree int_ftype_v2si
17201 = build_function_type (integer_type_node,
17202 tree_cons (NULL_TREE, V2SI_type_node,
17204 tree int_ftype_v8qi_int
17205 = build_function_type (integer_type_node,
17206 tree_cons (NULL_TREE, V8QI_type_node,
17207 tree_cons (NULL_TREE, integer_type_node,
17209 tree int_ftype_v4hi_int
17210 = build_function_type (integer_type_node,
17211 tree_cons (NULL_TREE, V4HI_type_node,
17212 tree_cons (NULL_TREE, integer_type_node,
17214 tree int_ftype_v2si_int
17215 = build_function_type (integer_type_node,
17216 tree_cons (NULL_TREE, V2SI_type_node,
17217 tree_cons (NULL_TREE, integer_type_node,
17219 tree v8qi_ftype_v8qi_int_int
17220 = build_function_type (V8QI_type_node,
17221 tree_cons (NULL_TREE, V8QI_type_node,
17222 tree_cons (NULL_TREE, integer_type_node,
17223 tree_cons (NULL_TREE,
17226 tree v4hi_ftype_v4hi_int_int
17227 = build_function_type (V4HI_type_node,
17228 tree_cons (NULL_TREE, V4HI_type_node,
17229 tree_cons (NULL_TREE, integer_type_node,
17230 tree_cons (NULL_TREE,
17233 tree v2si_ftype_v2si_int_int
17234 = build_function_type (V2SI_type_node,
17235 tree_cons (NULL_TREE, V2SI_type_node,
17236 tree_cons (NULL_TREE, integer_type_node,
17237 tree_cons (NULL_TREE,
17240 /* Miscellaneous. */
17241 tree v8qi_ftype_v4hi_v4hi
17242 = build_function_type (V8QI_type_node,
17243 tree_cons (NULL_TREE, V4HI_type_node,
17244 tree_cons (NULL_TREE, V4HI_type_node,
17246 tree v4hi_ftype_v2si_v2si
17247 = build_function_type (V4HI_type_node,
17248 tree_cons (NULL_TREE, V2SI_type_node,
17249 tree_cons (NULL_TREE, V2SI_type_node,
17251 tree v2si_ftype_v4hi_v4hi
17252 = build_function_type (V2SI_type_node,
17253 tree_cons (NULL_TREE, V4HI_type_node,
17254 tree_cons (NULL_TREE, V4HI_type_node,
17256 tree v2si_ftype_v8qi_v8qi
17257 = build_function_type (V2SI_type_node,
17258 tree_cons (NULL_TREE, V8QI_type_node,
17259 tree_cons (NULL_TREE, V8QI_type_node,
17261 tree v4hi_ftype_v4hi_di
17262 = build_function_type (V4HI_type_node,
17263 tree_cons (NULL_TREE, V4HI_type_node,
17264 tree_cons (NULL_TREE,
17265 long_long_integer_type_node,
17267 tree v2si_ftype_v2si_di
17268 = build_function_type (V2SI_type_node,
17269 tree_cons (NULL_TREE, V2SI_type_node,
17270 tree_cons (NULL_TREE,
17271 long_long_integer_type_node,
17273 tree void_ftype_int_int
17274 = build_function_type (void_type_node,
17275 tree_cons (NULL_TREE, integer_type_node,
17276 tree_cons (NULL_TREE, integer_type_node,
17279 = build_function_type (long_long_unsigned_type_node, endlink);
17281 = build_function_type (long_long_integer_type_node,
17282 tree_cons (NULL_TREE, V8QI_type_node,
17285 = build_function_type (long_long_integer_type_node,
17286 tree_cons (NULL_TREE, V4HI_type_node,
17289 = build_function_type (long_long_integer_type_node,
17290 tree_cons (NULL_TREE, V2SI_type_node,
17292 tree v2si_ftype_v4hi
17293 = build_function_type (V2SI_type_node,
17294 tree_cons (NULL_TREE, V4HI_type_node,
17296 tree v4hi_ftype_v8qi
17297 = build_function_type (V4HI_type_node,
17298 tree_cons (NULL_TREE, V8QI_type_node,
17301 tree di_ftype_di_v4hi_v4hi
17302 = build_function_type (long_long_unsigned_type_node,
17303 tree_cons (NULL_TREE,
17304 long_long_unsigned_type_node,
17305 tree_cons (NULL_TREE, V4HI_type_node,
17306 tree_cons (NULL_TREE,
17310 tree di_ftype_v4hi_v4hi
17311 = build_function_type (long_long_unsigned_type_node,
17312 tree_cons (NULL_TREE, V4HI_type_node,
17313 tree_cons (NULL_TREE, V4HI_type_node,
17316 /* Normal vector binops. */
17317 tree v8qi_ftype_v8qi_v8qi
17318 = build_function_type (V8QI_type_node,
17319 tree_cons (NULL_TREE, V8QI_type_node,
17320 tree_cons (NULL_TREE, V8QI_type_node,
17322 tree v4hi_ftype_v4hi_v4hi
17323 = build_function_type (V4HI_type_node,
17324 tree_cons (NULL_TREE, V4HI_type_node,
17325 tree_cons (NULL_TREE, V4HI_type_node,
17327 tree v2si_ftype_v2si_v2si
17328 = build_function_type (V2SI_type_node,
17329 tree_cons (NULL_TREE, V2SI_type_node,
17330 tree_cons (NULL_TREE, V2SI_type_node,
17332 tree di_ftype_di_di
17333 = build_function_type (long_long_unsigned_type_node,
17334 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17335 tree_cons (NULL_TREE,
17336 long_long_unsigned_type_node,
17339 /* Add all builtins that are more or less simple operations on two operands. */
17341 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17343 /* Use one of the operands; the target can have a different mode for
17344 mask-generating compares. */
17345 enum machine_mode mode;
17351 mode = insn_data[d->icode].operand[1].mode;
17356 type = v8qi_ftype_v8qi_v8qi;
17359 type = v4hi_ftype_v4hi_v4hi;
17362 type = v2si_ftype_v2si_v2si;
17365 type = di_ftype_di_di;
17369 gcc_unreachable ();
17372 def_mbuiltin (d->mask, d->name, type, d->code);
17375 /* Add the remaining MMX insns with somewhat more complicated types. */
17376 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
17377 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
17378 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
17380 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
17381 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
17382 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
17383 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
17384 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
17385 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
17387 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
17388 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
17389 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
17390 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
17391 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
17392 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
17394 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
17395 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
17396 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
17397 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
17398 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
17399 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
17401 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
17402 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
17403 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
17404 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
17405 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
17406 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
17408 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
17410 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
17411 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
17412 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
17413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
17415 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
17416 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
17417 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17418 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17419 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17420 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17421 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17422 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17425 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17426 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17429 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17430 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17431 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17433 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17435 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17438 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17468 arm_init_tls_builtins (void)
17472 ftype = build_function_type (ptr_type_node, void_list_node);
17473 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17474 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17476 TREE_NOTHROW (decl) = 1;
17477 TREE_READONLY (decl) = 1;
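/* The builtin declared above is usable directly from C, e.g. (an
   illustrative use):
     void *tp = __builtin_thread_pointer ();  */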
17480 enum neon_builtin_type_bits {
17496 #define v8qi_UP T_V8QI
17497 #define v4hi_UP T_V4HI
17498 #define v2si_UP T_V2SI
17499 #define v2sf_UP T_V2SF
17501 #define v16qi_UP T_V16QI
17502 #define v8hi_UP T_V8HI
17503 #define v4si_UP T_V4SI
17504 #define v4sf_UP T_V4SF
17505 #define v2di_UP T_V2DI
17510 #define UP(X) X##_UP
17545 NEON_LOADSTRUCTLANE,
17547 NEON_STORESTRUCTLANE,
17556 const neon_itype itype;
17558 const enum insn_code codes[T_MAX];
17559 const unsigned int num_vars;
17560 unsigned int base_fcode;
17561 } neon_builtin_datum;
17563 #define CF(N,X) CODE_FOR_neon_##N##X
17565 #define VAR1(T, N, A) \
17566 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17567 #define VAR2(T, N, A, B) \
17568 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17569 #define VAR3(T, N, A, B, C) \
17570 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17571 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17572 #define VAR4(T, N, A, B, C, D) \
17573 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17574 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17575 #define VAR5(T, N, A, B, C, D, E) \
17576 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17577 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17578 #define VAR6(T, N, A, B, C, D, E, F) \
17579 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17580 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17581 #define VAR7(T, N, A, B, C, D, E, F, G) \
17582 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17583 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17585 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17586 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17588 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17589 CF (N, G), CF (N, H) }, 8, 0
17590 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17591 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17592 | UP (H) | UP (I), \
17593 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17594 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17595 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17596 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17597 | UP (H) | UP (I) | UP (J), \
17598 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17599 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
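/* As an example of the macros above (macro expansion only),
     VAR1 (UNOP, vcnt, v8qi)
   expands to
     "vcnt", NEON_UNOP, T_V8QI, { CODE_FOR_neon_vcntv8qi }, 1, 0
   via UP (v8qi) and CF (vcnt, v8qi).  */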
17601 /* The mode entries in the following table correspond to the "key" type of the
17602 instruction variant, i.e. equivalent to that which would be specified after
17603 the assembler mnemonic, which usually refers to the last vector operand.
17604 (Signed/unsigned/polynomial types are not differentiated, though, and
17605 are all mapped onto the same mode for a given element size.) The modes
17606 listed per instruction should be the same as those defined for that
17607 instruction's pattern in neon.md.
17608 WARNING: Variants should be listed in the same increasing order as
17609 neon_builtin_type_bits. */
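/* For example, in the vadd entry below the v4si variant corresponds
   to the neon_vaddv4si pattern in neon.md, i.e. the variant behind a
   quadword vadd.i32.  */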
17611 static neon_builtin_datum neon_builtin_data[] =
17613 { VAR10 (BINOP, vadd,
17614 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17615 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17616 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17617 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17618 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17619 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17620 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17621 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17622 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17623 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17624 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17625 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17626 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17627 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17628 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17629 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17630 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17631 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17632 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17633 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17634 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17635 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17636 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17637 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17638 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17639 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17640 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17641 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17642 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17643 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17644 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17645 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17646 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17647 { VAR10 (BINOP, vsub,
    v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorr,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (BINOP, veor,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vbic,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorn,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};
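/* Note: each VARn entry above expands, for every listed mode, into one
   builtin plus its insn code.  As an illustration (not original source
   text), VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) describes the builtins
   __builtin_neon_vsublv8qi, __builtin_neon_vsublv4hi and
   __builtin_neon_vsublv2si, whose names are assembled by the sprintf
   in arm_init_neon_builtins below.  */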
static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];
  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
					     "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
					     "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
					     "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
					     "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
					     "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
					     "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
					     "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);
  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);
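/* The const-qualified pointer types just built are what the load
   builtins take as their memory operand; e.g. a vld1 variant operating
   on QImode elements receives a const_intQI_pointer_node argument (see
   the is_load handling further down in this function).  */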
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");
  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
			      V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
			      V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
			      V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
			      V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
			      neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
			      V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
			      V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
			      V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
			      V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
			      V2DI_type_node, NULL);
  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;
  for (i = 0; i < 5; i++)
    {
      unsigned int j;

      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
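/* reinterp_ftype_dreg[i][j] is the type of a function taking
   dreg_types[j] and returning dreg_types[i]; for instance
   reinterp_ftype_dreg[0][3] reinterprets a V2SF as a V8QI.  The qreg
   array is the same grid over the 128-bit types.  */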
  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
	{
	  const char* const modenames[] = {
	    "v8qi", "v4hi", "v2si", "v2sf", "di",
	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
	  };
	  char namebuf[60];
	  tree ftype = NULL;
	  enum insn_code icode;
	  int is_load = 0, is_store = 0;

	  if ((d->bits & (1 << j)) == 0)
	    continue;

	  icode = d->codes[codeidx++];

	  switch (d->itype)
	    {
	    case NEON_LOAD1:
	    case NEON_LOAD1LANE:
	    case NEON_LOADSTRUCT:
	    case NEON_LOADSTRUCTLANE:
	      is_load = 1;
	      /* Fall through.  */
	    case NEON_STORE1:
	    case NEON_STORE1LANE:
	    case NEON_STORESTRUCT:
	    case NEON_STORESTRUCTLANE:
	      if (!is_load)
		is_store = 1;
	      /* Fall through.  */
	    case NEON_UNOP:
	    case NEON_BINOP:
	    case NEON_LOGICBINOP:
	    case NEON_SHIFTINSERT:
	    case NEON_TERNOP:
	    case NEON_GETLANE:
	    case NEON_SETLANE:
	    case NEON_CREATE:
	    case NEON_DUP:
	    case NEON_DUPLANE:
	    case NEON_SHIFTIMM:
	    case NEON_SHIFTACC:
	    case NEON_COMBINE:
	    case NEON_SPLIT:
	    case NEON_CONVERT:
	    case NEON_FIXCONV:
	    case NEON_LANEMUL:
	    case NEON_LANEMULL:
	    case NEON_LANEMULH:
	    case NEON_LANEMAC:
	    case NEON_SCALARMUL:
	    case NEON_SCALARMULL:
	    case NEON_SCALARMULH:
	    case NEON_SCALARMAC:
	    case NEON_SELECT:
	    case NEON_VTBL:
	    case NEON_VTBX:
	      {
		int k;
		tree return_type = void_type_node, args = void_list_node;

		/* Build a function type directly from the insn_data for this
		   builtin.  The build_function_type() function takes care of
		   removing duplicates for us.  */
		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
		  {
		    tree eltype;

		    if (is_load && k == 1)
		      {
			/* Neon load patterns always have the memory operand
			   (a SImode pointer) in the operand 1 position.  We
			   want a const pointer to the element type in that
			   position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = const_intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = const_intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = const_intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = const_float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = const_intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else if (is_store && k == 0)
		      {
			/* Similarly, Neon store patterns use operand 0 as
			   the memory location to store to (a SImode pointer).
			   Use a pointer to the element type of the store in
			   that position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else
		      {
			switch (insn_data[icode].operand[k].mode)
			  {
			  case VOIDmode: eltype = void_type_node; break;
			  /* Scalars.  */
			  case QImode: eltype = neon_intQI_type_node; break;
			  case HImode: eltype = neon_intHI_type_node; break;
			  case SImode: eltype = neon_intSI_type_node; break;
			  case SFmode: eltype = neon_float_type_node; break;
			  case DImode: eltype = neon_intDI_type_node; break;
			  case TImode: eltype = intTI_type_node; break;
			  case EImode: eltype = intEI_type_node; break;
			  case OImode: eltype = intOI_type_node; break;
			  case CImode: eltype = intCI_type_node; break;
			  case XImode: eltype = intXI_type_node; break;
			  /* 64-bit vectors.  */
			  case V8QImode: eltype = V8QI_type_node; break;
			  case V4HImode: eltype = V4HI_type_node; break;
			  case V2SImode: eltype = V2SI_type_node; break;
			  case V2SFmode: eltype = V2SF_type_node; break;
			  /* 128-bit vectors.  */
			  case V16QImode: eltype = V16QI_type_node; break;
			  case V8HImode: eltype = V8HI_type_node; break;
			  case V4SImode: eltype = V4SI_type_node; break;
			  case V4SFmode: eltype = V4SF_type_node; break;
			  case V2DImode: eltype = V2DI_type_node; break;
			  default: gcc_unreachable ();
			  }
		      }

		    if (k == 0 && !is_store)
		      return_type = eltype;
		    else
		      args = tree_cons (NULL_TREE, eltype, args);
		  }

		ftype = build_function_type (return_type, args);
	      }
	      break;

	    case NEON_RESULTPAIR:
	      {
		switch (insn_data[icode].operand[1].mode)
		  {
		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
		  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
		  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
		  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
		  case DImode: ftype = void_ftype_pdi_di_di; break;
		  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
		  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
		  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
		  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
		  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    case NEON_REINTERP:
	      {
		/* We iterate over 5 doubleword types, then 5 quadword
		   types.  */
		int rhs = j % 5;
		switch (insn_data[icode].operand[0].mode)
		  {
		  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
		  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
		  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
		  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
		  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
		  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
		  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
		  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
		  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  gcc_assert (ftype != NULL);

	  sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

	  add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
				NULL_TREE);
	}
    }
}
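/* Function codes are handed out sequentially from ARM_BUILTIN_NEON_BASE
   as builtins are registered above, so each table entry covers the
   contiguous range [base_fcode, base_fcode + num_vars);
   locate_neon_builtin_icode below depends on that layout.  */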
static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
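/* __fp16 is a storage-only format: values are promoted to float for
   arithmetic (see arm_promoted_type below), and the hooks that follow
   reject __fp16 in parameter and return positions.  */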
static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that conversions between
   __fp16 and double must go through an intermediate conversion to
   float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
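/* For example (illustrative only), given "__fp16 h; double d;" the
   assignment "d = h" is expanded as (double)(float)h rather than as a
   single-step conversion, matching the float intermediate this hook
   requests.  */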
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
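/* The predicate/copy_to_mode_reg sequence above is the standard
   expander idiom: any operand that does not already satisfy the insn's
   operand predicate is forced into a fresh register of the required
   mode before the pattern is generated.  */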
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}
static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key, *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
	     sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}
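/* The bsearch above is valid because neon_builtin_data is scanned in
   order when base_fcode values are assigned during initialization,
   leaving the table sorted by base_fcode.  */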
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* Expand a Neon builtin.  */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  int argc = 0;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  op[argc] = expand_normal (arg[argc]);
	  mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		     (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		    (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_itype itype;
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2, op2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Set up operands for a register copy from src to dest, taking care not to
   clobber registers in the process.
   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
   be called with a large N, so that should be OK.  */

void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;
  unsigned int i, j;

  while (copied != done)
    {
      for (i = 0; i < count; i++)
	{
	  int good = 1;

	  for (j = 0; good && j < count; j++)
	    if (i != j && (copied & (1 << j)) == 0
		&& reg_overlap_mentioned_p (src[j], dest[i]))
	      good = 0;

	  if (good && ((copied & (1 << i)) == 0))
	    {
	      operands[opctr++] = dest[i];
	      operands[opctr++] = src[i];
	      copied |= 1 << i;
	    }
	}
    }

  gcc_assert (opctr == count * 2);
}
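/* Worked example (illustrative): with count == 2, dest = {d1, d2} and
   src = {d0, d1}, writing d1 first would clobber src[1], so the loop
   above emits the pair (d2, d1) before (d1, d0).  */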
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_SETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
      return 0;

    case ARM_BUILTIN_GETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));
      return target;
    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : CODE_FOR_iwmmxt_walign);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

inline static int
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0;
       (mask & (1 << bit)) == 0;
       ++bit)
    continue;

  return bit;
}
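/* Behaves like a count-trailing-zeros: e.g. number_of_first_bit_set
   (0x18) == 3.  MASK must be nonzero, or the loop above runs off the
   end of the word.  */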
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to push or pop.  PUSH is
   nonzero if we should push, and zero if we should pop.  For debugging
   output, if pushing, adjust CFA_OFFSET by the amount of space added
   to the stack.  REAL_REGS should have the same number of bits set as
   MASK, and will be used instead (in the same order) to describe which
   registers were saved - this is used to mark the save slots when we
   push high registers after moving them to low registers.  */
static void
thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
	       unsigned long real_regs)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  if (ARM_EABI_UNWIND_TABLES && push)
    {
      fprintf (f, "\t.save\t{");
      for (regno = 0; regno < 15; regno++)
	{
	  if (real_regs & (1 << regno))
	    {
	      if (real_regs & ((1 << regno) - 1))
		fprintf (f, ", ");
	      asm_fprintf (f, "%r", regno);
	    }
	}
      fprintf (f, "}\n");
    }

  fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (push && (mask & (1 << LR_REGNUM)))
    {
      /* Catch pushing the LR.  */
      if (mask & 0xFF)
	fprintf (f, ", ");

      asm_fprintf (f, "%r", LR_REGNUM);

      pushed_words++;
    }
  else if (!push && (mask & (1 << PC_REGNUM)))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");

  if (push && pushed_words && dwarf2out_do_frame ())
    {
      char *l = dwarf2out_cfi_label (false);
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_words = 0;
      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
	{
	  if (pushed_mask & 1)
	    dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
	}
    }
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  int mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */

      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required &= ~(required & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;
  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		 regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		     regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		     regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
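/* In the common no-pop case this emits just "bx lr"; when the return
   address had to be popped through a low register the sequence is,
   e.g., "pop {r3}" followed by "bx r3" (illustrative register
   choice).  */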
static void
thumb1_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
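/* I.e. VAL is an 8-bit constant shifted left by 0..24 bits, something
   a Thumb-1 move-plus-shift pair can construct: 0x1FE (0xFF << 1)
   qualifies, 0x101 does not.  */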
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == JUMP_INSN
	  /* Ignore tablejump patterns.  */
	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
	  && get_attr_far_jump (insn) == FAR_JUMP_YES
	  )
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_DECLARE_FUNCTION_SIZE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pushpop (asm_out_file, mask, 0, NULL, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
			 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
19729 /* Functions to save and restore machine-specific function data. */
19730 static struct machine_function *
19731 arm_init_machine_status (void)
19733 struct machine_function *machine;
19734 machine = ggc_alloc_cleared_machine_function ();
19736 #if ARM_FT_UNKNOWN != 0
19737 machine->func_type = ARM_FT_UNKNOWN;
19742 /* Return an RTX indicating where the return address to the
19743 calling function can be found. */
19745 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19750 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19753 /* Do anything needed before RTL is emitted for each function. */
19755 arm_init_expanders (void)
19757 /* Arrange to initialize and mark the machine per-function status. */
19758 init_machine_status = arm_init_machine_status;
19760 /* This is to stop the combine pass optimizing away the alignment
19761 adjustment of va_arg. */
19762 /* ??? It is claimed that this should not be necessary. */
19764 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19768 /* Like arm_compute_initial_elimination_offset. Simpler because there
19769 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19770 to point at the base of the local variables after static stack
19771 space for a function has been allocated. */
19774 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19776 arm_stack_offsets *offsets;
19778 offsets = arm_get_frame_offsets ();
19782 case ARG_POINTER_REGNUM:
19785 case STACK_POINTER_REGNUM:
19786 return offsets->outgoing_args - offsets->saved_args;
19788 case FRAME_POINTER_REGNUM:
19789 return offsets->soft_frame - offsets->saved_args;
19791 case ARM_HARD_FRAME_POINTER_REGNUM:
19792 return offsets->saved_regs - offsets->saved_args;
19794 case THUMB_HARD_FRAME_POINTER_REGNUM:
19795 return offsets->locals_base - offsets->saved_args;
19798 gcc_unreachable ();
19802 case FRAME_POINTER_REGNUM:
19805 case STACK_POINTER_REGNUM:
19806 return offsets->outgoing_args - offsets->soft_frame;
19808 case ARM_HARD_FRAME_POINTER_REGNUM:
19809 return offsets->saved_regs - offsets->soft_frame;
19811 case THUMB_HARD_FRAME_POINTER_REGNUM:
19812 return offsets->locals_base - offsets->soft_frame;
19815 gcc_unreachable ();
19820 gcc_unreachable ();
19824 /* Given the stack offsets and register mask in OFFSETS, decide
19825 how many additional registers to push instead of subtracting
19826 a constant from SP. */
19828 thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
19830 HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
19831 unsigned long live_regs_mask = offsets->saved_regs_mask;
19832 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19833 unsigned long l_mask = live_regs_mask & 0x40ff;
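/* Note (added): 0x40ff selects r0-r7 (bits 0-7) plus lr (bit 14), which
   are precisely the registers a Thumb-1 PUSH instruction can encode. */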
19834 /* Then count how many other high registers will need to be pushed. */
19835 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19838 /* If the stack frame size is 512 exactly, we can save one load
19839 instruction, which should make this a win even when optimizing
19840 for speed. */
19841 if (!optimize_size && amount != 512)
19844 /* Can't do this if there are high registers to push, or if we
19845 are not going to do a push at all. */
19846 if (high_regs_pushed != 0 || l_mask == 0)
19849 /* Don't do this if thumb1_expand_prologue wants to emit instructions
19850 between the push and the stack frame allocation. */
19851 if ((flag_pic && arm_pic_register != INVALID_REGNUM)
19852 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
19855 for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
19856 n_free++;
19857 if (n_free == 0)
19858 return 0;
19860 gcc_assert (amount / 4 * 4 == amount);
19862 if (amount >= 512 && (amount - n_free * 4) < 512)
19863 return (amount - 508) / 4;
19864 if (amount <= n_free * 4)
19865 return amount / 4;
19866 return 0;
19867 }
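/* Worked example (added): for amount == 516 with n_free == 2 free low
   registers, (516 - 2*4) < 512 holds, so (516 - 508) / 4 == 2 extra
   registers are pushed and the remaining 508-byte adjustment fits in a
   single Thumb "sub sp, #imm" instruction. */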
19869 /* Generate the rest of a function's prologue. */
19871 thumb1_expand_prologue (void)
19875 HOST_WIDE_INT amount;
19876 arm_stack_offsets *offsets;
19877 unsigned long func_type;
19879 unsigned long live_regs_mask;
19881 func_type = arm_current_func_type ();
19883 /* Naked functions don't have prologues. */
19884 if (IS_NAKED (func_type))
19887 if (IS_INTERRUPT (func_type))
19889 error ("interrupt Service Routines cannot be coded in Thumb mode");
19893 offsets = arm_get_frame_offsets ();
19894 live_regs_mask = offsets->saved_regs_mask;
19895 /* Load the pic register before setting the frame pointer,
19896 so we can use r7 as a temporary work register. */
19897 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19898 arm_load_pic_register (live_regs_mask);
19900 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19901 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19902 stack_pointer_rtx);
19904 amount = offsets->outgoing_args - offsets->saved_regs;
19905 amount -= 4 * thumb1_extra_regs_pushed (offsets);
19906 if (amount)
19907 {
19908 if (amount < 512)
19909 {
19910 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19911 GEN_INT (- amount)));
19912 RTX_FRAME_RELATED_P (insn) = 1;
19913 }
19914 else
19915 {
19916 rtx reg, dwarf;
19918 /* The stack decrement is too big for an immediate value in a single
19919 insn. In theory we could issue multiple subtracts, but after
19920 three of them it becomes more space efficient to place the full
19921 value in the constant pool and load into a register. (Also the
19922 ARM debugger really likes to see only one stack decrement per
19923 function). So instead we look for a scratch register into which
19924 we can load the decrement, and then we subtract this from the
19925 stack pointer. Unfortunately on the thumb the only available
19926 scratch registers are the argument registers, and we cannot use
19927 these as they may hold arguments to the function. Instead we
19928 attempt to locate a call preserved register which is used by this
19929 function. If we can find one, then we know that it will have
19930 been pushed at the start of the prologue and so we can corrupt
19931 it now. */
19932 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19933 if (live_regs_mask & (1 << regno))
19936 gcc_assert(regno <= LAST_LO_REGNUM);
19938 reg = gen_rtx_REG (SImode, regno);
19940 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19942 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19943 stack_pointer_rtx, reg));
19944 RTX_FRAME_RELATED_P (insn) = 1;
19945 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19946 plus_constant (stack_pointer_rtx,
19948 RTX_FRAME_RELATED_P (dwarf) = 1;
19949 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19950 }
19951 }
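/* Illustrative expansion of the constant-pool path above (register and
   label names are hypothetical): with amount == 1024 this typically
   assembles to
       ldr   r4, .L5       @ .L5: .word -1024
       add   sp, sp, r4
   and the REG_FRAME_RELATED_EXPR note tells the unwinder that the net
   effect is sp = sp - 1024. */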
19953 if (frame_pointer_needed)
19954 thumb_set_frame_pointer (offsets);
19956 /* If we are profiling, make sure no instructions are scheduled before
19957 the call to mcount. Similarly if the user has requested no
19958 scheduling in the prolog. Similarly if we want non-call exceptions
19959 using the EABI unwinder, to prevent faulting instructions from being
19960 swapped with a stack adjustment. */
19961 if (crtl->profile || !TARGET_SCHED_PROLOG
19962 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
19963 emit_insn (gen_blockage ());
19965 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19966 if (live_regs_mask & 0xff)
19967 cfun->machine->lr_save_eliminated = 0;
19972 thumb1_expand_epilogue (void)
19974 HOST_WIDE_INT amount;
19975 arm_stack_offsets *offsets;
19978 /* Naked functions don't have epilogues. */
19979 if (IS_NAKED (arm_current_func_type ()))
19982 offsets = arm_get_frame_offsets ();
19983 amount = offsets->outgoing_args - offsets->saved_regs;
19985 if (frame_pointer_needed)
19987 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19988 amount = offsets->locals_base - offsets->saved_regs;
19991 gcc_assert (amount >= 0);
19992 if (amount)
19993 {
19994 if (amount < 512)
19995 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19996 GEN_INT (amount)));
19997 else
19998 {
19999 /* r3 is always free in the epilogue. */
20000 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20002 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20003 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20004 }
20005 }
20007 /* Emit a USE (stack_pointer_rtx), so that
20008 the stack adjustment will not be deleted. */
20009 emit_insn (gen_prologue_use (stack_pointer_rtx));
20011 if (crtl->profile || !TARGET_SCHED_PROLOG)
20012 emit_insn (gen_blockage ());
20014 /* Emit a clobber for each insn that will be restored in the epilogue,
20015 so that flow2 will get register lifetimes correct. */
20016 for (regno = 0; regno < 13; regno++)
20017 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20018 emit_clobber (gen_rtx_REG (SImode, regno));
20020 if (! df_regs_ever_live_p (LR_REGNUM))
20021 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20025 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20027 arm_stack_offsets *offsets;
20028 unsigned long live_regs_mask = 0;
20029 unsigned long l_mask;
20030 unsigned high_regs_pushed = 0;
20031 int cfa_offset = 0;
20034 if (IS_NAKED (arm_current_func_type ()))
20037 if (is_called_in_ARM_mode (current_function_decl))
20041 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20042 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20044 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20046 /* Generate code sequence to switch us into Thumb mode. */
20047 /* The .code 32 directive has already been emitted by
20048 ASM_DECLARE_FUNCTION_NAME. */
20049 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20050 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
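/* Note (added): in ARM state the PC reads as the current instruction's
   address plus 8, so the ORR yields the address just past the BX with the
   low bit set, and the BX continues execution there in Thumb state. */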
20052 /* Generate a label, so that the debugger will notice the
20053 change in instruction sets. This label is also used by
20054 the assembler to bypass the ARM code when this function
20055 is called from a Thumb encoded function elsewhere in the
20056 same file. Hence the definition of STUB_NAME here must
20057 agree with the definition in gas/config/tc-arm.c. */
20059 #define STUB_NAME ".real_start_of"
20061 fprintf (f, "\t.code\t16\n");
20063 if (arm_dllexport_name_p (name))
20064 name = arm_strip_name_encoding (name);
20066 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20067 fprintf (f, "\t.thumb_func\n");
20068 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20071 if (crtl->args.pretend_args_size)
20073 /* Output unwind directive for the stack adjustment. */
20074 if (ARM_EABI_UNWIND_TABLES)
20075 fprintf (f, "\t.pad #%d\n",
20076 crtl->args.pretend_args_size);
20078 if (cfun->machine->uses_anonymous_args)
20082 fprintf (f, "\tpush\t{");
20084 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20086 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20087 regno <= LAST_ARG_REGNUM;
20089 asm_fprintf (f, "%r%s", regno,
20090 regno == LAST_ARG_REGNUM ? "" : ", ");
20092 fprintf (f, "}\n");
20095 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20096 SP_REGNUM, SP_REGNUM,
20097 crtl->args.pretend_args_size);
20099 /* We don't need to record the stores for unwinding (would it
20100 help the debugger any if we did?), but record the change in
20101 the stack pointer. */
20102 if (dwarf2out_do_frame ())
20104 char *l = dwarf2out_cfi_label (false);
20106 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20107 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20111 /* Get the registers we are going to push. */
20112 offsets = arm_get_frame_offsets ();
20113 live_regs_mask = offsets->saved_regs_mask;
20114 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20115 l_mask = live_regs_mask & 0x40ff;
20116 /* Then count how many other high registers will need to be pushed. */
20117 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20119 if (TARGET_BACKTRACE)
20122 unsigned work_register;
20124 /* We have been asked to create a stack backtrace structure.
20125 The code looks like this:
20129 0 sub SP, #16 Reserve space for 4 registers.
20130 2 push {R7} Push low registers.
20131 4 add R7, SP, #20 Get the stack pointer before the push.
20132 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20133 8 mov R7, PC Get hold of the start of this code plus 12.
20134 10 str R7, [SP, #16] Store it.
20135 12 mov R7, FP Get hold of the current frame pointer.
20136 14 str R7, [SP, #4] Store it.
20137 16 mov R7, LR Get hold of the current return address.
20138 18 str R7, [SP, #12] Store it.
20139 20 add R7, SP, #16 Point at the start of the backtrace structure.
20140 22 mov FP, R7 Put this value into the frame pointer. */
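/* Summary of the resulting structure (added; offsets are relative to SP
   after the "push {R7}" above):
       [SP, #16] PC of the creation code + 12
       [SP, #12] saved LR (return address)
       [SP, #8]  caller's SP before the 16-byte reservation
       [SP, #4]  caller's frame pointer
   FP is left pointing at the [SP, #16] slot. */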
20142 work_register = thumb_find_work_register (live_regs_mask);
20144 if (ARM_EABI_UNWIND_TABLES)
20145 asm_fprintf (f, "\t.pad #16\n");
20148 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20149 SP_REGNUM, SP_REGNUM);
20151 if (dwarf2out_do_frame ())
20153 char *l = dwarf2out_cfi_label (false);
20155 cfa_offset = cfa_offset + 16;
20156 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20161 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20162 offset = bit_count (l_mask) * UNITS_PER_WORD;
20167 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20168 offset + 16 + crtl->args.pretend_args_size);
20170 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20173 /* Make sure that the instruction fetching the PC is in the right place
20174 to calculate "start of backtrace creation code + 12". */
20177 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20178 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20180 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20181 ARM_HARD_FRAME_POINTER_REGNUM);
20182 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20187 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20188 ARM_HARD_FRAME_POINTER_REGNUM);
20189 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20191 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20192 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20196 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20197 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20199 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20201 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20202 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20204 /* Optimization: If we are not pushing any low registers but we are going
20205 to push some high registers then delay our first push. This will just
20206 be a push of LR and we can combine it with the push of the first high
20207 register. */
20208 else if ((l_mask & 0xff) != 0
20209 || (high_regs_pushed == 0 && l_mask))
20211 unsigned long mask = l_mask;
20212 mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1;
20213 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20216 if (high_regs_pushed)
20218 unsigned pushable_regs;
20219 unsigned next_hi_reg;
20221 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20222 if (live_regs_mask & (1 << next_hi_reg))
20225 pushable_regs = l_mask & 0xff;
20227 if (pushable_regs == 0)
20228 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20230 while (high_regs_pushed > 0)
20232 unsigned long real_regs_mask = 0;
20234 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20236 if (pushable_regs & (1 << regno))
20238 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20240 high_regs_pushed --;
20241 real_regs_mask |= (1 << next_hi_reg);
20243 if (high_regs_pushed)
20245 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20247 if (live_regs_mask & (1 << next_hi_reg))
20252 pushable_regs &= ~((1 << regno) - 1);
20258 /* If we had to find a work register and we have not yet
20259 saved the LR then add it to the list of regs to push. */
20260 if (l_mask == (1 << LR_REGNUM))
20262 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20264 real_regs_mask | (1 << LR_REGNUM));
20268 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
20273 /* Handle the case of a double word load into a low register from
20274 a computed memory address. The computed address may involve a
20275 register which is overwritten by the load. */
20277 thumb_load_double_from_address (rtx *operands)
20285 gcc_assert (GET_CODE (operands[0]) == REG);
20286 gcc_assert (GET_CODE (operands[1]) == MEM);
20288 /* Get the memory address. */
20289 addr = XEXP (operands[1], 0);
20291 /* Work out how the memory address is computed. */
20292 switch (GET_CODE (addr))
20295 operands[2] = adjust_address (operands[1], SImode, 4);
20297 if (REGNO (operands[0]) == REGNO (addr))
20299 output_asm_insn ("ldr\t%H0, %2", operands);
20300 output_asm_insn ("ldr\t%0, %1", operands);
20304 output_asm_insn ("ldr\t%0, %1", operands);
20305 output_asm_insn ("ldr\t%H0, %2", operands);
20310 /* Compute <address> + 4 for the high order load. */
20311 operands[2] = adjust_address (operands[1], SImode, 4);
20313 output_asm_insn ("ldr\t%0, %1", operands);
20314 output_asm_insn ("ldr\t%H0, %2", operands);
20318 arg1 = XEXP (addr, 0);
20319 arg2 = XEXP (addr, 1);
20321 if (CONSTANT_P (arg1))
20322 base = arg2, offset = arg1;
20324 base = arg1, offset = arg2;
20326 gcc_assert (GET_CODE (base) == REG);
20328 /* Catch the case of <address> = <reg> + <reg> */
20329 if (GET_CODE (offset) == REG)
20331 int reg_offset = REGNO (offset);
20332 int reg_base = REGNO (base);
20333 int reg_dest = REGNO (operands[0]);
20335 /* Add the base and offset registers together into the
20336 higher destination register. */
20337 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20338 reg_dest + 1, reg_base, reg_offset);
20340 /* Load the lower destination register from the address in
20341 the higher destination register. */
20342 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20343 reg_dest, reg_dest + 1);
20345 /* Load the higher destination register from its own address
20346 plus 4. */
20347 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
20348 reg_dest + 1, reg_dest + 1);
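/* Worked example (added): with operands[0] == r2 and address r3 + r4,
   the sequence above is
       add r3, r3, r4
       ldr r2, [r3, #0]
       ldr r3, [r3, #4]
   so the computed address is only overwritten once it is no longer
   needed. */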
20352 /* Compute <address> + 4 for the high order load. */
20353 operands[2] = adjust_address (operands[1], SImode, 4);
20355 /* If the computed address is held in the low order register
20356 then load the high order register first, otherwise always
20357 load the low order register first. */
20358 if (REGNO (operands[0]) == REGNO (base))
20360 output_asm_insn ("ldr\t%H0, %2", operands);
20361 output_asm_insn ("ldr\t%0, %1", operands);
20365 output_asm_insn ("ldr\t%0, %1", operands);
20366 output_asm_insn ("ldr\t%H0, %2", operands);
20372 /* With no registers to worry about we can just load the value
20373 directly. */
20374 operands[2] = adjust_address (operands[1], SImode, 4);
20376 output_asm_insn ("ldr\t%H0, %2", operands);
20377 output_asm_insn ("ldr\t%0, %1", operands);
20381 gcc_unreachable ();
20388 thumb_output_move_mem_multiple (int n, rtx *operands)
20395 if (REGNO (operands[4]) > REGNO (operands[5]))
20398 operands[4] = operands[5];
20401 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
20402 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
20406 if (REGNO (operands[4]) > REGNO (operands[5]))
20409 operands[4] = operands[5];
20412 if (REGNO (operands[5]) > REGNO (operands[6]))
20415 operands[5] = operands[6];
20418 if (REGNO (operands[4]) > REGNO (operands[5]))
20421 operands[4] = operands[5];
20425 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
20426 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
20430 gcc_unreachable ();
20436 /* Output a call-via instruction for thumb state. */
20438 thumb_call_via_reg (rtx reg)
20440 int regno = REGNO (reg);
20443 gcc_assert (regno < LR_REGNUM);
20445 /* If we are in the normal text section we can use a single instance
20446 per compilation unit. If we are doing function sections, then we need
20447 an entry per section, since we can't rely on reachability. */
20448 if (in_section == text_section)
20450 thumb_call_reg_needed = 1;
20452 if (thumb_call_via_label[regno] == NULL)
20453 thumb_call_via_label[regno] = gen_label_rtx ();
20454 labelp = thumb_call_via_label + regno;
20458 if (cfun->machine->call_via[regno] == NULL)
20459 cfun->machine->call_via[regno] = gen_label_rtx ();
20460 labelp = cfun->machine->call_via + regno;
20463 output_asm_insn ("bl\t%a0", labelp);
20467 /* Routines for generating rtl. */
20469 thumb_expand_movmemqi (rtx *operands)
20471 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20472 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20473 HOST_WIDE_INT len = INTVAL (operands[2]);
20474 HOST_WIDE_INT offset = 0;
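/* Strategy note (added): the length is consumed greedily in 12-byte and
   8-byte ldmia/stmia groups, then in word, halfword and byte moves; a
   23-byte copy, for instance, becomes one 12-byte group, one 8-byte
   group, one halfword and one byte. */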
20478 emit_insn (gen_movmem12b (out, in, out, in));
20484 emit_insn (gen_movmem8b (out, in, out, in));
20490 rtx reg = gen_reg_rtx (SImode);
20491 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20492 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20499 rtx reg = gen_reg_rtx (HImode);
20500 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20501 plus_constant (in, offset))));
20502 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
20510 rtx reg = gen_reg_rtx (QImode);
20511 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20512 plus_constant (in, offset))));
20513 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
20519 thumb_reload_out_hi (rtx *operands)
20521 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20524 /* Handle reading a half-word from memory during reload. */
20526 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20528 gcc_unreachable ();
20531 /* Return the length of a function name prefix
20532 that starts with the character 'c'. */
20534 arm_get_strip_length (int c)
20538 ARM_NAME_ENCODING_LENGTHS
20543 /* Return a pointer to a function's name with any
20544 and all prefix encodings stripped from it. */
20546 arm_strip_name_encoding (const char *name)
20550 while ((skip = arm_get_strip_length (* name)))
20556 /* If there is a '*' anywhere in the name's prefix, then
20557 emit the stripped name verbatim, otherwise prepend an
20558 underscore if leading underscores are being used. */
20560 arm_asm_output_labelref (FILE *stream, const char *name)
20565 while ((skip = arm_get_strip_length (* name)))
20567 verbatim |= (*name == '*');
20572 fputs (name, stream);
20574 asm_fprintf (stream, "%U%s", name);
20578 arm_file_start (void)
20582 if (TARGET_UNIFIED_ASM)
20583 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20587 const char *fpu_name;
20588 if (arm_selected_arch)
20589 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
20591 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
20593 if (TARGET_SOFT_FLOAT)
20596 fpu_name = "softvfp";
20598 fpu_name = "softfpa";
20602 fpu_name = arm_fpu_desc->name;
20603 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
20605 if (TARGET_HARD_FLOAT)
20606 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20607 if (TARGET_HARD_FLOAT_ABI)
20608 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20611 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20613 /* Some of these attributes only apply when the corresponding features
20614 are used. However we don't have any easy way of figuring this out.
20615 Conservatively record the setting that would have been used. */
20617 /* Tag_ABI_FP_rounding. */
20618 if (flag_rounding_math)
20619 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20620 if (!flag_unsafe_math_optimizations)
20622 /* Tag_ABI_FP_denormal. */
20623 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20624 /* Tag_ABI_FP_exceptions. */
20625 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20627 /* Tag_ABI_FP_user_exceptions. */
20628 if (flag_signaling_nans)
20629 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20630 /* Tag_ABI_FP_number_model. */
20631 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20632 flag_finite_math_only ? 1 : 3);
20634 /* Tag_ABI_align8_needed. */
20635 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20636 /* Tag_ABI_align8_preserved. */
20637 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20638 /* Tag_ABI_enum_size. */
20639 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20640 flag_short_enums ? 1 : 2);
20642 /* Tag_ABI_optimization_goals. */
20645 else if (optimize >= 2)
20651 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20653 /* Tag_ABI_FP_16bit_format. */
20654 if (arm_fp16_format)
20655 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20656 (int)arm_fp16_format);
20658 if (arm_lang_output_object_attributes_hook)
20659 arm_lang_output_object_attributes_hook();
20661 default_file_start();
20665 arm_file_end (void)
20669 if (NEED_INDICATE_EXEC_STACK)
20670 /* Add .note.GNU-stack. */
20671 file_end_indicate_exec_stack ();
20673 if (! thumb_call_reg_needed)
20676 switch_to_section (text_section);
20677 asm_fprintf (asm_out_file, "\t.code 16\n");
20678 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20680 for (regno = 0; regno < LR_REGNUM; regno++)
20682 rtx label = thumb_call_via_label[regno];
20686 targetm.asm_out.internal_label (asm_out_file, "L",
20687 CODE_LABEL_NUMBER (label));
20688 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20694 /* Symbols in the text segment can be accessed without indirecting via the
20695 constant pool; it may take an extra binary operation, but this is still
20696 faster than indirecting via memory. Don't do this when not optimizing,
20697 since we won't be calculating all of the offsets necessary to do this
20698 simplification. */
20701 arm_encode_section_info (tree decl, rtx rtl, int first)
20703 if (optimize > 0 && TREE_CONSTANT (decl))
20704 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20706 default_encode_section_info (decl, rtl, first);
20708 #endif /* !ARM_PE */
20711 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20713 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20714 && !strcmp (prefix, "L"))
20716 arm_ccfsm_state = 0;
20717 arm_target_insn = NULL;
20719 default_internal_label (stream, prefix, labelno);
20722 /* Output code to add DELTA to the first argument, and then jump
20723 to FUNCTION. Used for C++ multiple inheritance. */
20725 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20726 HOST_WIDE_INT delta,
20727 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20730 static int thunk_label = 0;
20733 int mi_delta = delta;
20734 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20736 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20739 mi_delta = - mi_delta;
20743 int labelno = thunk_label++;
20744 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20745 /* Thunks are entered in ARM mode when available. */
20746 if (TARGET_THUMB1_ONLY)
20748 /* push r3 so we can use it as a temporary. */
20749 /* TODO: Omit this save if r3 is not used. */
20750 fputs ("\tpush {r3}\n", file);
20751 fputs ("\tldr\tr3, ", file);
20755 fputs ("\tldr\tr12, ", file);
20757 assemble_name (file, label);
20758 fputc ('\n', file);
20761 /* If we are generating PIC, the ldr instruction below loads
20762 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20763 the address of the add + 8, so we have:
20765 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20766 = target + 1.
20768 Note that we have "+ 1" because some versions of GNU ld
20769 don't set the low bit of the result for R_ARM_REL32
20770 relocations against thumb function symbols.
20771 On ARMv6M this is +4, not +8. */
20772 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20773 assemble_name (file, labelpc);
20774 fputs (":\n", file);
20775 if (TARGET_THUMB1_ONLY)
20777 /* This is 2 insns after the start of the thunk, so we know it
20778 is 4-byte aligned. */
20779 fputs ("\tadd\tr3, pc, r3\n", file);
20780 fputs ("\tmov r12, r3\n", file);
20783 fputs ("\tadd\tr12, pc, r12\n", file);
20785 else if (TARGET_THUMB1_ONLY)
20786 fputs ("\tmov r12, r3\n", file);
20788 if (TARGET_THUMB1_ONLY)
20790 if (mi_delta > 255)
20792 fputs ("\tldr\tr3, ", file);
20793 assemble_name (file, label);
20794 fputs ("+4\n", file);
20795 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20796 mi_op, this_regno, this_regno);
20798 else if (mi_delta != 0)
20800 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20801 mi_op, this_regno, this_regno,
20807 /* TODO: Use movw/movt for large constants when available. */
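/* Worked example (added; assumes THIS is in r0): mi_delta == 0x1234 emits
       add r0, r0, #0x34
       add r0, r0, #0x1200
   since each step peels off one byte at an even bit position, matching
   the ARM immediate encoding of an 8-bit value at an even rotation. */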
20808 while (mi_delta != 0)
20810 if ((mi_delta & (3 << shift)) == 0)
20814 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20815 mi_op, this_regno, this_regno,
20816 mi_delta & (0xff << shift));
20817 mi_delta &= ~(0xff << shift);
20824 if (TARGET_THUMB1_ONLY)
20825 fputs ("\tpop\t{r3}\n", file);
20827 fprintf (file, "\tbx\tr12\n");
20828 ASM_OUTPUT_ALIGN (file, 2);
20829 assemble_name (file, label);
20830 fputs (":\n", file);
20833 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20834 rtx tem = XEXP (DECL_RTL (function), 0);
20835 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20836 tem = gen_rtx_MINUS (GET_MODE (tem),
20838 gen_rtx_SYMBOL_REF (Pmode,
20839 ggc_strdup (labelpc)));
20840 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20843 /* Output ".word .LTHUNKn". */
20844 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20846 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20847 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20851 fputs ("\tb\t", file);
20852 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20853 if (NEED_PLT_RELOC)
20854 fputs ("(PLT)", file);
20855 fputc ('\n', file);
20860 arm_emit_vector_const (FILE *file, rtx x)
20863 const char * pattern;
20865 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20867 switch (GET_MODE (x))
20869 case V2SImode: pattern = "%08x"; break;
20870 case V4HImode: pattern = "%04x"; break;
20871 case V8QImode: pattern = "%02x"; break;
20872 default: gcc_unreachable ();
20875 fprintf (file, "0x");
20876 for (i = CONST_VECTOR_NUNITS (x); i--;)
20880 element = CONST_VECTOR_ELT (x, i);
20881 fprintf (file, pattern, INTVAL (element));
20887 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20888 HFmode constant pool entries are actually loaded with ldr. */
20890 arm_emit_fp16_const (rtx c)
20895 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20896 bits = real_to_target (NULL, &r, HFmode);
20897 if (WORDS_BIG_ENDIAN)
20898 assemble_zeros (2);
20899 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20900 if (!WORDS_BIG_ENDIAN)
20901 assemble_zeros (2);
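/* Example (added): the half-precision constant 1.0 has bit pattern
   0x3c00, so on a little-endian target this emits the 2-byte value 0x3c00
   followed by 2 bytes of padding, filling the 4-byte word that the ldr
   will fetch. */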
20905 arm_output_load_gr (rtx *operands)
20912 if (GET_CODE (operands [1]) != MEM
20913 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20914 || GET_CODE (reg = XEXP (sum, 0)) != REG
20915 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20916 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20917 return "wldrw%?\t%0, %1";
20919 /* Fix up an out-of-range load of a GR register. */
20920 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20921 wcgr = operands[0];
20923 output_asm_insn ("ldr%?\t%0, %1", operands);
20925 operands[0] = wcgr;
20927 output_asm_insn ("tmcr%?\t%0, %1", operands);
20928 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20933 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20935 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20936 named arg and all anonymous args onto the stack.
20937 XXX I know the prologue shouldn't be pushing registers, but it is faster
20938 that way. */
20941 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20942 enum machine_mode mode,
20945 int second_time ATTRIBUTE_UNUSED)
20949 cfun->machine->uses_anonymous_args = 1;
20950 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20952 nregs = pcum->aapcs_ncrn;
20953 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20957 nregs = pcum->nregs;
20959 if (nregs < NUM_ARG_REGS)
20960 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
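/* Example (added): for a variadic function whose single named argument
   occupies r0, nregs == 1 and *pretend_size == 12, so the prologue pushes
   r1-r3 and the anonymous arguments become contiguous with any stack
   arguments. */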
20963 /* Return nonzero if the CONSUMER instruction (a store) does not need
20964 PRODUCER's value to calculate the address. */
20967 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20969 rtx value = PATTERN (producer);
20970 rtx addr = PATTERN (consumer);
20972 if (GET_CODE (value) == COND_EXEC)
20973 value = COND_EXEC_CODE (value);
20974 if (GET_CODE (value) == PARALLEL)
20975 value = XVECEXP (value, 0, 0);
20976 value = XEXP (value, 0);
20977 if (GET_CODE (addr) == COND_EXEC)
20978 addr = COND_EXEC_CODE (addr);
20979 if (GET_CODE (addr) == PARALLEL)
20980 addr = XVECEXP (addr, 0, 0);
20981 addr = XEXP (addr, 0);
20983 return !reg_overlap_mentioned_p (value, addr);
20986 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20987 have an early register shift value or amount dependency on the
20988 result of PRODUCER. */
20991 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20993 rtx value = PATTERN (producer);
20994 rtx op = PATTERN (consumer);
20997 if (GET_CODE (value) == COND_EXEC)
20998 value = COND_EXEC_CODE (value);
20999 if (GET_CODE (value) == PARALLEL)
21000 value = XVECEXP (value, 0, 0);
21001 value = XEXP (value, 0);
21002 if (GET_CODE (op) == COND_EXEC)
21003 op = COND_EXEC_CODE (op);
21004 if (GET_CODE (op) == PARALLEL)
21005 op = XVECEXP (op, 0, 0);
21008 early_op = XEXP (op, 0);
21009 /* This is either an actual independent shift, or a shift applied to
21010 the first operand of another operation. We want the whole shift
21011 operation. */
21012 if (GET_CODE (early_op) == REG)
21015 return !reg_overlap_mentioned_p (value, early_op);
21018 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21019 have an early register shift value dependency on the result of
21023 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21025 rtx value = PATTERN (producer);
21026 rtx op = PATTERN (consumer);
21029 if (GET_CODE (value) == COND_EXEC)
21030 value = COND_EXEC_CODE (value);
21031 if (GET_CODE (value) == PARALLEL)
21032 value = XVECEXP (value, 0, 0);
21033 value = XEXP (value, 0);
21034 if (GET_CODE (op) == COND_EXEC)
21035 op = COND_EXEC_CODE (op);
21036 if (GET_CODE (op) == PARALLEL)
21037 op = XVECEXP (op, 0, 0);
21040 early_op = XEXP (op, 0);
21042 /* This is either an actual independent shift, or a shift applied to
21043 the first operand of another operation. We want the value being
21044 shifted, in either case. */
21045 if (GET_CODE (early_op) != REG)
21046 early_op = XEXP (early_op, 0);
21048 return !reg_overlap_mentioned_p (value, early_op);
21051 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21052 have an early register mult dependency on the result of
21056 arm_no_early_mul_dep (rtx producer, rtx consumer)
21058 rtx value = PATTERN (producer);
21059 rtx op = PATTERN (consumer);
21061 if (GET_CODE (value) == COND_EXEC)
21062 value = COND_EXEC_CODE (value);
21063 if (GET_CODE (value) == PARALLEL)
21064 value = XVECEXP (value, 0, 0);
21065 value = XEXP (value, 0);
21066 if (GET_CODE (op) == COND_EXEC)
21067 op = COND_EXEC_CODE (op);
21068 if (GET_CODE (op) == PARALLEL)
21069 op = XVECEXP (op, 0, 0);
21072 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21074 if (GET_CODE (XEXP (op, 0)) == MULT)
21075 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21077 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21083 /* We can't rely on the caller doing the proper promotion when
21084 using APCS or ATPCS. */
21087 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21089 return !TARGET_AAPCS_BASED;
21092 static enum machine_mode
21093 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21094 enum machine_mode mode,
21095 int *punsignedp ATTRIBUTE_UNUSED,
21096 const_tree fntype ATTRIBUTE_UNUSED,
21097 int for_return ATTRIBUTE_UNUSED)
21099 if (GET_MODE_CLASS (mode) == MODE_INT
21100 && GET_MODE_SIZE (mode) < 4)
21106 /* AAPCS based ABIs use short enums by default. */
21109 arm_default_short_enums (void)
21111 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21115 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21118 arm_align_anon_bitfield (void)
21120 return TARGET_AAPCS_BASED;
21124 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21127 arm_cxx_guard_type (void)
21129 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21132 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21133 has an accumulator dependency on the result of the producer (a
21134 multiplication instruction) and no other dependency on that result. */
21136 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21138 rtx mul = PATTERN (producer);
21139 rtx mac = PATTERN (consumer);
21141 rtx mac_op0, mac_op1, mac_acc;
21143 if (GET_CODE (mul) == COND_EXEC)
21144 mul = COND_EXEC_CODE (mul);
21145 if (GET_CODE (mac) == COND_EXEC)
21146 mac = COND_EXEC_CODE (mac);
21148 /* Check that mul is of the form (set (...) (mult ...))
21149 and mla is of the form (set (...) (plus (mult ...) (...))). */
21150 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21151 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21152 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21155 mul_result = XEXP (mul, 0);
21156 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21157 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21158 mac_acc = XEXP (XEXP (mac, 1), 1);
21160 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21161 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21162 && !reg_overlap_mentioned_p (mul_result, mac_op1));
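/* Shape accepted (added illustration):
       mul: (set (reg A) (mult X Y))
       mac: (set (...) (plus (mult P Q) (reg A)))
   where A feeds only the accumulator operand; schedulers use this to
   model accumulator forwarding between multiply and multiply-accumulate
   instructions. */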
21166 /* The EABI says test the least significant bit of a guard variable. */
21169 arm_cxx_guard_mask_bit (void)
21171 return TARGET_AAPCS_BASED;
21175 /* The EABI specifies that all array cookies are 8 bytes long. */
21178 arm_get_cookie_size (tree type)
21182 if (!TARGET_AAPCS_BASED)
21183 return default_cxx_get_cookie_size (type);
21185 size = build_int_cst (sizetype, 8);
21190 /* The EABI says that array cookies should also contain the element size. */
21193 arm_cookie_has_size (void)
21195 return TARGET_AAPCS_BASED;
21199 /* The EABI says constructors and destructors should return a pointer to
21200 the object constructed/destroyed. */
21203 arm_cxx_cdtor_returns_this (void)
21205 return TARGET_AAPCS_BASED;
21208 /* The EABI says that an inline function may never be the key
21212 arm_cxx_key_method_may_be_inline (void)
21214 return !TARGET_AAPCS_BASED;
21218 arm_cxx_determine_class_data_visibility (tree decl)
21220 if (!TARGET_AAPCS_BASED
21221 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21224 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21225 is exported. However, on systems without dynamic vague linkage,
21226 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21227 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21228 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21230 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21231 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21235 arm_cxx_class_data_always_comdat (void)
21237 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21238 vague linkage if the class has no key function. */
21239 return !TARGET_AAPCS_BASED;
21243 /* The EABI says __aeabi_atexit should be used to register static
21247 arm_cxx_use_aeabi_atexit (void)
21249 return TARGET_AAPCS_BASED;
21254 arm_set_return_address (rtx source, rtx scratch)
21256 arm_stack_offsets *offsets;
21257 HOST_WIDE_INT delta;
21259 unsigned long saved_regs;
21261 offsets = arm_get_frame_offsets ();
21262 saved_regs = offsets->saved_regs_mask;
21264 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21265 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21268 if (frame_pointer_needed)
21269 addr = plus_constant(hard_frame_pointer_rtx, -4);
21272 /* LR will be the first saved register. */
21273 delta = offsets->outgoing_args - (offsets->frame + 4);
21278 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
21279 GEN_INT (delta & ~4095)));
21284 addr = stack_pointer_rtx;
21286 addr = plus_constant (addr, delta);
21288 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21294 thumb_set_return_address (rtx source, rtx scratch)
21296 arm_stack_offsets *offsets;
21297 HOST_WIDE_INT delta;
21298 HOST_WIDE_INT limit;
21301 unsigned long mask;
21305 offsets = arm_get_frame_offsets ();
21306 mask = offsets->saved_regs_mask;
21307 if (mask & (1 << LR_REGNUM))
21310 /* Find the saved regs. */
21311 if (frame_pointer_needed)
21313 delta = offsets->soft_frame - offsets->saved_args;
21314 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
21320 delta = offsets->outgoing_args - offsets->saved_args;
21323 /* Allow for the stack frame. */
21324 if (TARGET_THUMB1 && TARGET_BACKTRACE)
21326 /* The link register is always the first saved register. */
21329 /* Construct the address. */
21330 addr = gen_rtx_REG (SImode, reg);
21333 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
21334 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
21338 addr = plus_constant (addr, delta);
21340 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21343 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21346 /* Implements target hook vector_mode_supported_p. */
21348 arm_vector_mode_supported_p (enum machine_mode mode)
21350 /* Neon also supports V2SImode, etc. listed in the clause below. */
21351 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
21352 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
21355 if ((TARGET_NEON || TARGET_IWMMXT)
21356 && ((mode == V2SImode)
21357 || (mode == V4HImode)
21358 || (mode == V8QImode)))
21364 /* Implements target hook small_register_classes_for_mode_p. */
21366 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
21368 return TARGET_THUMB1;
21371 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
21372 ARM insns and therefore guarantee that the shift count is modulo 256.
21373 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
21374 guarantee no particular behavior for out-of-range counts. */
21376 static unsigned HOST_WIDE_INT
21377 arm_shift_truncation_mask (enum machine_mode mode)
21379 return mode == SImode ? 255 : 0;
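/* Note (added): returning 255 allows the middle end to delete an explicit
   "count & 255" applied to an SImode shift amount, since the hardware
   masks the count anyway; returning 0 for DImode promises nothing. */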
21383 /* Map internal gcc register numbers to DWARF2 register numbers. */
21386 arm_dbx_register_number (unsigned int regno)
21391 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
21392 compatibility. The EABI defines them as registers 96-103. */
21393 if (IS_FPA_REGNUM (regno))
21394 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
21396 if (IS_VFP_REGNUM (regno))
21398 /* See comment in arm_dwarf_register_span. */
21399 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21400 return 64 + regno - FIRST_VFP_REGNUM;
21402 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
21405 if (IS_IWMMXT_GR_REGNUM (regno))
21406 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
21408 if (IS_IWMMXT_REGNUM (regno))
21409 return 112 + regno - FIRST_IWMMXT_REGNUM;
21411 gcc_unreachable ();
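/* Examples of the mapping (added): s5 maps to 64 + 5 == 69, while d20,
   which has no single-precision alias, maps to 256 + 20 == 276. */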
21414 /* Dwarf models VFPv3 registers as 32 64-bit registers.
21415 GCC models them as 64 32-bit registers, so we need to describe this to
21416 the DWARF generation code. Other registers can use the default. */
21418 arm_dwarf_register_span (rtx rtl)
21425 regno = REGNO (rtl);
21426 if (!IS_VFP_REGNUM (regno))
21429 /* XXX FIXME: The EABI defines two VFP register ranges:
21430 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
21432 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
21433 corresponding D register. Until GDB supports this, we shall use the
21434 legacy encodings. We also use these encodings for D0-D15 for
21435 compatibility with older debuggers. */
21436 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21439 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
21440 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
21441 regno = (regno - FIRST_VFP_REGNUM) / 2;
21442 for (i = 0; i < nregs; i++)
21443 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
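/* Example (added): a 16-byte value living in d20-d21 yields
   (parallel [(reg:DI 276) (reg:DI 277)]), naming the two 64-bit DWARF
   registers that the value spans. */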
21448 #ifdef TARGET_UNWIND_INFO
21449 /* Emit unwind directives for a store-multiple instruction or stack pointer
21450 push during alignment.
21451 These should only ever be generated by the function prologue code, so
21452 expect them to have a particular form. */
21455 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21458 HOST_WIDE_INT offset;
21459 HOST_WIDE_INT nregs;
21465 e = XVECEXP (p, 0, 0);
21466 if (GET_CODE (e) != SET)
21469 /* First insn will adjust the stack pointer. */
21470 if (GET_CODE (e) != SET
21471 || GET_CODE (XEXP (e, 0)) != REG
21472 || REGNO (XEXP (e, 0)) != SP_REGNUM
21473 || GET_CODE (XEXP (e, 1)) != PLUS)
21476 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21477 nregs = XVECLEN (p, 0) - 1;
21479 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
21482 /* The function prologue may also push pc, but not annotate it as it is
21483 never restored. We turn this into a stack pointer adjustment. */
21484 if (nregs * 4 == offset - 4)
21485 {
21486 fprintf (asm_out_file, "\t.pad #4\n");
21487 offset -= 4;
21488 }
21490 fprintf (asm_out_file, "\t.save {");
21492 else if (IS_VFP_REGNUM (reg))
21495 fprintf (asm_out_file, "\t.vsave {");
21497 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21499 /* FPA registers are done differently. */
21500 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
21504 /* Unknown register type. */
21507 /* If the stack increment doesn't match the size of the saved registers,
21508 something has gone horribly wrong. */
21509 if (offset != nregs * reg_size)
21514 /* The remaining insns will describe the stores. */
21515 for (i = 1; i <= nregs; i++)
21517 /* Expect (set (mem <addr>) (reg)).
21518 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21519 e = XVECEXP (p, 0, i);
21520 if (GET_CODE (e) != SET
21521 || GET_CODE (XEXP (e, 0)) != MEM
21522 || GET_CODE (XEXP (e, 1)) != REG)
21525 reg = REGNO (XEXP (e, 1));
21530 fprintf (asm_out_file, ", ");
21531 /* We can't use %r for vfp because we need to use the
21532 double precision register names. */
21533 if (IS_VFP_REGNUM (reg))
21534 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
21536 asm_fprintf (asm_out_file, "%r", reg);
21538 #ifdef ENABLE_CHECKING
21539 /* Check that the addresses are consecutive. */
21540 e = XEXP (XEXP (e, 0), 0);
21541 if (GET_CODE (e) == PLUS)
21543 offset += reg_size;
21544 if (GET_CODE (XEXP (e, 0)) != REG
21545 || REGNO (XEXP (e, 0)) != SP_REGNUM
21546 || GET_CODE (XEXP (e, 1)) != CONST_INT
21547 || offset != INTVAL (XEXP (e, 1)))
21551 || GET_CODE (e) != REG
21552 || REGNO (e) != SP_REGNUM)
21556 fprintf (asm_out_file, "}\n");
21559 /* Emit unwind directives for a SET. */
21562 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
21570 switch (GET_CODE (e0))
21573 /* Pushing a single register. */
21574 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21575 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21576 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
21579 asm_fprintf (asm_out_file, "\t.save ");
21580 if (IS_VFP_REGNUM (REGNO (e1)))
21581 asm_fprintf(asm_out_file, "{d%d}\n",
21582 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21584 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21588 if (REGNO (e0) == SP_REGNUM)
21590 /* A stack increment. */
21591 if (GET_CODE (e1) != PLUS
21592 || GET_CODE (XEXP (e1, 0)) != REG
21593 || REGNO (XEXP (e1, 0)) != SP_REGNUM
21594 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21597 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21598 -INTVAL (XEXP (e1, 1)));
21600 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21602 HOST_WIDE_INT offset;
21604 if (GET_CODE (e1) == PLUS)
21606 if (GET_CODE (XEXP (e1, 0)) != REG
21607 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21609 reg = REGNO (XEXP (e1, 0));
21610 offset = INTVAL (XEXP (e1, 1));
21611 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21612 HARD_FRAME_POINTER_REGNUM, reg,
21615 else if (GET_CODE (e1) == REG)
21618 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21619 HARD_FRAME_POINTER_REGNUM, reg);
21624 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21626 /* Move from sp to reg. */
21627 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21629 else if (GET_CODE (e1) == PLUS
21630 && GET_CODE (XEXP (e1, 0)) == REG
21631 && REGNO (XEXP (e1, 0)) == SP_REGNUM
21632 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21634 /* Set reg to offset from sp. */
21635 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21636 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21638 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21640 /* Stack pointer save before alignment. */
21642 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21655 /* Emit unwind directives for the given insn. */
21658 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21662 if (!ARM_EABI_UNWIND_TABLES)
21665 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21666 && (TREE_NOTHROW (current_function_decl)
21667 || crtl->all_throwers_are_sibcalls))
21670 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21673 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21675 pat = XEXP (pat, 0);
21677 pat = PATTERN (insn);
21679 switch (GET_CODE (pat))
21682 arm_unwind_emit_set (asm_out_file, pat);
21686 /* Store multiple. */
21687 arm_unwind_emit_sequence (asm_out_file, pat);
21696 /* Output a reference from a function exception table to the type_info
21697 object X. The EABI specifies that the symbol should be relocated by
21698 an R_ARM_TARGET2 relocation. */
21701 arm_output_ttype (rtx x)
21703 fputs ("\t.word\t", asm_out_file);
21704 output_addr_const (asm_out_file, x);
21705 /* Use special relocations for symbol references. */
21706 if (GET_CODE (x) != CONST_INT)
21707 fputs ("(TARGET2)", asm_out_file);
21708 fputc ('\n', asm_out_file);
21712 #endif /* TARGET_UNWIND_INFO */
21715 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21716 stack alignment. */
21719 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21721 rtx unspec = SET_SRC (pattern);
21722 gcc_assert (GET_CODE (unspec) == UNSPEC);
21726 case UNSPEC_STACK_ALIGN:
21727 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21728 put anything on the stack, so hopefully it won't matter.
21729 CFA = SP will be correct after alignment. */
21730 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21731 SET_DEST (pattern));
21734 gcc_unreachable ();
21739 /* Output unwind directives for the start/end of a function. */
21742 arm_output_fn_unwind (FILE * f, bool prologue)
21744 if (!ARM_EABI_UNWIND_TABLES)
21748 fputs ("\t.fnstart\n", f);
21751 /* If this function will never be unwound, then mark it as such.
21752 The same condition is used in arm_unwind_emit to suppress
21753 the frame annotations. */
21754 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21755 && (TREE_NOTHROW (current_function_decl)
21756 || crtl->all_throwers_are_sibcalls))
21757 fputs("\t.cantunwind\n", f);
21759 fputs ("\t.fnend\n", f);
21764 arm_emit_tls_decoration (FILE *fp, rtx x)
21766 enum tls_reloc reloc;
21769 val = XVECEXP (x, 0, 0);
21770 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21772 output_addr_const (fp, val);
21777 fputs ("(tlsgd)", fp);
21780 fputs ("(tlsldm)", fp);
21783 fputs ("(tlsldo)", fp);
21786 fputs ("(gottpoff)", fp);
21789 fputs ("(tpoff)", fp);
21792 gcc_unreachable ();
21800 fputs (" + (. - ", fp);
21801 output_addr_const (fp, XVECEXP (x, 0, 2));
21803 output_addr_const (fp, XVECEXP (x, 0, 3));
21813 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
21816 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21818 gcc_assert (size == 4);
21819 fputs ("\t.word\t", file);
21820 output_addr_const (file, x);
21821 fputs ("(tlsldo)", file);
21825 arm_output_addr_const_extra (FILE *fp, rtx x)
21827 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21828 return arm_emit_tls_decoration (fp, x);
21829 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21832 int labelno = INTVAL (XVECEXP (x, 0, 0));
21834 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21835 assemble_name_raw (fp, label);
21839 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21841 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21845 output_addr_const (fp, XVECEXP (x, 0, 0));
21849 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
21851 output_addr_const (fp, XVECEXP (x, 0, 0));
21855 output_addr_const (fp, XVECEXP (x, 0, 1));
21859 else if (GET_CODE (x) == CONST_VECTOR)
21860 return arm_emit_vector_const (fp, x);
21865 /* Output assembly for a shift instruction.
21866 SET_FLAGS determines how the instruction modifies the condition codes.
21867 0 - Do not set condition codes.
21868 1 - Set condition codes.
21869 2 - Use smallest instruction. */
21871 arm_output_shift(rtx * operands, int set_flags)
21874 static const char flag_chars[3] = {'?', '.', '!'};
21879 c = flag_chars[set_flags];
21880 if (TARGET_UNIFIED_ASM)
21882 shift = shift_op(operands[3], &val);
21886 operands[2] = GEN_INT(val);
21887 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21890 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21893 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21894 output_asm_insn (pattern, operands);
21898 /* Output a Thumb-1 casesi dispatch sequence. */
21900 thumb1_output_casesi (rtx *operands)
21902 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21904 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21906 switch (GET_MODE(diff_vec))
21909 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21910 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21912 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21913 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21915 return "bl\t%___gnu_thumb1_case_si";
21917 gcc_unreachable ();
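/* Note (added, describing the helpers as found in lib1funcs.asm): each
   __gnu_thumb1_case_* routine uses the return address in lr to locate the
   dispatch table that immediately follows the bl, loads the entry indexed
   by r0, and adjusts lr so that execution resumes at the selected case. */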
21921 /* Output a Thumb-2 casesi instruction. */
21923 thumb2_output_casesi (rtx *operands)
21925 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21927 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21929 output_asm_insn ("cmp\t%0, %1", operands);
21930 output_asm_insn ("bhi\t%l3", operands);
21931 switch (GET_MODE(diff_vec))
21934 return "tbb\t[%|pc, %0]";
21936 return "tbh\t[%|pc, %0, lsl #1]";
21940 output_asm_insn ("adr\t%4, %l2", operands);
21941 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21942 output_asm_insn ("add\t%4, %4, %5", operands);
21947 output_asm_insn ("adr\t%4, %l2", operands);
21948 return "ldr\t%|pc, [%4, %0, lsl #2]";
21951 gcc_unreachable ();
21955 /* Most ARM cores are single issue, but some newer ones can dual issue.
21956 The scheduler descriptions rely on this being correct. */
21958 arm_issue_rate (void)
21973 /* A table and a function to perform ARM-specific name mangling for
21974 NEON vector types in order to conform to the AAPCS (see "Procedure
21975 Call Standard for the ARM Architecture", Appendix A). To qualify
21976 for emission with the mangled names defined in that document, a
21977 vector type must not only be of the correct mode but also be
21978 composed of NEON vector element types (e.g. __builtin_neon_qi). */
21981 enum machine_mode mode;
21982 const char *element_type_name;
21983 const char *aapcs_name;
21984 } arm_mangle_map_entry;
21986 static arm_mangle_map_entry arm_mangle_map[] = {
21987 /* 64-bit containerized types. */
21988 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
21989 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
21990 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
21991 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
21992 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
21993 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
21994 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
21995 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
21996 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
21997 /* 128-bit containerized types. */
21998 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
21999 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22000 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22001 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22002 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22003 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22004 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22005 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22006 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22007 { VOIDmode, NULL, NULL }
22011 arm_mangle_type (const_tree type)
22013 arm_mangle_map_entry *pos = arm_mangle_map;
22015 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22016 has to be mangled as if it is in the "std" namespace. */
22017 if (TARGET_AAPCS_BASED
22018 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22020 static bool warned;
22021 if (!warned && warn_psabi && !in_system_header)
22024 inform (input_location,
22025 "the mangling of %<va_list%> has changed in GCC 4.4");
22027 return "St9__va_list";
22030 /* Half-precision float. */
22031 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22034 if (TREE_CODE (type) != VECTOR_TYPE)
22037 /* Check the mode of the vector type, and the name of the vector
22038 element type, against the table. */
22039 while (pos->mode != VOIDmode)
22041 tree elt_type = TREE_TYPE (type);
22043 if (pos->mode == TYPE_MODE (type)
22044 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22045 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22046 pos->element_type_name))
22047 return pos->aapcs_name;
22052 /* Use the default mangling for unrecognized (possibly user-defined)
22053 vector types. */
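/* Example (added): int8x8_t is the V8QImode container with element type
   __builtin_neon_qi, so the loop above returns "15__simd64_int8_t". */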
22057 /* Order of allocation of core registers for Thumb: this allocation is
22058 written over the corresponding initial entries of the array
22059 initialized with REG_ALLOC_ORDER. We allocate all low registers
22060 first. Saving and restoring a low register is usually cheaper than
22061 using a call-clobbered high register. */
22063 static const int thumb_core_reg_alloc_order[] =
22065 3, 2, 1, 0, 4, 5, 6, 7,
22066 14, 12, 8, 9, 10, 11, 13, 15
22069 /* Adjust register allocation order when compiling for Thumb. */
22072 arm_order_regs_for_local_alloc (void)
22074 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
22075 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
22077 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
22078 sizeof (thumb_core_reg_alloc_order));
22081 /* Set default optimization options. */
22083 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
22085 /* Enable section anchors by default at -O1 or higher.
22086 Use 2 to distinguish from an explicit -fsection-anchors
22087 given on the command line. */
22089 flag_section_anchors = 2;
22092 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22095 arm_frame_pointer_required (void)
22097 return (cfun->has_nonlocal_label
22098 || SUBTARGET_FRAME_POINTER_REQUIRED
22099 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
22102 /* Only thumb1 can't support conditional execution, so return true if
22103 the target is not thumb1. */
22105 arm_have_conditional_execution (void)
22107 return !TARGET_THUMB1;
22110 #include "gt-arm.h"