1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
49 #include "c-family/c-pragma.h" /* ??? */
50 #include "integrate.h"
53 #include "target-def.h"
55 #include "langhooks.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static void arm_print_operand (FILE *, rtx, int);
88 static void arm_print_operand_address (FILE *, rtx);
89 static bool arm_print_operand_punct_valid_p (unsigned char code);
90 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
91 static arm_cc get_arm_condition_code (rtx);
92 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
93 static rtx is_jump_table (rtx);
94 static const char *output_multi_immediate (rtx *, const char *, const char *,
96 static const char *shift_op (rtx, HOST_WIDE_INT *);
97 static struct machine_function *arm_init_machine_status (void);
98 static void thumb_exit (FILE *, int);
99 static rtx is_jump_table (rtx);
100 static HOST_WIDE_INT get_jump_table_size (rtx);
101 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
102 static Mnode *add_minipool_forward_ref (Mfix *);
103 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_backward_ref (Mfix *);
105 static void assign_minipool_offsets (Mfix *);
106 static void arm_print_value (FILE *, rtx);
107 static void dump_minipool (rtx);
108 static int arm_barrier_cost (rtx);
109 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
110 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
111 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
113 static void arm_reorg (void);
114 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
115 static unsigned long arm_compute_save_reg0_reg12_mask (void);
116 static unsigned long arm_compute_save_reg_mask (void);
117 static unsigned long arm_isr_value (tree);
118 static unsigned long arm_compute_func_type (void);
119 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
120 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
121 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
122 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
123 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
125 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
126 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
127 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
128 static int arm_comp_type_attributes (const_tree, const_tree);
129 static void arm_set_default_type_attributes (tree);
130 static int arm_adjust_cost (rtx, rtx, rtx, int);
131 static int count_insns_for_constant (HOST_WIDE_INT, int);
132 static int arm_get_strip_length (int);
133 static bool arm_function_ok_for_sibcall (tree, tree);
134 static enum machine_mode arm_promote_function_mode (const_tree,
135 enum machine_mode, int *,
137 static bool arm_return_in_memory (const_tree, const_tree);
138 static rtx arm_function_value (const_tree, const_tree, bool);
139 static rtx arm_libcall_value (enum machine_mode, const_rtx);
141 static void arm_internal_label (FILE *, const char *, unsigned long);
142 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
144 static bool arm_have_conditional_execution (void);
145 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
146 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
147 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
148 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
149 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_rtx_costs (rtx, int, int, int *, bool);
152 static int arm_address_cost (rtx, bool);
153 static bool arm_memory_load_p (rtx);
154 static bool arm_cirrus_insn_p (rtx);
155 static void cirrus_reorg (rtx);
156 static void arm_init_builtins (void);
157 static void arm_init_iwmmxt_builtins (void);
158 static rtx safe_vector_operand (rtx, enum machine_mode);
159 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
160 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
161 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
162 static void emit_constant_insn (rtx cond, rtx pattern);
163 static rtx emit_set_insn (rtx, rtx);
164 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
166 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
168 static int aapcs_select_return_coproc (const_tree, const_tree);
170 #ifdef OBJECT_FORMAT_ELF
171 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
172 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
175 static void arm_encode_section_info (tree, rtx, int);
178 static void arm_file_end (void);
179 static void arm_file_start (void);
181 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
183 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
184 enum machine_mode, const_tree, bool);
185 static bool arm_promote_prototypes (const_tree);
186 static bool arm_default_short_enums (void);
187 static bool arm_align_anon_bitfield (void);
188 static bool arm_return_in_msb (const_tree);
189 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
190 static bool arm_return_in_memory (const_tree, const_tree);
191 #ifdef TARGET_UNWIND_INFO
192 static void arm_unwind_emit (FILE *, rtx);
193 static bool arm_output_ttype (rtx);
195 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
196 static rtx arm_dwarf_register_span (rtx);
198 static tree arm_cxx_guard_type (void);
199 static bool arm_cxx_guard_mask_bit (void);
200 static tree arm_get_cookie_size (tree);
201 static bool arm_cookie_has_size (void);
202 static bool arm_cxx_cdtor_returns_this (void);
203 static bool arm_cxx_key_method_may_be_inline (void);
204 static void arm_cxx_determine_class_data_visibility (tree);
205 static bool arm_cxx_class_data_always_comdat (void);
206 static bool arm_cxx_use_aeabi_atexit (void);
207 static void arm_init_libfuncs (void);
208 static tree arm_build_builtin_va_list (void);
209 static void arm_expand_builtin_va_start (tree, rtx);
210 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
211 static bool arm_handle_option (size_t, const char *, int);
212 static void arm_target_help (void);
213 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
214 static bool arm_cannot_copy_insn_p (rtx);
215 static bool arm_tls_symbol_p (rtx x);
216 static int arm_issue_rate (void);
217 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
218 static bool arm_allocate_stack_slots_for_args (void);
219 static const char *arm_invalid_parameter_type (const_tree t);
220 static const char *arm_invalid_return_type (const_tree t);
221 static tree arm_promoted_type (const_tree t);
222 static tree arm_convert_to_type (tree type, tree expr);
223 static bool arm_scalar_mode_supported_p (enum machine_mode);
224 static bool arm_frame_pointer_required (void);
225 static bool arm_can_eliminate (const int, const int);
226 static void arm_asm_trampoline_template (FILE *);
227 static void arm_trampoline_init (rtx, tree, rtx);
228 static rtx arm_trampoline_adjust_address (rtx);
229 static rtx arm_pic_static_addr (rtx orig, rtx reg);
232 /* Table of machine attributes. */
233 static const struct attribute_spec arm_attribute_table[] =
235 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
236 /* Function calls made to this symbol must be done indirectly, because
237 it may lie outside of the 26 bit addressing range of a normal function
239 { "long_call", 0, 0, false, true, true, NULL },
240 /* Whereas these functions are always known to reside within the 26 bit
242 { "short_call", 0, 0, false, true, true, NULL },
243 /* Specify the procedure call conventions for a function. */
244 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
245 /* Interrupt Service Routines have special prologue and epilogue requirements. */
246 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
247 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
248 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
250 /* ARM/PE has three new attributes:
252 dllexport - for exporting a function/variable that will live in a dll
253 dllimport - for importing a function/variable from a dll
255 Microsoft allows multiple declspecs in one __declspec, separating
256 them with spaces. We do NOT support this. Instead, use __declspec
259 { "dllimport", 0, 0, true, false, false, NULL },
260 { "dllexport", 0, 0, true, false, false, NULL },
261 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
262 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
263 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
264 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
265 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
267 { NULL, 0, 0, false, false, false, NULL }
270 /* Initialize the GCC target structure. */
271 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
272 #undef TARGET_MERGE_DECL_ATTRIBUTES
273 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
276 #undef TARGET_LEGITIMIZE_ADDRESS
277 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
279 #undef TARGET_ATTRIBUTE_TABLE
280 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
282 #undef TARGET_ASM_FILE_START
283 #define TARGET_ASM_FILE_START arm_file_start
284 #undef TARGET_ASM_FILE_END
285 #define TARGET_ASM_FILE_END arm_file_end
287 #undef TARGET_ASM_ALIGNED_SI_OP
288 #define TARGET_ASM_ALIGNED_SI_OP NULL
289 #undef TARGET_ASM_INTEGER
290 #define TARGET_ASM_INTEGER arm_assemble_integer
292 #undef TARGET_PRINT_OPERAND
293 #define TARGET_PRINT_OPERAND arm_print_operand
294 #undef TARGET_PRINT_OPERAND_ADDRESS
295 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
296 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
297 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
299 #undef TARGET_ASM_FUNCTION_PROLOGUE
300 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
302 #undef TARGET_ASM_FUNCTION_EPILOGUE
303 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
305 #undef TARGET_DEFAULT_TARGET_FLAGS
306 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
307 #undef TARGET_HANDLE_OPTION
308 #define TARGET_HANDLE_OPTION arm_handle_option
310 #define TARGET_HELP arm_target_help
312 #undef TARGET_COMP_TYPE_ATTRIBUTES
313 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
315 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
316 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
318 #undef TARGET_SCHED_ADJUST_COST
319 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
321 #undef TARGET_ENCODE_SECTION_INFO
323 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
325 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
328 #undef TARGET_STRIP_NAME_ENCODING
329 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
331 #undef TARGET_ASM_INTERNAL_LABEL
332 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
334 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
335 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
337 #undef TARGET_FUNCTION_VALUE
338 #define TARGET_FUNCTION_VALUE arm_function_value
340 #undef TARGET_LIBCALL_VALUE
341 #define TARGET_LIBCALL_VALUE arm_libcall_value
343 #undef TARGET_ASM_OUTPUT_MI_THUNK
344 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
345 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
346 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
348 #undef TARGET_RTX_COSTS
349 #define TARGET_RTX_COSTS arm_rtx_costs
350 #undef TARGET_ADDRESS_COST
351 #define TARGET_ADDRESS_COST arm_address_cost
353 #undef TARGET_SHIFT_TRUNCATION_MASK
354 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
355 #undef TARGET_VECTOR_MODE_SUPPORTED_P
356 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
358 #undef TARGET_MACHINE_DEPENDENT_REORG
359 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
361 #undef TARGET_INIT_BUILTINS
362 #define TARGET_INIT_BUILTINS arm_init_builtins
363 #undef TARGET_EXPAND_BUILTIN
364 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
366 #undef TARGET_INIT_LIBFUNCS
367 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
369 #undef TARGET_PROMOTE_FUNCTION_MODE
370 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
371 #undef TARGET_PROMOTE_PROTOTYPES
372 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
373 #undef TARGET_PASS_BY_REFERENCE
374 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
375 #undef TARGET_ARG_PARTIAL_BYTES
376 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
378 #undef TARGET_SETUP_INCOMING_VARARGS
379 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
381 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
382 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
384 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
385 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
386 #undef TARGET_TRAMPOLINE_INIT
387 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
388 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
389 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
391 #undef TARGET_DEFAULT_SHORT_ENUMS
392 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
394 #undef TARGET_ALIGN_ANON_BITFIELD
395 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
397 #undef TARGET_NARROW_VOLATILE_BITFIELD
398 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
400 #undef TARGET_CXX_GUARD_TYPE
401 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
403 #undef TARGET_CXX_GUARD_MASK_BIT
404 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
406 #undef TARGET_CXX_GET_COOKIE_SIZE
407 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
409 #undef TARGET_CXX_COOKIE_HAS_SIZE
410 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
412 #undef TARGET_CXX_CDTOR_RETURNS_THIS
413 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
415 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
416 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
418 #undef TARGET_CXX_USE_AEABI_ATEXIT
419 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
421 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
422 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
423 arm_cxx_determine_class_data_visibility
425 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
426 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
428 #undef TARGET_RETURN_IN_MSB
429 #define TARGET_RETURN_IN_MSB arm_return_in_msb
431 #undef TARGET_RETURN_IN_MEMORY
432 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
434 #undef TARGET_MUST_PASS_IN_STACK
435 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
437 #ifdef TARGET_UNWIND_INFO
438 #undef TARGET_ASM_UNWIND_EMIT
439 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
441 /* EABI unwinding tables use a different format for the typeinfo tables. */
442 #undef TARGET_ASM_TTYPE
443 #define TARGET_ASM_TTYPE arm_output_ttype
445 #undef TARGET_ARM_EABI_UNWINDER
446 #define TARGET_ARM_EABI_UNWINDER true
447 #endif /* TARGET_UNWIND_INFO */
449 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
450 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
452 #undef TARGET_DWARF_REGISTER_SPAN
453 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
455 #undef TARGET_CANNOT_COPY_INSN_P
456 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
459 #undef TARGET_HAVE_TLS
460 #define TARGET_HAVE_TLS true
463 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
464 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
466 #undef TARGET_CANNOT_FORCE_CONST_MEM
467 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
469 #undef TARGET_MAX_ANCHOR_OFFSET
470 #define TARGET_MAX_ANCHOR_OFFSET 4095
472 /* The minimum is set such that the total size of the block
473 for a particular anchor is -4088 + 1 + 4095 bytes, which is
474 divisible by eight, ensuring natural spacing of anchors. */
475 #undef TARGET_MIN_ANCHOR_OFFSET
476 #define TARGET_MIN_ANCHOR_OFFSET -4088
478 #undef TARGET_SCHED_ISSUE_RATE
479 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
481 #undef TARGET_MANGLE_TYPE
482 #define TARGET_MANGLE_TYPE arm_mangle_type
484 #undef TARGET_BUILD_BUILTIN_VA_LIST
485 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
486 #undef TARGET_EXPAND_BUILTIN_VA_START
487 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
488 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
489 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
492 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
493 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
496 #undef TARGET_LEGITIMATE_ADDRESS_P
497 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
499 #undef TARGET_INVALID_PARAMETER_TYPE
500 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
502 #undef TARGET_INVALID_RETURN_TYPE
503 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
505 #undef TARGET_PROMOTED_TYPE
506 #define TARGET_PROMOTED_TYPE arm_promoted_type
508 #undef TARGET_CONVERT_TO_TYPE
509 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
511 #undef TARGET_SCALAR_MODE_SUPPORTED_P
512 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
514 #undef TARGET_FRAME_POINTER_REQUIRED
515 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
517 #undef TARGET_CAN_ELIMINATE
518 #define TARGET_CAN_ELIMINATE arm_can_eliminate
520 struct gcc_target targetm = TARGET_INITIALIZER;
522 /* Obstack for minipool constant handling. */
523 static struct obstack minipool_obstack;
524 static char * minipool_startobj;
526 /* The maximum number of insns skipped which
527 will be conditionalised if possible. */
528 static int max_insns_skipped = 5;
530 extern FILE * asm_out_file;
532 /* True if we are currently building a constant table. */
533 int making_const_table;
535 /* The processor for which instructions should be scheduled. */
536 enum processor_type arm_tune = arm_none;
538 /* The current tuning set. */
539 const struct tune_params *current_tune;
541 /* Which floating point hardware to schedule for. */
544 /* Which floating popint hardware to use. */
545 const struct arm_fpu_desc *arm_fpu_desc;
547 /* Whether to use floating point hardware. */
548 enum float_abi_type arm_float_abi;
550 /* Which __fp16 format to use. */
551 enum arm_fp16_format_type arm_fp16_format;
553 /* Which ABI to use. */
554 enum arm_abi_type arm_abi;
556 /* Which thread pointer model to use. */
557 enum arm_tp_type target_thread_pointer = TP_AUTO;
559 /* Used to parse -mstructure_size_boundary command line option. */
560 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
562 /* Used for Thumb call_via trampolines. */
563 rtx thumb_call_via_label[14];
564 static int thumb_call_reg_needed;
566 /* Bit values used to identify processor capabilities. */
567 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
568 #define FL_ARCH3M (1 << 1) /* Extended multiply */
569 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
570 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
571 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
572 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
573 #define FL_THUMB (1 << 6) /* Thumb aware */
574 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
575 #define FL_STRONG (1 << 8) /* StrongARM */
576 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
577 #define FL_XSCALE (1 << 10) /* XScale */
578 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
579 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
580 media instructions. */
581 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
582 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
583 Note: ARM6 & 7 derivatives only. */
584 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
585 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
586 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
588 #define FL_DIV (1 << 18) /* Hardware divide. */
589 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
590 #define FL_NEON (1 << 20) /* Neon instructions. */
591 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
594 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
596 /* Flags that only effect tuning, not available instructions. */
597 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
600 #define FL_FOR_ARCH2 FL_NOTM
601 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
602 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
603 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
604 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
605 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
606 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
607 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
608 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
609 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
610 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
611 #define FL_FOR_ARCH6J FL_FOR_ARCH6
612 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
613 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
614 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
615 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
616 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
617 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
618 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
619 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
620 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
621 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
623 /* The bits in this mask specify which
624 instructions we are allowed to generate. */
625 static unsigned long insn_flags = 0;
627 /* The bits in this mask specify which instruction scheduling options should
629 static unsigned long tune_flags = 0;
631 /* The following are used in the arm.md file as equivalents to bits
632 in the above two flag variables. */
634 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
637 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
640 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
643 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
646 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
649 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
652 /* Nonzero if this chip supports the ARM 6K extensions. */
655 /* Nonzero if instructions not present in the 'M' profile can be used. */
656 int arm_arch_notm = 0;
658 /* Nonzero if instructions present in ARMv7E-M can be used. */
661 /* Nonzero if this chip can benefit from load scheduling. */
662 int arm_ld_sched = 0;
664 /* Nonzero if this chip is a StrongARM. */
665 int arm_tune_strongarm = 0;
667 /* Nonzero if this chip is a Cirrus variant. */
668 int arm_arch_cirrus = 0;
670 /* Nonzero if this chip supports Intel Wireless MMX technology. */
671 int arm_arch_iwmmxt = 0;
673 /* Nonzero if this chip is an XScale. */
674 int arm_arch_xscale = 0;
676 /* Nonzero if tuning for XScale */
677 int arm_tune_xscale = 0;
679 /* Nonzero if we want to tune for stores that access the write-buffer.
680 This typically means an ARM6 or ARM7 with MMU or MPU. */
681 int arm_tune_wbuf = 0;
683 /* Nonzero if tuning for Cortex-A9. */
684 int arm_tune_cortex_a9 = 0;
686 /* Nonzero if generating Thumb instructions. */
689 /* Nonzero if we should define __THUMB_INTERWORK__ in the
691 XXX This is a bit of a hack, it's intended to help work around
692 problems in GLD which doesn't understand that armv5t code is
693 interworking clean. */
694 int arm_cpp_interwork = 0;
696 /* Nonzero if chip supports Thumb 2. */
699 /* Nonzero if chip supports integer division instruction. */
702 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
703 we must report the mode of the memory reference from
704 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
705 enum machine_mode output_memory_reference_mode;
707 /* The register number to be used for the PIC offset register. */
708 unsigned arm_pic_register = INVALID_REGNUM;
710 /* Set to 1 after arm_reorg has started. Reset to start at the start of
711 the next function. */
712 static int after_arm_reorg = 0;
714 static enum arm_pcs arm_pcs_default;
716 /* For an explanation of these variables, see final_prescan_insn below. */
718 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
719 enum arm_cond_code arm_current_cc;
721 int arm_target_label;
722 /* The number of conditionally executed insns, including the current insn. */
723 int arm_condexec_count = 0;
724 /* A bitmask specifying the patterns for the IT block.
725 Zero means do not output an IT block before this insn. */
726 int arm_condexec_mask = 0;
727 /* The number of bits used in arm_condexec_mask. */
728 int arm_condexec_masklen = 0;
730 /* The condition codes of the ARM, and the inverse function. */
731 static const char * const arm_condition_codes[] =
733 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
734 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
737 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
738 #define streq(string1, string2) (strcmp (string1, string2) == 0)
740 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
741 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
742 | (1 << PIC_OFFSET_TABLE_REGNUM)))
744 /* Initialization code. */
748 const char *const name;
749 enum processor_type core;
751 const unsigned long flags;
752 const struct tune_params *const tune;
755 const struct tune_params arm_slowmul_tune =
757 arm_slowmul_rtx_costs,
761 const struct tune_params arm_fastmul_tune =
763 arm_fastmul_rtx_costs,
767 const struct tune_params arm_xscale_tune =
769 arm_xscale_rtx_costs,
773 const struct tune_params arm_9e_tune =
779 /* Not all of these give usefully different compilation alternatives,
780 but there is no simple way of generalizing them. */
781 static const struct processors all_cores[] =
784 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
785 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
786 #include "arm-cores.def"
788 {NULL, arm_none, NULL, 0, NULL}
791 static const struct processors all_architectures[] =
793 /* ARM Architectures */
794 /* We don't specify tuning costs here as it will be figured out
797 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
798 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
799 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
800 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
801 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
802 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
803 implementations that support it, so we will leave it out for now. */
804 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
805 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
806 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
807 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
808 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
809 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
810 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
811 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
812 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
813 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
814 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
815 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
816 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
817 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
818 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
819 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
820 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
821 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
822 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
823 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
824 {NULL, arm_none, NULL, 0 , NULL}
828 /* These are populated as commandline arguments are processed, or NULL
830 static const struct processors *arm_selected_arch;
831 static const struct processors *arm_selected_cpu;
832 static const struct processors *arm_selected_tune;
834 /* The name of the preprocessor macro to define for this architecture. */
836 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
838 /* Available values for -mfpu=. */
840 static const struct arm_fpu_desc all_fpus[] =
842 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
843 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
844 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
845 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
846 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
847 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
848 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
849 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
850 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
851 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
852 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
853 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
854 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
855 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
856 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
857 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
858 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
859 /* Compatibility aliases. */
860 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
867 enum float_abi_type abi_type;
871 /* Available values for -mfloat-abi=. */
873 static const struct float_abi all_float_abis[] =
875 {"soft", ARM_FLOAT_ABI_SOFT},
876 {"softfp", ARM_FLOAT_ABI_SOFTFP},
877 {"hard", ARM_FLOAT_ABI_HARD}
884 enum arm_fp16_format_type fp16_format_type;
888 /* Available values for -mfp16-format=. */
890 static const struct fp16_format all_fp16_formats[] =
892 {"none", ARM_FP16_FORMAT_NONE},
893 {"ieee", ARM_FP16_FORMAT_IEEE},
894 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
901 enum arm_abi_type abi_type;
905 /* Available values for -mabi=. */
907 static const struct abi_name arm_all_abis[] =
909 {"apcs-gnu", ARM_ABI_APCS},
910 {"atpcs", ARM_ABI_ATPCS},
911 {"aapcs", ARM_ABI_AAPCS},
912 {"iwmmxt", ARM_ABI_IWMMXT},
913 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
916 /* Supported TLS relocations. */
926 /* The maximum number of insns to be used when loading a constant. */
928 arm_constant_limit (bool size_p)
930 return size_p ? 1 : current_tune->constant_limit;
933 /* Emit an insn that's a simple single-set. Both the operands must be known
936 emit_set_insn (rtx x, rtx y)
938 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
941 /* Return the number of bits set in VALUE. */
943 bit_count (unsigned long value)
945 unsigned long count = 0;
950 value &= value - 1; /* Clear the least-significant set bit. */
956 /* Set up library functions unique to ARM. */
959 arm_init_libfuncs (void)
961 /* There are no special library functions unless we are using the
966 /* The functions below are described in Section 4 of the "Run-Time
967 ABI for the ARM architecture", Version 1.0. */
969 /* Double-precision floating-point arithmetic. Table 2. */
970 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
971 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
972 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
973 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
974 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
976 /* Double-precision comparisons. Table 3. */
977 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
978 set_optab_libfunc (ne_optab, DFmode, NULL);
979 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
980 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
981 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
982 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
983 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
985 /* Single-precision floating-point arithmetic. Table 4. */
986 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
987 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
988 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
989 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
990 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
992 /* Single-precision comparisons. Table 5. */
993 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
994 set_optab_libfunc (ne_optab, SFmode, NULL);
995 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
996 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
997 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
998 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
999 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1001 /* Floating-point to integer conversions. Table 6. */
1002 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1003 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1004 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1005 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1006 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1007 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1008 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1009 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1011 /* Conversions between floating types. Table 7. */
1012 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1013 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1015 /* Integer to floating-point conversions. Table 8. */
1016 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1017 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1018 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1019 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1020 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1021 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1022 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1023 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1025 /* Long long. Table 9. */
1026 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1027 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1028 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1029 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1030 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1031 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1032 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1033 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1035 /* Integer (32/32->32) division. \S 4.3.1. */
1036 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1037 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1039 /* The divmod functions are designed so that they can be used for
1040 plain division, even though they return both the quotient and the
1041 remainder. The quotient is returned in the usual location (i.e.,
1042 r0 for SImode, {r0, r1} for DImode), just as would be expected
1043 for an ordinary division routine. Because the AAPCS calling
1044 conventions specify that all of { r0, r1, r2, r3 } are
1045 callee-saved registers, there is no need to tell the compiler
1046 explicitly that those registers are clobbered by these
1048 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1049 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1051 /* For SImode division the ABI provides div-without-mod routines,
1052 which are faster. */
1053 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1054 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1056 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1057 divmod libcalls instead. */
1058 set_optab_libfunc (smod_optab, DImode, NULL);
1059 set_optab_libfunc (umod_optab, DImode, NULL);
1060 set_optab_libfunc (smod_optab, SImode, NULL);
1061 set_optab_libfunc (umod_optab, SImode, NULL);
1063 /* Half-precision float operations. The compiler handles all operations
1064 with NULL libfuncs by converting the SFmode. */
1065 switch (arm_fp16_format)
1067 case ARM_FP16_FORMAT_IEEE:
1068 case ARM_FP16_FORMAT_ALTERNATIVE:
1071 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1072 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1074 : "__gnu_f2h_alternative"));
1075 set_conv_libfunc (sext_optab, SFmode, HFmode,
1076 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1078 : "__gnu_h2f_alternative"));
1081 set_optab_libfunc (add_optab, HFmode, NULL);
1082 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1083 set_optab_libfunc (smul_optab, HFmode, NULL);
1084 set_optab_libfunc (neg_optab, HFmode, NULL);
1085 set_optab_libfunc (sub_optab, HFmode, NULL);
1088 set_optab_libfunc (eq_optab, HFmode, NULL);
1089 set_optab_libfunc (ne_optab, HFmode, NULL);
1090 set_optab_libfunc (lt_optab, HFmode, NULL);
1091 set_optab_libfunc (le_optab, HFmode, NULL);
1092 set_optab_libfunc (ge_optab, HFmode, NULL);
1093 set_optab_libfunc (gt_optab, HFmode, NULL);
1094 set_optab_libfunc (unord_optab, HFmode, NULL);
1101 if (TARGET_AAPCS_BASED)
1102 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1105 /* On AAPCS systems, this is the "struct __va_list". */
1106 static GTY(()) tree va_list_type;
1108 /* Return the type to use as __builtin_va_list. */
1110 arm_build_builtin_va_list (void)
1115 if (!TARGET_AAPCS_BASED)
1116 return std_build_builtin_va_list ();
1118 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1126 The C Library ABI further reinforces this definition in \S
1129 We must follow this definition exactly. The structure tag
1130 name is visible in C++ mangled names, and thus forms a part
1131 of the ABI. The field name may be used by people who
1132 #include <stdarg.h>. */
1133 /* Create the type. */
1134 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1135 /* Give it the required name. */
1136 va_list_name = build_decl (BUILTINS_LOCATION,
1138 get_identifier ("__va_list"),
1140 DECL_ARTIFICIAL (va_list_name) = 1;
1141 TYPE_NAME (va_list_type) = va_list_name;
1142 /* Create the __ap field. */
1143 ap_field = build_decl (BUILTINS_LOCATION,
1145 get_identifier ("__ap"),
1147 DECL_ARTIFICIAL (ap_field) = 1;
1148 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1149 TYPE_FIELDS (va_list_type) = ap_field;
1150 /* Compute its layout. */
1151 layout_type (va_list_type);
1153 return va_list_type;
1156 /* Return an expression of type "void *" pointing to the next
1157 available argument in a variable-argument list. VALIST is the
1158 user-level va_list object, of type __builtin_va_list. */
1160 arm_extract_valist_ptr (tree valist)
1162 if (TREE_TYPE (valist) == error_mark_node)
1163 return error_mark_node;
1165 /* On an AAPCS target, the pointer is stored within "struct
1167 if (TARGET_AAPCS_BASED)
1169 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1170 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1171 valist, ap_field, NULL_TREE);
1177 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1179 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1181 valist = arm_extract_valist_ptr (valist);
1182 std_expand_builtin_va_start (valist, nextarg);
1185 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1187 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1190 valist = arm_extract_valist_ptr (valist);
1191 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1194 /* Lookup NAME in SEL. */
1196 static const struct processors *
1197 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1199 if (!(name && *name))
1202 for (; sel->name != NULL; sel++)
1204 if (streq (name, sel->name))
1208 error ("bad value (%s) for %s switch", name, desc);
1212 /* Implement TARGET_HANDLE_OPTION. */
1215 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1220 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1224 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1227 case OPT_mhard_float:
1228 target_float_abi_name = "hard";
1231 case OPT_msoft_float:
1232 target_float_abi_name = "soft";
1236 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1245 arm_target_help (void)
1248 static int columns = 0;
1251 /* If we have not done so already, obtain the desired maximum width of
1252 the output. Note - this is a duplication of the code at the start of
1253 gcc/opts.c:print_specific_help() - the two copies should probably be
1254 replaced by a single function. */
1259 GET_ENVIRONMENT (p, "COLUMNS");
1262 int value = atoi (p);
1269 /* Use a reasonable default. */
1273 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1275 /* The - 2 is because we know that the last entry in the array is NULL. */
1276 i = ARRAY_SIZE (all_cores) - 2;
1278 printf (" %s", all_cores[i].name);
1279 remaining = columns - (strlen (all_cores[i].name) + 4);
1280 gcc_assert (remaining >= 0);
1284 int len = strlen (all_cores[i].name);
1286 if (remaining > len + 2)
1288 printf (", %s", all_cores[i].name);
1289 remaining -= len + 2;
1295 printf ("\n %s", all_cores[i].name);
1296 remaining = columns - (len + 4);
1300 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1302 i = ARRAY_SIZE (all_architectures) - 2;
1305 printf (" %s", all_architectures[i].name);
1306 remaining = columns - (strlen (all_architectures[i].name) + 4);
1307 gcc_assert (remaining >= 0);
1311 int len = strlen (all_architectures[i].name);
1313 if (remaining > len + 2)
1315 printf (", %s", all_architectures[i].name);
1316 remaining -= len + 2;
1322 printf ("\n %s", all_architectures[i].name);
1323 remaining = columns - (len + 4);
1330 /* Fix up any incompatible options that the user has specified.
1331 This has now turned into a maze. */
1333 arm_override_options (void)
1337 if (arm_selected_arch)
1339 if (arm_selected_cpu)
1341 /* Check for conflict between mcpu and march. */
1342 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1344 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1345 arm_selected_cpu->name, arm_selected_arch->name);
1346 /* -march wins for code generation.
1347 -mcpu wins for default tuning. */
1348 if (!arm_selected_tune)
1349 arm_selected_tune = arm_selected_cpu;
1351 arm_selected_cpu = arm_selected_arch;
1355 arm_selected_arch = NULL;
1358 /* Pick a CPU based on the architecture. */
1359 arm_selected_cpu = arm_selected_arch;
1362 /* If the user did not specify a processor, choose one for them. */
1363 if (!arm_selected_cpu)
1365 const struct processors * sel;
1366 unsigned int sought;
1368 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1369 if (!arm_selected_cpu->name)
1371 #ifdef SUBTARGET_CPU_DEFAULT
1372 /* Use the subtarget default CPU if none was specified by
1374 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1376 /* Default to ARM6. */
1377 if (arm_selected_cpu->name)
1378 arm_selected_cpu = &all_cores[arm6];
1381 sel = arm_selected_cpu;
1382 insn_flags = sel->flags;
1384 /* Now check to see if the user has specified some command line
1385 switch that require certain abilities from the cpu. */
1388 if (TARGET_INTERWORK || TARGET_THUMB)
1390 sought |= (FL_THUMB | FL_MODE32);
1392 /* There are no ARM processors that support both APCS-26 and
1393 interworking. Therefore we force FL_MODE26 to be removed
1394 from insn_flags here (if it was set), so that the search
1395 below will always be able to find a compatible processor. */
1396 insn_flags &= ~FL_MODE26;
1399 if (sought != 0 && ((sought & insn_flags) != sought))
1401 /* Try to locate a CPU type that supports all of the abilities
1402 of the default CPU, plus the extra abilities requested by
1404 for (sel = all_cores; sel->name != NULL; sel++)
1405 if ((sel->flags & sought) == (sought | insn_flags))
1408 if (sel->name == NULL)
1410 unsigned current_bit_count = 0;
1411 const struct processors * best_fit = NULL;
1413 /* Ideally we would like to issue an error message here
1414 saying that it was not possible to find a CPU compatible
1415 with the default CPU, but which also supports the command
1416 line options specified by the programmer, and so they
1417 ought to use the -mcpu=<name> command line option to
1418 override the default CPU type.
1420 If we cannot find a cpu that has both the
1421 characteristics of the default cpu and the given
1422 command line options we scan the array again looking
1423 for a best match. */
1424 for (sel = all_cores; sel->name != NULL; sel++)
1425 if ((sel->flags & sought) == sought)
1429 count = bit_count (sel->flags & insn_flags);
1431 if (count >= current_bit_count)
1434 current_bit_count = count;
1438 gcc_assert (best_fit);
1442 arm_selected_cpu = sel;
1446 gcc_assert (arm_selected_cpu);
1447 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1448 if (!arm_selected_tune)
1449 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1451 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1452 insn_flags = arm_selected_cpu->flags;
1454 arm_tune = arm_selected_tune->core;
1455 tune_flags = arm_selected_tune->flags;
1456 current_tune = arm_selected_tune->tune;
1458 if (target_fp16_format_name)
1460 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1462 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1464 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1468 if (i == ARRAY_SIZE (all_fp16_formats))
1469 error ("invalid __fp16 format option: -mfp16-format=%s",
1470 target_fp16_format_name);
1473 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1475 if (target_abi_name)
1477 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1479 if (streq (arm_all_abis[i].name, target_abi_name))
1481 arm_abi = arm_all_abis[i].abi_type;
1485 if (i == ARRAY_SIZE (arm_all_abis))
1486 error ("invalid ABI option: -mabi=%s", target_abi_name);
1489 arm_abi = ARM_DEFAULT_ABI;
1491 /* Make sure that the processor choice does not conflict with any of the
1492 other command line choices. */
1493 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1494 error ("target CPU does not support ARM mode");
1496 /* BPABI targets use linker tricks to allow interworking on cores
1497 without thumb support. */
1498 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1500 warning (0, "target CPU does not support interworking" );
1501 target_flags &= ~MASK_INTERWORK;
1504 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1506 warning (0, "target CPU does not support THUMB instructions");
1507 target_flags &= ~MASK_THUMB;
1510 if (TARGET_APCS_FRAME && TARGET_THUMB)
1512 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1513 target_flags &= ~MASK_APCS_FRAME;
1516 /* Callee super interworking implies thumb interworking. Adding
1517 this to the flags here simplifies the logic elsewhere. */
1518 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1519 target_flags |= MASK_INTERWORK;
1521 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1522 from here where no function is being compiled currently. */
1523 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1524 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1526 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1527 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1529 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1530 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1532 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1534 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1535 target_flags |= MASK_APCS_FRAME;
1538 if (TARGET_POKE_FUNCTION_NAME)
1539 target_flags |= MASK_APCS_FRAME;
1541 if (TARGET_APCS_REENT && flag_pic)
1542 error ("-fpic and -mapcs-reent are incompatible");
1544 if (TARGET_APCS_REENT)
1545 warning (0, "APCS reentrant code not supported. Ignored");
1547 /* If this target is normally configured to use APCS frames, warn if they
1548 are turned off and debugging is turned on. */
1550 && write_symbols != NO_DEBUG
1551 && !TARGET_APCS_FRAME
1552 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1553 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1555 if (TARGET_APCS_FLOAT)
1556 warning (0, "passing floating point arguments in fp regs not yet supported");
1558 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1559 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1560 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1561 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1562 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1563 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1564 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1565 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1566 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1567 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1568 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1569 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1570 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1572 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1573 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1574 thumb_code = (TARGET_ARM == 0);
1575 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1576 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1577 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1578 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1579 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1581 /* If we are not using the default (ARM mode) section anchor offset
1582 ranges, then set the correct ranges now. */
1585 /* Thumb-1 LDR instructions cannot have negative offsets.
1586 Permissible positive offset ranges are 5-bit (for byte loads),
1587 6-bit (for halfword loads), or 7-bit (for word loads).
1588 Empirical results suggest a 7-bit anchor range gives the best
1589 overall code size. */
1590 targetm.min_anchor_offset = 0;
1591 targetm.max_anchor_offset = 127;
1593 else if (TARGET_THUMB2)
1595 /* The minimum is set such that the total size of the block
1596 for a particular anchor is 248 + 1 + 4095 bytes, which is
1597 divisible by eight, ensuring natural spacing of anchors. */
1598 targetm.min_anchor_offset = -248;
1599 targetm.max_anchor_offset = 4095;
1602 /* V5 code we generate is completely interworking capable, so we turn off
1603 TARGET_INTERWORK here to avoid many tests later on. */
1605 /* XXX However, we must pass the right pre-processor defines to CPP
1606 or GLD can get confused. This is a hack. */
1607 if (TARGET_INTERWORK)
1608 arm_cpp_interwork = 1;
1611 target_flags &= ~MASK_INTERWORK;
1613 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1614 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1616 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1617 error ("iwmmxt abi requires an iwmmxt capable cpu");
1619 if (target_fpu_name == NULL && target_fpe_name != NULL)
1621 if (streq (target_fpe_name, "2"))
1622 target_fpu_name = "fpe2";
1623 else if (streq (target_fpe_name, "3"))
1624 target_fpu_name = "fpe3";
1626 error ("invalid floating point emulation option: -mfpe=%s",
1630 if (target_fpu_name == NULL)
1632 #ifdef FPUTYPE_DEFAULT
1633 target_fpu_name = FPUTYPE_DEFAULT;
1635 if (arm_arch_cirrus)
1636 target_fpu_name = "maverick";
1638 target_fpu_name = "fpe2";
1642 arm_fpu_desc = NULL;
1643 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1645 if (streq (all_fpus[i].name, target_fpu_name))
1647 arm_fpu_desc = &all_fpus[i];
1654 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1658 switch (arm_fpu_desc->model)
1660 case ARM_FP_MODEL_FPA:
1661 if (arm_fpu_desc->rev == 2)
1662 arm_fpu_attr = FPU_FPE2;
1663 else if (arm_fpu_desc->rev == 3)
1664 arm_fpu_attr = FPU_FPE3;
1666 arm_fpu_attr = FPU_FPA;
1669 case ARM_FP_MODEL_MAVERICK:
1670 arm_fpu_attr = FPU_MAVERICK;
1673 case ARM_FP_MODEL_VFP:
1674 arm_fpu_attr = FPU_VFP;
1681 if (target_float_abi_name != NULL)
1683 /* The user specified a FP ABI. */
1684 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1686 if (streq (all_float_abis[i].name, target_float_abi_name))
1688 arm_float_abi = all_float_abis[i].abi_type;
1692 if (i == ARRAY_SIZE (all_float_abis))
1693 error ("invalid floating point abi: -mfloat-abi=%s",
1694 target_float_abi_name);
1697 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1699 if (TARGET_AAPCS_BASED
1700 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1701 error ("FPA is unsupported in the AAPCS");
1703 if (TARGET_AAPCS_BASED)
1705 if (TARGET_CALLER_INTERWORKING)
1706 error ("AAPCS does not support -mcaller-super-interworking");
1708 if (TARGET_CALLEE_INTERWORKING)
1709 error ("AAPCS does not support -mcallee-super-interworking");
1712 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1713 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1714 will ever exist. GCC makes no attempt to support this combination. */
1715 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1716 sorry ("iWMMXt and hardware floating point");
1718 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1719 if (TARGET_THUMB2 && TARGET_IWMMXT)
1720 sorry ("Thumb-2 iWMMXt");
1722 /* __fp16 support currently assumes the core has ldrh. */
1723 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1724 sorry ("__fp16 and no ldrh");
1726 /* If soft-float is specified then don't use FPU. */
1727 if (TARGET_SOFT_FLOAT)
1728 arm_fpu_attr = FPU_NONE;
1730 if (TARGET_AAPCS_BASED)
1732 if (arm_abi == ARM_ABI_IWMMXT)
1733 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1734 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1735 && TARGET_HARD_FLOAT
1737 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1739 arm_pcs_default = ARM_PCS_AAPCS;
1743 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1744 sorry ("-mfloat-abi=hard and VFP");
1746 if (arm_abi == ARM_ABI_APCS)
1747 arm_pcs_default = ARM_PCS_APCS;
1749 arm_pcs_default = ARM_PCS_ATPCS;
1752 /* For arm2/3 there is no need to do any scheduling if there is only
1753 a floating point emulator, or we are doing software floating-point. */
1754 if ((TARGET_SOFT_FLOAT
1755 || (TARGET_FPA && arm_fpu_desc->rev))
1756 && (tune_flags & FL_MODE32) == 0)
1757 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1759 if (target_thread_switch)
1761 if (strcmp (target_thread_switch, "soft") == 0)
1762 target_thread_pointer = TP_SOFT;
1763 else if (strcmp (target_thread_switch, "auto") == 0)
1764 target_thread_pointer = TP_AUTO;
1765 else if (strcmp (target_thread_switch, "cp15") == 0)
1766 target_thread_pointer = TP_CP15;
1768 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1771 /* Use the cp15 method if it is available. */
1772 if (target_thread_pointer == TP_AUTO)
1774 if (arm_arch6k && !TARGET_THUMB1)
1775 target_thread_pointer = TP_CP15;
1777 target_thread_pointer = TP_SOFT;
1780 if (TARGET_HARD_TP && TARGET_THUMB1)
1781 error ("can not use -mtp=cp15 with 16-bit Thumb");
1783 /* Override the default structure alignment for AAPCS ABI. */
1784 if (TARGET_AAPCS_BASED)
1785 arm_structure_size_boundary = 8;
1787 if (structure_size_string != NULL)
1789 int size = strtol (structure_size_string, NULL, 0);
1791 if (size == 8 || size == 32
1792 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1793 arm_structure_size_boundary = size;
1795 warning (0, "structure size boundary can only be set to %s",
1796 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1799 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1801 error ("RTP PIC is incompatible with Thumb");
1805 /* If stack checking is disabled, we can use r10 as the PIC register,
1806 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1807 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1809 if (TARGET_VXWORKS_RTP)
1810 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1811 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1814 if (flag_pic && TARGET_VXWORKS_RTP)
1815 arm_pic_register = 9;
1817 if (arm_pic_register_string != NULL)
1819 int pic_register = decode_reg_name (arm_pic_register_string);
1822 warning (0, "-mpic-register= is useless without -fpic");
1824 /* Prevent the user from choosing an obviously stupid PIC register. */
1825 else if (pic_register < 0 || call_used_regs[pic_register]
1826 || pic_register == HARD_FRAME_POINTER_REGNUM
1827 || pic_register == STACK_POINTER_REGNUM
1828 || pic_register >= PC_REGNUM
1829 || (TARGET_VXWORKS_RTP
1830 && (unsigned int) pic_register != arm_pic_register))
1831 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1833 arm_pic_register = pic_register;
1836 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1837 if (fix_cm3_ldrd == 2)
1839 if (arm_selected_cpu->core == cortexm3)
1845 if (TARGET_THUMB1 && flag_schedule_insns)
1847 /* Don't warn since it's on by default in -O2. */
1848 flag_schedule_insns = 0;
1853 /* If optimizing for size, bump the number of instructions that we
1854 are prepared to conditionally execute (even on a StrongARM). */
1855 max_insns_skipped = 6;
1859 /* StrongARM has early execution of branches, so a sequence
1860 that is worth skipping is shorter. */
1861 if (arm_tune_strongarm)
1862 max_insns_skipped = 3;
1865 /* Hot/Cold partitioning is not currently supported, since we can't
1866 handle literal pool placement in that case. */
1867 if (flag_reorder_blocks_and_partition)
1869 inform (input_location,
1870 "-freorder-blocks-and-partition not supported on this architecture");
1871 flag_reorder_blocks_and_partition = 0;
1872 flag_reorder_blocks = 1;
1875 /* Register global variables with the garbage collector. */
1876 arm_add_gc_roots ();
1880 arm_add_gc_roots (void)
1882 gcc_obstack_init(&minipool_obstack);
1883 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1886 /* A table of known ARM exception types.
1887 For use with the interrupt function attribute. */
1891 const char *const arg;
1892 const unsigned long return_value;
1896 static const isr_attribute_arg isr_attribute_args [] =
1898 { "IRQ", ARM_FT_ISR },
1899 { "irq", ARM_FT_ISR },
1900 { "FIQ", ARM_FT_FIQ },
1901 { "fiq", ARM_FT_FIQ },
1902 { "ABORT", ARM_FT_ISR },
1903 { "abort", ARM_FT_ISR },
1904 { "ABORT", ARM_FT_ISR },
1905 { "abort", ARM_FT_ISR },
1906 { "UNDEF", ARM_FT_EXCEPTION },
1907 { "undef", ARM_FT_EXCEPTION },
1908 { "SWI", ARM_FT_EXCEPTION },
1909 { "swi", ARM_FT_EXCEPTION },
1910 { NULL, ARM_FT_NORMAL }
1913 /* Returns the (interrupt) function type of the current
1914 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1916 static unsigned long
1917 arm_isr_value (tree argument)
1919 const isr_attribute_arg * ptr;
1923 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1925 /* No argument - default to IRQ. */
1926 if (argument == NULL_TREE)
1929 /* Get the value of the argument. */
1930 if (TREE_VALUE (argument) == NULL_TREE
1931 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1932 return ARM_FT_UNKNOWN;
1934 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1936 /* Check it against the list of known arguments. */
1937 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1938 if (streq (arg, ptr->arg))
1939 return ptr->return_value;
1941 /* An unrecognized interrupt type. */
1942 return ARM_FT_UNKNOWN;
1945 /* Computes the type of the current function. */
1947 static unsigned long
1948 arm_compute_func_type (void)
1950 unsigned long type = ARM_FT_UNKNOWN;
1954 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1956 /* Decide if the current function is volatile. Such functions
1957 never return, and many memory cycles can be saved by not storing
1958 register values that will never be needed again. This optimization
1959 was added to speed up context switching in a kernel application. */
1961 && (TREE_NOTHROW (current_function_decl)
1962 || !(flag_unwind_tables
1963 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1964 && TREE_THIS_VOLATILE (current_function_decl))
1965 type |= ARM_FT_VOLATILE;
1967 if (cfun->static_chain_decl != NULL)
1968 type |= ARM_FT_NESTED;
1970 attr = DECL_ATTRIBUTES (current_function_decl);
1972 a = lookup_attribute ("naked", attr);
1974 type |= ARM_FT_NAKED;
1976 a = lookup_attribute ("isr", attr);
1978 a = lookup_attribute ("interrupt", attr);
1981 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1983 type |= arm_isr_value (TREE_VALUE (a));
1988 /* Returns the type of the current function. */
1991 arm_current_func_type (void)
1993 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1994 cfun->machine->func_type = arm_compute_func_type ();
1996 return cfun->machine->func_type;
2000 arm_allocate_stack_slots_for_args (void)
2002 /* Naked functions should not allocate stack slots for arguments. */
2003 return !IS_NAKED (arm_current_func_type ());
2007 /* Output assembler code for a block containing the constant parts
2008 of a trampoline, leaving space for the variable parts.
2010 On the ARM, (if r8 is the static chain regnum, and remembering that
2011 referencing pc adds an offset of 8) the trampoline looks like:
2014 .word static chain value
2015 .word function's address
2016 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2019 arm_asm_trampoline_template (FILE *f)
2023 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2024 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2026 else if (TARGET_THUMB2)
2028 /* The Thumb-2 trampoline is similar to the arm implementation.
2029 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2030 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2031 STATIC_CHAIN_REGNUM, PC_REGNUM);
2032 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2036 ASM_OUTPUT_ALIGN (f, 2);
2037 fprintf (f, "\t.code\t16\n");
2038 fprintf (f, ".Ltrampoline_start:\n");
2039 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2040 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2041 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2042 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2043 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2044 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2046 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2047 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2050 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2053 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2055 rtx fnaddr, mem, a_tramp;
2057 emit_block_move (m_tramp, assemble_trampoline_template (),
2058 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2060 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2061 emit_move_insn (mem, chain_value);
2063 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2064 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2065 emit_move_insn (mem, fnaddr);
2067 a_tramp = XEXP (m_tramp, 0);
2068 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2069 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2070 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2073 /* Thumb trampolines should be entered in thumb mode, so set
2074 the bottom bit of the address. */
2077 arm_trampoline_adjust_address (rtx addr)
2080 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2081 NULL, 0, OPTAB_LIB_WIDEN);
2085 /* Return 1 if it is possible to return using a single instruction.
2086 If SIBLING is non-null, this is a test for a return before a sibling
2087 call. SIBLING is the call insn, so we can examine its register usage. */
2090 use_return_insn (int iscond, rtx sibling)
2093 unsigned int func_type;
2094 unsigned long saved_int_regs;
2095 unsigned HOST_WIDE_INT stack_adjust;
2096 arm_stack_offsets *offsets;
2098 /* Never use a return instruction before reload has run. */
2099 if (!reload_completed)
2102 func_type = arm_current_func_type ();
2104 /* Naked, volatile and stack alignment functions need special
2106 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2109 /* So do interrupt functions that use the frame pointer and Thumb
2110 interrupt functions. */
2111 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2114 offsets = arm_get_frame_offsets ();
2115 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2117 /* As do variadic functions. */
2118 if (crtl->args.pretend_args_size
2119 || cfun->machine->uses_anonymous_args
2120 /* Or if the function calls __builtin_eh_return () */
2121 || crtl->calls_eh_return
2122 /* Or if the function calls alloca */
2123 || cfun->calls_alloca
2124 /* Or if there is a stack adjustment. However, if the stack pointer
2125 is saved on the stack, we can use a pre-incrementing stack load. */
2126 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2127 && stack_adjust == 4)))
2130 saved_int_regs = offsets->saved_regs_mask;
2132 /* Unfortunately, the insn
2134 ldmib sp, {..., sp, ...}
2136 triggers a bug on most SA-110 based devices, such that the stack
2137 pointer won't be correctly restored if the instruction takes a
2138 page fault. We work around this problem by popping r3 along with
2139 the other registers, since that is never slower than executing
2140 another instruction.
2142 We test for !arm_arch5 here, because code for any architecture
2143 less than this could potentially be run on one of the buggy
2145 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2147 /* Validate that r3 is a call-clobbered register (always true in
2148 the default abi) ... */
2149 if (!call_used_regs[3])
2152 /* ... that it isn't being used for a return value ... */
2153 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2156 /* ... or for a tail-call argument ... */
2159 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2161 if (find_regno_fusage (sibling, USE, 3))
2165 /* ... and that there are no call-saved registers in r0-r2
2166 (always true in the default ABI). */
2167 if (saved_int_regs & 0x7)
2171 /* Can't be done if interworking with Thumb, and any registers have been
2173 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2176 /* On StrongARM, conditional returns are expensive if they aren't
2177 taken and multiple registers have been stacked. */
2178 if (iscond && arm_tune_strongarm)
2180 /* Conditional return when just the LR is stored is a simple
2181 conditional-load instruction, that's not expensive. */
2182 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2186 && arm_pic_register != INVALID_REGNUM
2187 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2191 /* If there are saved registers but the LR isn't saved, then we need
2192 two instructions for the return. */
2193 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2196 /* Can't be done if any of the FPA regs are pushed,
2197 since this also requires an insn. */
2198 if (TARGET_HARD_FLOAT && TARGET_FPA)
2199 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2200 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2203 /* Likewise VFP regs. */
2204 if (TARGET_HARD_FLOAT && TARGET_VFP)
2205 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2206 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2209 if (TARGET_REALLY_IWMMXT)
2210 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2211 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2217 /* Return TRUE if int I is a valid immediate ARM constant. */
2220 const_ok_for_arm (HOST_WIDE_INT i)
2224 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2225 be all zero, or all one. */
2226 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2227 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2228 != ((~(unsigned HOST_WIDE_INT) 0)
2229 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2232 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2234 /* Fast return for 0 and small values. We must do this for zero, since
2235 the code below can't handle that one case. */
2236 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2239 /* Get the number of trailing zeros. */
2240 lowbit = ffs((int) i) - 1;
2242 /* Only even shifts are allowed in ARM mode so round down to the
2243 nearest even number. */
2247 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2252 /* Allow rotated constants in ARM mode. */
2254 && ((i & ~0xc000003f) == 0
2255 || (i & ~0xf000000f) == 0
2256 || (i & ~0xfc000003) == 0))
2263 /* Allow repeated pattern. */
2266 if (i == v || i == (v | (v << 8)))
2273 /* Return true if I is a valid constant for the operation CODE. */
2275 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2277 if (const_ok_for_arm (i))
2301 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2303 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2309 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2313 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2320 /* Emit a sequence of insns to handle a large constant.
2321 CODE is the code of the operation required, it can be any of SET, PLUS,
2322 IOR, AND, XOR, MINUS;
2323 MODE is the mode in which the operation is being performed;
2324 VAL is the integer to operate on;
2325 SOURCE is the other operand (a register, or a null-pointer for SET);
2326 SUBTARGETS means it is safe to create scratch registers if that will
2327 either produce a simpler sequence, or we will want to cse the values.
2328 Return value is the number of insns emitted. */
2330 /* ??? Tweak this for thumb2. */
2332 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2333 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2337 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2338 cond = COND_EXEC_TEST (PATTERN (insn));
2342 if (subtargets || code == SET
2343 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2344 && REGNO (target) != REGNO (source)))
2346 /* After arm_reorg has been called, we can't fix up expensive
2347 constants by pushing them into memory so we must synthesize
2348 them in-line, regardless of the cost. This is only likely to
2349 be more costly on chips that have load delay slots and we are
2350 compiling without running the scheduler (so no splitting
2351 occurred before the final instruction emission).
2353 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2355 if (!after_arm_reorg
2357 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2359 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2364 /* Currently SET is the only monadic value for CODE, all
2365 the rest are diadic. */
2366 if (TARGET_USE_MOVT)
2367 arm_emit_movpair (target, GEN_INT (val));
2369 emit_set_insn (target, GEN_INT (val));
2375 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2377 if (TARGET_USE_MOVT)
2378 arm_emit_movpair (temp, GEN_INT (val));
2380 emit_set_insn (temp, GEN_INT (val));
2382 /* For MINUS, the value is subtracted from, since we never
2383 have subtraction of a constant. */
2385 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2387 emit_set_insn (target,
2388 gen_rtx_fmt_ee (code, mode, source, temp));
2394 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2398 /* Return the number of instructions required to synthesize the given
2399 constant, if we start emitting them from bit-position I. */
2401 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2403 HOST_WIDE_INT temp1;
2404 int step_size = TARGET_ARM ? 2 : 1;
2407 gcc_assert (TARGET_ARM || i == 0);
2415 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2420 temp1 = remainder & ((0x0ff << end)
2421 | ((i < end) ? (0xff >> (32 - end)) : 0));
2422 remainder &= ~temp1;
2427 } while (remainder);
2432 find_best_start (unsigned HOST_WIDE_INT remainder)
2434 int best_consecutive_zeros = 0;
2438 /* If we aren't targetting ARM, the best place to start is always at
2443 for (i = 0; i < 32; i += 2)
2445 int consecutive_zeros = 0;
2447 if (!(remainder & (3 << i)))
2449 while ((i < 32) && !(remainder & (3 << i)))
2451 consecutive_zeros += 2;
2454 if (consecutive_zeros > best_consecutive_zeros)
2456 best_consecutive_zeros = consecutive_zeros;
2457 best_start = i - consecutive_zeros;
2463 /* So long as it won't require any more insns to do so, it's
2464 desirable to emit a small constant (in bits 0...9) in the last
2465 insn. This way there is more chance that it can be combined with
2466 a later addressing insn to form a pre-indexed load or store
2467 operation. Consider:
2469 *((volatile int *)0xe0000100) = 1;
2470 *((volatile int *)0xe0000110) = 2;
2472 We want this to wind up as:
2476 str rB, [rA, #0x100]
2478 str rB, [rA, #0x110]
2480 rather than having to synthesize both large constants from scratch.
2482 Therefore, we calculate how many insns would be required to emit
2483 the constant starting from `best_start', and also starting from
2484 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2485 yield a shorter sequence, we may as well use zero. */
2487 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2488 && (count_insns_for_constant (remainder, 0) <=
2489 count_insns_for_constant (remainder, best_start)))
2495 /* Emit an instruction with the indicated PATTERN. If COND is
2496 non-NULL, conditionalize the execution of the instruction on COND
2500 emit_constant_insn (rtx cond, rtx pattern)
2503 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2504 emit_insn (pattern);
2507 /* As above, but extra parameter GENERATE which, if clear, suppresses
2509 /* ??? This needs more work for thumb2. */
2512 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2513 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2518 int final_invert = 0;
2519 int can_negate_initial = 0;
2521 int num_bits_set = 0;
2522 int set_sign_bit_copies = 0;
2523 int clear_sign_bit_copies = 0;
2524 int clear_zero_bit_copies = 0;
2525 int set_zero_bit_copies = 0;
2527 unsigned HOST_WIDE_INT temp1, temp2;
2528 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2529 int step_size = TARGET_ARM ? 2 : 1;
2531 /* Find out which operations are safe for a given CODE. Also do a quick
2532 check for degenerate cases; these can occur when DImode operations
2543 can_negate_initial = 1;
2547 if (remainder == 0xffffffff)
2550 emit_constant_insn (cond,
2551 gen_rtx_SET (VOIDmode, target,
2552 GEN_INT (ARM_SIGN_EXTEND (val))));
2558 if (reload_completed && rtx_equal_p (target, source))
2562 emit_constant_insn (cond,
2563 gen_rtx_SET (VOIDmode, target, source));
2575 emit_constant_insn (cond,
2576 gen_rtx_SET (VOIDmode, target, const0_rtx));
2579 if (remainder == 0xffffffff)
2581 if (reload_completed && rtx_equal_p (target, source))
2584 emit_constant_insn (cond,
2585 gen_rtx_SET (VOIDmode, target, source));
2594 if (reload_completed && rtx_equal_p (target, source))
2597 emit_constant_insn (cond,
2598 gen_rtx_SET (VOIDmode, target, source));
2602 if (remainder == 0xffffffff)
2605 emit_constant_insn (cond,
2606 gen_rtx_SET (VOIDmode, target,
2607 gen_rtx_NOT (mode, source)));
2613 /* We treat MINUS as (val - source), since (source - val) is always
2614 passed as (source + (-val)). */
2618 emit_constant_insn (cond,
2619 gen_rtx_SET (VOIDmode, target,
2620 gen_rtx_NEG (mode, source)));
2623 if (const_ok_for_arm (val))
2626 emit_constant_insn (cond,
2627 gen_rtx_SET (VOIDmode, target,
2628 gen_rtx_MINUS (mode, GEN_INT (val),
2640 /* If we can do it in one insn get out quickly. */
2641 if (const_ok_for_arm (val)
2642 || (can_negate_initial && const_ok_for_arm (-val))
2643 || (can_invert && const_ok_for_arm (~val)))
2646 emit_constant_insn (cond,
2647 gen_rtx_SET (VOIDmode, target,
2649 ? gen_rtx_fmt_ee (code, mode, source,
2655 /* Calculate a few attributes that may be useful for specific
2657 /* Count number of leading zeros. */
2658 for (i = 31; i >= 0; i--)
2660 if ((remainder & (1 << i)) == 0)
2661 clear_sign_bit_copies++;
2666 /* Count number of leading 1's. */
2667 for (i = 31; i >= 0; i--)
2669 if ((remainder & (1 << i)) != 0)
2670 set_sign_bit_copies++;
2675 /* Count number of trailing zero's. */
2676 for (i = 0; i <= 31; i++)
2678 if ((remainder & (1 << i)) == 0)
2679 clear_zero_bit_copies++;
2684 /* Count number of trailing 1's. */
2685 for (i = 0; i <= 31; i++)
2687 if ((remainder & (1 << i)) != 0)
2688 set_zero_bit_copies++;
2696 /* See if we can use movw. */
2697 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2700 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2705 /* See if we can do this by sign_extending a constant that is known
2706 to be negative. This is a good, way of doing it, since the shift
2707 may well merge into a subsequent insn. */
2708 if (set_sign_bit_copies > 1)
2710 if (const_ok_for_arm
2711 (temp1 = ARM_SIGN_EXTEND (remainder
2712 << (set_sign_bit_copies - 1))))
2716 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2717 emit_constant_insn (cond,
2718 gen_rtx_SET (VOIDmode, new_src,
2720 emit_constant_insn (cond,
2721 gen_ashrsi3 (target, new_src,
2722 GEN_INT (set_sign_bit_copies - 1)));
2726 /* For an inverted constant, we will need to set the low bits,
2727 these will be shifted out of harm's way. */
2728 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2729 if (const_ok_for_arm (~temp1))
2733 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2734 emit_constant_insn (cond,
2735 gen_rtx_SET (VOIDmode, new_src,
2737 emit_constant_insn (cond,
2738 gen_ashrsi3 (target, new_src,
2739 GEN_INT (set_sign_bit_copies - 1)));
2745 /* See if we can calculate the value as the difference between two
2746 valid immediates. */
2747 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2749 int topshift = clear_sign_bit_copies & ~1;
2751 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2752 & (0xff000000 >> topshift));
2754 /* If temp1 is zero, then that means the 9 most significant
2755 bits of remainder were 1 and we've caused it to overflow.
2756 When topshift is 0 we don't need to do anything since we
2757 can borrow from 'bit 32'. */
2758 if (temp1 == 0 && topshift != 0)
2759 temp1 = 0x80000000 >> (topshift - 1);
2761 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2763 if (const_ok_for_arm (temp2))
2767 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2768 emit_constant_insn (cond,
2769 gen_rtx_SET (VOIDmode, new_src,
2771 emit_constant_insn (cond,
2772 gen_addsi3 (target, new_src,
2780 /* See if we can generate this by setting the bottom (or the top)
2781 16 bits, and then shifting these into the other half of the
2782 word. We only look for the simplest cases, to do more would cost
2783 too much. Be careful, however, not to generate this when the
2784 alternative would take fewer insns. */
2785 if (val & 0xffff0000)
2787 temp1 = remainder & 0xffff0000;
2788 temp2 = remainder & 0x0000ffff;
2790 /* Overlaps outside this range are best done using other methods. */
2791 for (i = 9; i < 24; i++)
2793 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2794 && !const_ok_for_arm (temp2))
2796 rtx new_src = (subtargets
2797 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2799 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2800 source, subtargets, generate);
2808 gen_rtx_ASHIFT (mode, source,
2815 /* Don't duplicate cases already considered. */
2816 for (i = 17; i < 24; i++)
2818 if (((temp1 | (temp1 >> i)) == remainder)
2819 && !const_ok_for_arm (temp1))
2821 rtx new_src = (subtargets
2822 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2824 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2825 source, subtargets, generate);
2830 gen_rtx_SET (VOIDmode, target,
2833 gen_rtx_LSHIFTRT (mode, source,
2844 /* If we have IOR or XOR, and the constant can be loaded in a
2845 single instruction, and we can find a temporary to put it in,
2846 then this can be done in two instructions instead of 3-4. */
2848 /* TARGET can't be NULL if SUBTARGETS is 0 */
2849 || (reload_completed && !reg_mentioned_p (target, source)))
2851 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2855 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2857 emit_constant_insn (cond,
2858 gen_rtx_SET (VOIDmode, sub,
2860 emit_constant_insn (cond,
2861 gen_rtx_SET (VOIDmode, target,
2862 gen_rtx_fmt_ee (code, mode,
2873 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2874 and the remainder 0s for e.g. 0xfff00000)
2875 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2877 This can be done in 2 instructions by using shifts with mov or mvn.
2882 mvn r0, r0, lsr #12 */
2883 if (set_sign_bit_copies > 8
2884 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2888 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2889 rtx shift = GEN_INT (set_sign_bit_copies);
2893 gen_rtx_SET (VOIDmode, sub,
2895 gen_rtx_ASHIFT (mode,
2900 gen_rtx_SET (VOIDmode, target,
2902 gen_rtx_LSHIFTRT (mode, sub,
2909 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2911 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2913 For eg. r0 = r0 | 0xfff
2918 if (set_zero_bit_copies > 8
2919 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2923 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2924 rtx shift = GEN_INT (set_zero_bit_copies);
2928 gen_rtx_SET (VOIDmode, sub,
2930 gen_rtx_LSHIFTRT (mode,
2935 gen_rtx_SET (VOIDmode, target,
2937 gen_rtx_ASHIFT (mode, sub,
2943 /* This will never be reached for Thumb2 because orn is a valid
2944 instruction. This is for Thumb1 and the ARM 32 bit cases.
2946 x = y | constant (such that ~constant is a valid constant)
2948 x = ~(~y & ~constant).
2950 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2954 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2955 emit_constant_insn (cond,
2956 gen_rtx_SET (VOIDmode, sub,
2957 gen_rtx_NOT (mode, source)));
2960 sub = gen_reg_rtx (mode);
2961 emit_constant_insn (cond,
2962 gen_rtx_SET (VOIDmode, sub,
2963 gen_rtx_AND (mode, source,
2965 emit_constant_insn (cond,
2966 gen_rtx_SET (VOIDmode, target,
2967 gen_rtx_NOT (mode, sub)));
2974 /* See if two shifts will do 2 or more insn's worth of work. */
2975 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2977 HOST_WIDE_INT shift_mask = ((0xffffffff
2978 << (32 - clear_sign_bit_copies))
2981 if ((remainder | shift_mask) != 0xffffffff)
2985 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2986 insns = arm_gen_constant (AND, mode, cond,
2987 remainder | shift_mask,
2988 new_src, source, subtargets, 1);
2993 rtx targ = subtargets ? NULL_RTX : target;
2994 insns = arm_gen_constant (AND, mode, cond,
2995 remainder | shift_mask,
2996 targ, source, subtargets, 0);
3002 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3003 rtx shift = GEN_INT (clear_sign_bit_copies);
3005 emit_insn (gen_ashlsi3 (new_src, source, shift));
3006 emit_insn (gen_lshrsi3 (target, new_src, shift));
3012 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3014 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3016 if ((remainder | shift_mask) != 0xffffffff)
3020 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3022 insns = arm_gen_constant (AND, mode, cond,
3023 remainder | shift_mask,
3024 new_src, source, subtargets, 1);
3029 rtx targ = subtargets ? NULL_RTX : target;
3031 insns = arm_gen_constant (AND, mode, cond,
3032 remainder | shift_mask,
3033 targ, source, subtargets, 0);
3039 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3040 rtx shift = GEN_INT (clear_zero_bit_copies);
3042 emit_insn (gen_lshrsi3 (new_src, source, shift));
3043 emit_insn (gen_ashlsi3 (target, new_src, shift));
3055 for (i = 0; i < 32; i++)
3056 if (remainder & (1 << i))
3060 || (code != IOR && can_invert && num_bits_set > 16))
3061 remainder ^= 0xffffffff;
3062 else if (code == PLUS && num_bits_set > 16)
3063 remainder = (-remainder) & 0xffffffff;
3065 /* For XOR, if more than half the bits are set and there's a sequence
3066 of more than 8 consecutive ones in the pattern then we can XOR by the
3067 inverted constant and then invert the final result; this may save an
3068 instruction and might also lead to the final mvn being merged with
3069 some other operation. */
3070 else if (code == XOR && num_bits_set > 16
3071 && (count_insns_for_constant (remainder ^ 0xffffffff,
3073 (remainder ^ 0xffffffff))
3074 < count_insns_for_constant (remainder,
3075 find_best_start (remainder))))
3077 remainder ^= 0xffffffff;
3086 /* Now try and find a way of doing the job in either two or three
3088 We start by looking for the largest block of zeros that are aligned on
3089 a 2-bit boundary, we then fill up the temps, wrapping around to the
3090 top of the word when we drop off the bottom.
3091 In the worst case this code should produce no more than four insns.
3092 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3093 best place to start. */
3095 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3098 /* Now start emitting the insns. */
3099 i = find_best_start (remainder);
3106 if (remainder & (3 << (i - 2)))
3111 temp1 = remainder & ((0x0ff << end)
3112 | ((i < end) ? (0xff >> (32 - end)) : 0));
3113 remainder &= ~temp1;
3117 rtx new_src, temp1_rtx;
3119 if (code == SET || code == MINUS)
3121 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3122 if (can_invert && code != MINUS)
3127 if ((final_invert || remainder) && subtargets)
3128 new_src = gen_reg_rtx (mode);
3133 else if (can_negate)
3137 temp1 = trunc_int_for_mode (temp1, mode);
3138 temp1_rtx = GEN_INT (temp1);
3142 else if (code == MINUS)
3143 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3145 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3147 emit_constant_insn (cond,
3148 gen_rtx_SET (VOIDmode, new_src,
3158 else if (code == MINUS)
3164 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3174 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3175 gen_rtx_NOT (mode, source)));
3182 /* Canonicalize a comparison so that we are more likely to recognize it.
3183 This can be done for a few constant compares, where we can make the
3184 immediate value easier to load. */
3187 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3189 enum machine_mode mode;
3190 unsigned HOST_WIDE_INT i, maxval;
3192 mode = GET_MODE (*op0);
3193 if (mode == VOIDmode)
3194 mode = GET_MODE (*op1);
3196 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3198 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3199 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3200 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3201 for GTU/LEU in Thumb mode. */
3206 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3208 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3211 if (code == GT || code == LE
3212 || (!TARGET_ARM && (code == GTU || code == LEU)))
3214 /* Missing comparison. First try to use an available
3216 if (GET_CODE (*op1) == CONST_INT)
3224 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3226 *op1 = GEN_INT (i + 1);
3227 return code == GT ? GE : LT;
3232 if (i != ~((unsigned HOST_WIDE_INT) 0)
3233 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3235 *op1 = GEN_INT (i + 1);
3236 return code == GTU ? GEU : LTU;
3244 /* If that did not work, reverse the condition. */
3248 return swap_condition (code);
3254 /* Comparisons smaller than DImode. Only adjust comparisons against
3255 an out-of-range constant. */
3256 if (GET_CODE (*op1) != CONST_INT
3257 || const_ok_for_arm (INTVAL (*op1))
3258 || const_ok_for_arm (- INTVAL (*op1)))
3272 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3274 *op1 = GEN_INT (i + 1);
3275 return code == GT ? GE : LT;
3282 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3284 *op1 = GEN_INT (i - 1);
3285 return code == GE ? GT : LE;
3291 if (i != ~((unsigned HOST_WIDE_INT) 0)
3292 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3294 *op1 = GEN_INT (i + 1);
3295 return code == GTU ? GEU : LTU;
3302 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3304 *op1 = GEN_INT (i - 1);
3305 return code == GEU ? GTU : LEU;
3317 /* Define how to find the value returned by a function. */
3320 arm_function_value(const_tree type, const_tree func,
3321 bool outgoing ATTRIBUTE_UNUSED)
3323 enum machine_mode mode;
3324 int unsignedp ATTRIBUTE_UNUSED;
3325 rtx r ATTRIBUTE_UNUSED;
3327 mode = TYPE_MODE (type);
3329 if (TARGET_AAPCS_BASED)
3330 return aapcs_allocate_return_reg (mode, type, func);
3332 /* Promote integer types. */
3333 if (INTEGRAL_TYPE_P (type))
3334 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3336 /* Promotes small structs returned in a register to full-word size
3337 for big-endian AAPCS. */
3338 if (arm_return_in_msb (type))
3340 HOST_WIDE_INT size = int_size_in_bytes (type);
3341 if (size % UNITS_PER_WORD != 0)
3343 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3344 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3348 return LIBCALL_VALUE (mode);
3352 libcall_eq (const void *p1, const void *p2)
3354 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3358 libcall_hash (const void *p1)
3360 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3364 add_libcall (htab_t htab, rtx libcall)
3366 *htab_find_slot (htab, libcall, INSERT) = libcall;
3370 arm_libcall_uses_aapcs_base (const_rtx libcall)
3372 static bool init_done = false;
3373 static htab_t libcall_htab;
3379 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3381 add_libcall (libcall_htab,
3382 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3383 add_libcall (libcall_htab,
3384 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3385 add_libcall (libcall_htab,
3386 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3387 add_libcall (libcall_htab,
3388 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3390 add_libcall (libcall_htab,
3391 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3392 add_libcall (libcall_htab,
3393 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3394 add_libcall (libcall_htab,
3395 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3396 add_libcall (libcall_htab,
3397 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3399 add_libcall (libcall_htab,
3400 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3401 add_libcall (libcall_htab,
3402 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3403 add_libcall (libcall_htab,
3404 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3405 add_libcall (libcall_htab,
3406 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3407 add_libcall (libcall_htab,
3408 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3409 add_libcall (libcall_htab,
3410 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3413 return libcall && htab_find (libcall_htab, libcall) != NULL;
3417 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3419 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3420 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3422 /* The following libcalls return their result in integer registers,
3423 even though they return a floating point value. */
3424 if (arm_libcall_uses_aapcs_base (libcall))
3425 return gen_rtx_REG (mode, ARG_REGISTER(1));
3429 return LIBCALL_VALUE (mode);
3432 /* Determine the amount of memory needed to store the possible return
3433 registers of an untyped call. */
3435 arm_apply_result_size (void)
3441 if (TARGET_HARD_FLOAT_ABI)
3447 if (TARGET_MAVERICK)
3450 if (TARGET_IWMMXT_ABI)
3457 /* Decide whether TYPE should be returned in memory (true)
3458 or in a register (false). FNTYPE is the type of the function making
3461 arm_return_in_memory (const_tree type, const_tree fntype)
3465 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3467 if (TARGET_AAPCS_BASED)
3469 /* Simple, non-aggregate types (ie not including vectors and
3470 complex) are always returned in a register (or registers).
3471 We don't care about which register here, so we can short-cut
3472 some of the detail. */
3473 if (!AGGREGATE_TYPE_P (type)
3474 && TREE_CODE (type) != VECTOR_TYPE
3475 && TREE_CODE (type) != COMPLEX_TYPE)
3478 /* Any return value that is no larger than one word can be
3480 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3483 /* Check any available co-processors to see if they accept the
3484 type as a register candidate (VFP, for example, can return
3485 some aggregates in consecutive registers). These aren't
3486 available if the call is variadic. */
3487 if (aapcs_select_return_coproc (type, fntype) >= 0)
3490 /* Vector values should be returned using ARM registers, not
3491 memory (unless they're over 16 bytes, which will break since
3492 we only have four call-clobbered registers to play with). */
3493 if (TREE_CODE (type) == VECTOR_TYPE)
3494 return (size < 0 || size > (4 * UNITS_PER_WORD));
3496 /* The rest go in memory. */
3500 if (TREE_CODE (type) == VECTOR_TYPE)
3501 return (size < 0 || size > (4 * UNITS_PER_WORD));
3503 if (!AGGREGATE_TYPE_P (type) &&
3504 (TREE_CODE (type) != VECTOR_TYPE))
3505 /* All simple types are returned in registers. */
3508 if (arm_abi != ARM_ABI_APCS)
3510 /* ATPCS and later return aggregate types in memory only if they are
3511 larger than a word (or are variable size). */
3512 return (size < 0 || size > UNITS_PER_WORD);
3515 /* For the arm-wince targets we choose to be compatible with Microsoft's
3516 ARM and Thumb compilers, which always return aggregates in memory. */
3518 /* All structures/unions bigger than one word are returned in memory.
3519 Also catch the case where int_size_in_bytes returns -1. In this case
3520 the aggregate is either huge or of variable size, and in either case
3521 we will want to return it via memory and not in a register. */
3522 if (size < 0 || size > UNITS_PER_WORD)
3525 if (TREE_CODE (type) == RECORD_TYPE)
3529 /* For a struct the APCS says that we only return in a register
3530 if the type is 'integer like' and every addressable element
3531 has an offset of zero. For practical purposes this means
3532 that the structure can have at most one non bit-field element
3533 and that this element must be the first one in the structure. */
3535 /* Find the first field, ignoring non FIELD_DECL things which will
3536 have been created by C++. */
3537 for (field = TYPE_FIELDS (type);
3538 field && TREE_CODE (field) != FIELD_DECL;
3539 field = DECL_CHAIN (field))
3543 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3545 /* Check that the first field is valid for returning in a register. */
3547 /* ... Floats are not allowed */
3548 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3551 /* ... Aggregates that are not themselves valid for returning in
3552 a register are not allowed. */
3553 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3556 /* Now check the remaining fields, if any. Only bitfields are allowed,
3557 since they are not addressable. */
3558 for (field = DECL_CHAIN (field);
3560 field = DECL_CHAIN (field))
3562 if (TREE_CODE (field) != FIELD_DECL)
3565 if (!DECL_BIT_FIELD_TYPE (field))
3572 if (TREE_CODE (type) == UNION_TYPE)
3576 /* Unions can be returned in registers if every element is
3577 integral, or can be returned in an integer register. */
3578 for (field = TYPE_FIELDS (type);
3580 field = DECL_CHAIN (field))
3582 if (TREE_CODE (field) != FIELD_DECL)
3585 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3588 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3594 #endif /* not ARM_WINCE */
3596 /* Return all other types in memory. */
3600 /* Indicate whether or not words of a double are in big-endian order. */
3603 arm_float_words_big_endian (void)
3605 if (TARGET_MAVERICK)
3608 /* For FPA, float words are always big-endian. For VFP, floats words
3609 follow the memory system mode. */
3617 return (TARGET_BIG_END ? 1 : 0);
3622 const struct pcs_attribute_arg
3626 } pcs_attribute_args[] =
3628 {"aapcs", ARM_PCS_AAPCS},
3629 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3631 /* We could recognize these, but changes would be needed elsewhere
3632 * to implement them. */
3633 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3634 {"atpcs", ARM_PCS_ATPCS},
3635 {"apcs", ARM_PCS_APCS},
3637 {NULL, ARM_PCS_UNKNOWN}
3641 arm_pcs_from_attribute (tree attr)
3643 const struct pcs_attribute_arg *ptr;
3646 /* Get the value of the argument. */
3647 if (TREE_VALUE (attr) == NULL_TREE
3648 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3649 return ARM_PCS_UNKNOWN;
3651 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3653 /* Check it against the list of known arguments. */
3654 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3655 if (streq (arg, ptr->arg))
3658 /* An unrecognized interrupt type. */
3659 return ARM_PCS_UNKNOWN;
3662 /* Get the PCS variant to use for this call. TYPE is the function's type
3663 specification, DECL is the specific declartion. DECL may be null if
3664 the call could be indirect or if this is a library call. */
3666 arm_get_pcs_model (const_tree type, const_tree decl)
3668 bool user_convention = false;
3669 enum arm_pcs user_pcs = arm_pcs_default;
3674 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3677 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3678 user_convention = true;
3681 if (TARGET_AAPCS_BASED)
3683 /* Detect varargs functions. These always use the base rules
3684 (no argument is ever a candidate for a co-processor
3686 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3687 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3688 != void_type_node));
3690 if (user_convention)
3692 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3693 sorry ("Non-AAPCS derived PCS variant");
3694 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3695 error ("Variadic functions must use the base AAPCS variant");
3699 return ARM_PCS_AAPCS;
3700 else if (user_convention)
3702 else if (decl && flag_unit_at_a_time)
3704 /* Local functions never leak outside this compilation unit,
3705 so we are free to use whatever conventions are
3707 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3708 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3710 return ARM_PCS_AAPCS_LOCAL;
3713 else if (user_convention && user_pcs != arm_pcs_default)
3714 sorry ("PCS variant");
3716 /* For everything else we use the target's default. */
3717 return arm_pcs_default;
3722 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3723 const_tree fntype ATTRIBUTE_UNUSED,
3724 rtx libcall ATTRIBUTE_UNUSED,
3725 const_tree fndecl ATTRIBUTE_UNUSED)
3727 /* Record the unallocated VFP registers. */
3728 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3729 pcum->aapcs_vfp_reg_alloc = 0;
3732 /* Walk down the type tree of TYPE counting consecutive base elements.
3733 If *MODEP is VOIDmode, then set it to the first valid floating point
3734 type. If a non-floating point type is found, or if a floating point
3735 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3736 otherwise return the count in the sub-tree. */
3738 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3740 enum machine_mode mode;
3743 switch (TREE_CODE (type))
3746 mode = TYPE_MODE (type);
3747 if (mode != DFmode && mode != SFmode)
3750 if (*modep == VOIDmode)
3759 mode = TYPE_MODE (TREE_TYPE (type));
3760 if (mode != DFmode && mode != SFmode)
3763 if (*modep == VOIDmode)
3772 /* Use V2SImode and V4SImode as representatives of all 64-bit
3773 and 128-bit vector types, whether or not those modes are
3774 supported with the present options. */
3775 size = int_size_in_bytes (type);
3788 if (*modep == VOIDmode)
3791 /* Vector modes are considered to be opaque: two vectors are
3792 equivalent for the purposes of being homogeneous aggregates
3793 if they are the same size. */
3802 tree index = TYPE_DOMAIN (type);
3804 /* Can't handle incomplete types. */
3805 if (!COMPLETE_TYPE_P(type))
3808 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3811 || !TYPE_MAX_VALUE (index)
3812 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3813 || !TYPE_MIN_VALUE (index)
3814 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3818 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3819 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3821 /* There must be no padding. */
3822 if (!host_integerp (TYPE_SIZE (type), 1)
3823 || (tree_low_cst (TYPE_SIZE (type), 1)
3824 != count * GET_MODE_BITSIZE (*modep)))
3836 /* Can't handle incomplete types. */
3837 if (!COMPLETE_TYPE_P(type))
3840 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3842 if (TREE_CODE (field) != FIELD_DECL)
3845 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3851 /* There must be no padding. */
3852 if (!host_integerp (TYPE_SIZE (type), 1)
3853 || (tree_low_cst (TYPE_SIZE (type), 1)
3854 != count * GET_MODE_BITSIZE (*modep)))
3861 case QUAL_UNION_TYPE:
3863 /* These aren't very interesting except in a degenerate case. */
3868 /* Can't handle incomplete types. */
3869 if (!COMPLETE_TYPE_P(type))
3872 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3874 if (TREE_CODE (field) != FIELD_DECL)
3877 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3880 count = count > sub_count ? count : sub_count;
3883 /* There must be no padding. */
3884 if (!host_integerp (TYPE_SIZE (type), 1)
3885 || (tree_low_cst (TYPE_SIZE (type), 1)
3886 != count * GET_MODE_BITSIZE (*modep)))
3899 /* Return true if PCS_VARIANT should use VFP registers. */
3901 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3903 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3905 static bool seen_thumb1_vfp = false;
3907 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3909 sorry ("Thumb-1 hard-float VFP ABI");
3910 /* sorry() is not immediately fatal, so only display this once. */
3911 seen_thumb1_vfp = true;
3917 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3920 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3921 (TARGET_VFP_DOUBLE || !is_double));
3925 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3926 enum machine_mode mode, const_tree type,
3927 enum machine_mode *base_mode, int *count)
3929 enum machine_mode new_mode = VOIDmode;
3931 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3932 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3933 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3938 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3941 new_mode = (mode == DCmode ? DFmode : SFmode);
3943 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3945 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3947 if (ag_count > 0 && ag_count <= 4)
3956 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3959 *base_mode = new_mode;
3964 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3965 enum machine_mode mode, const_tree type)
3967 int count ATTRIBUTE_UNUSED;
3968 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3970 if (!use_vfp_abi (pcs_variant, false))
3972 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3977 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3980 if (!use_vfp_abi (pcum->pcs_variant, false))
3983 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3984 &pcum->aapcs_vfp_rmode,
3985 &pcum->aapcs_vfp_rcount);
3989 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3990 const_tree type ATTRIBUTE_UNUSED)
3992 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3993 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3996 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3997 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3999 pcum->aapcs_vfp_reg_alloc = mask << regno;
4000 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4003 int rcount = pcum->aapcs_vfp_rcount;
4005 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4009 /* Avoid using unsupported vector modes. */
4010 if (rmode == V2SImode)
4012 else if (rmode == V4SImode)
4019 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4020 for (i = 0; i < rcount; i++)
4022 rtx tmp = gen_rtx_REG (rmode,
4023 FIRST_VFP_REGNUM + regno + i * rshift);
4024 tmp = gen_rtx_EXPR_LIST
4026 GEN_INT (i * GET_MODE_SIZE (rmode)));
4027 XVECEXP (par, 0, i) = tmp;
4030 pcum->aapcs_reg = par;
4033 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4040 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4041 enum machine_mode mode,
4042 const_tree type ATTRIBUTE_UNUSED)
4044 if (!use_vfp_abi (pcs_variant, false))
4047 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4050 enum machine_mode ag_mode;
4055 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4060 if (ag_mode == V2SImode)
4062 else if (ag_mode == V4SImode)
4068 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4069 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4070 for (i = 0; i < count; i++)
4072 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4073 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4074 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4075 XVECEXP (par, 0, i) = tmp;
4081 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4085 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4086 enum machine_mode mode ATTRIBUTE_UNUSED,
4087 const_tree type ATTRIBUTE_UNUSED)
4089 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4090 pcum->aapcs_vfp_reg_alloc = 0;
4094 #define AAPCS_CP(X) \
4096 aapcs_ ## X ## _cum_init, \
4097 aapcs_ ## X ## _is_call_candidate, \
4098 aapcs_ ## X ## _allocate, \
4099 aapcs_ ## X ## _is_return_candidate, \
4100 aapcs_ ## X ## _allocate_return_reg, \
4101 aapcs_ ## X ## _advance \
4104 /* Table of co-processors that can be used to pass arguments in
4105 registers. Idealy no arugment should be a candidate for more than
4106 one co-processor table entry, but the table is processed in order
4107 and stops after the first match. If that entry then fails to put
4108 the argument into a co-processor register, the argument will go on
4112 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4113 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4115 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4116 BLKmode) is a candidate for this co-processor's registers; this
4117 function should ignore any position-dependent state in
4118 CUMULATIVE_ARGS and only use call-type dependent information. */
4119 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4121 /* Return true if the argument does get a co-processor register; it
4122 should set aapcs_reg to an RTX of the register allocated as is
4123 required for a return from FUNCTION_ARG. */
4124 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4126 /* Return true if a result of mode MODE (or type TYPE if MODE is
4127 BLKmode) is can be returned in this co-processor's registers. */
4128 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4130 /* Allocate and return an RTX element to hold the return type of a
4131 call, this routine must not fail and will only be called if
4132 is_return_candidate returned true with the same parameters. */
4133 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4135 /* Finish processing this argument and prepare to start processing
4137 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4138 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4146 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4151 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4152 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4159 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4161 /* We aren't passed a decl, so we can't check that a call is local.
4162 However, it isn't clear that that would be a win anyway, since it
4163 might limit some tail-calling opportunities. */
4164 enum arm_pcs pcs_variant;
4168 const_tree fndecl = NULL_TREE;
4170 if (TREE_CODE (fntype) == FUNCTION_DECL)
4173 fntype = TREE_TYPE (fntype);
4176 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4179 pcs_variant = arm_pcs_default;
4181 if (pcs_variant != ARM_PCS_AAPCS)
4185 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4186 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4195 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4198 /* We aren't passed a decl, so we can't check that a call is local.
4199 However, it isn't clear that that would be a win anyway, since it
4200 might limit some tail-calling opportunities. */
4201 enum arm_pcs pcs_variant;
4202 int unsignedp ATTRIBUTE_UNUSED;
4206 const_tree fndecl = NULL_TREE;
4208 if (TREE_CODE (fntype) == FUNCTION_DECL)
4211 fntype = TREE_TYPE (fntype);
4214 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4217 pcs_variant = arm_pcs_default;
4219 /* Promote integer types. */
4220 if (type && INTEGRAL_TYPE_P (type))
4221 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4223 if (pcs_variant != ARM_PCS_AAPCS)
4227 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4228 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4230 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4234 /* Promotes small structs returned in a register to full-word size
4235 for big-endian AAPCS. */
4236 if (type && arm_return_in_msb (type))
4238 HOST_WIDE_INT size = int_size_in_bytes (type);
4239 if (size % UNITS_PER_WORD != 0)
4241 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4242 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4246 return gen_rtx_REG (mode, R0_REGNUM);
4250 aapcs_libcall_value (enum machine_mode mode)
4252 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4255 /* Lay out a function argument using the AAPCS rules. The rule
4256 numbers referred to here are those in the AAPCS. */
4258 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4259 tree type, int named)
4264 /* We only need to do this once per argument. */
4265 if (pcum->aapcs_arg_processed)
4268 pcum->aapcs_arg_processed = true;
4270 /* Special case: if named is false then we are handling an incoming
4271 anonymous argument which is on the stack. */
4275 /* Is this a potential co-processor register candidate? */
4276 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4278 int slot = aapcs_select_call_coproc (pcum, mode, type);
4279 pcum->aapcs_cprc_slot = slot;
4281 /* We don't have to apply any of the rules from part B of the
4282 preparation phase, these are handled elsewhere in the
4287 /* A Co-processor register candidate goes either in its own
4288 class of registers or on the stack. */
4289 if (!pcum->aapcs_cprc_failed[slot])
4291 /* C1.cp - Try to allocate the argument to co-processor
4293 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4296 /* C2.cp - Put the argument on the stack and note that we
4297 can't assign any more candidates in this slot. We also
4298 need to note that we have allocated stack space, so that
4299 we won't later try to split a non-cprc candidate between
4300 core registers and the stack. */
4301 pcum->aapcs_cprc_failed[slot] = true;
4302 pcum->can_split = false;
4305 /* We didn't get a register, so this argument goes on the
4307 gcc_assert (pcum->can_split == false);
4312 /* C3 - For double-word aligned arguments, round the NCRN up to the
4313 next even number. */
4314 ncrn = pcum->aapcs_ncrn;
4315 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4318 nregs = ARM_NUM_REGS2(mode, type);
4320 /* Sigh, this test should really assert that nregs > 0, but a GCC
4321 extension allows empty structs and then gives them empty size; it
4322 then allows such a structure to be passed by value. For some of
4323 the code below we have to pretend that such an argument has
4324 non-zero size so that we 'locate' it correctly either in
4325 registers or on the stack. */
4326 gcc_assert (nregs >= 0);
4328 nregs2 = nregs ? nregs : 1;
4330 /* C4 - Argument fits entirely in core registers. */
4331 if (ncrn + nregs2 <= NUM_ARG_REGS)
4333 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4334 pcum->aapcs_next_ncrn = ncrn + nregs;
4338 /* C5 - Some core registers left and there are no arguments already
4339 on the stack: split this argument between the remaining core
4340 registers and the stack. */
4341 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4343 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4344 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4345 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4349 /* C6 - NCRN is set to 4. */
4350 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4352 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4356 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4357 for a call to a function whose data type is FNTYPE.
4358 For a library call, FNTYPE is NULL. */
4360 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4362 tree fndecl ATTRIBUTE_UNUSED)
4364 /* Long call handling. */
4366 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4368 pcum->pcs_variant = arm_pcs_default;
4370 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4372 if (arm_libcall_uses_aapcs_base (libname))
4373 pcum->pcs_variant = ARM_PCS_AAPCS;
4375 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4376 pcum->aapcs_reg = NULL_RTX;
4377 pcum->aapcs_partial = 0;
4378 pcum->aapcs_arg_processed = false;
4379 pcum->aapcs_cprc_slot = -1;
4380 pcum->can_split = true;
4382 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4386 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4388 pcum->aapcs_cprc_failed[i] = false;
4389 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4397 /* On the ARM, the offset starts at 0. */
4399 pcum->iwmmxt_nregs = 0;
4400 pcum->can_split = true;
4402 /* Varargs vectors are treated the same as long long.
4403 named_count avoids having to change the way arm handles 'named' */
4404 pcum->named_count = 0;
4407 if (TARGET_REALLY_IWMMXT && fntype)
4411 for (fn_arg = TYPE_ARG_TYPES (fntype);
4413 fn_arg = TREE_CHAIN (fn_arg))
4414 pcum->named_count += 1;
4416 if (! pcum->named_count)
4417 pcum->named_count = INT_MAX;
4422 /* Return true if mode/type need doubleword alignment. */
4424 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4426 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4427 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4431 /* Determine where to put an argument to a function.
4432 Value is zero to push the argument on the stack,
4433 or a hard register in which to store the argument.
4435 MODE is the argument's machine mode.
4436 TYPE is the data type of the argument (as a tree).
4437 This is null for libcalls where that information may
4439 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4440 the preceding args and about the function being called.
4441 NAMED is nonzero if this argument is a named parameter
4442 (otherwise it is an extra parameter matching an ellipsis). */
4445 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4446 tree type, int named)
4450 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4451 a call insn (op3 of a call_value insn). */
4452 if (mode == VOIDmode)
4455 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4457 aapcs_layout_arg (pcum, mode, type, named);
4458 return pcum->aapcs_reg;
4461 /* Varargs vectors are treated the same as long long.
4462 named_count avoids having to change the way arm handles 'named' */
4463 if (TARGET_IWMMXT_ABI
4464 && arm_vector_mode_supported_p (mode)
4465 && pcum->named_count > pcum->nargs + 1)
4467 if (pcum->iwmmxt_nregs <= 9)
4468 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4471 pcum->can_split = false;
4476 /* Put doubleword aligned quantities in even register pairs. */
4478 && ARM_DOUBLEWORD_ALIGN
4479 && arm_needs_doubleword_align (mode, type))
4482 if (mode == VOIDmode)
4483 /* Pick an arbitrary value for operand 2 of the call insn. */
4486 /* Only allow splitting an arg between regs and memory if all preceding
4487 args were allocated to regs. For args passed by reference we only count
4488 the reference pointer. */
4489 if (pcum->can_split)
4492 nregs = ARM_NUM_REGS2 (mode, type);
4494 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4497 return gen_rtx_REG (mode, pcum->nregs);
4501 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4502 tree type, bool named)
4504 int nregs = pcum->nregs;
4506 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4508 aapcs_layout_arg (pcum, mode, type, named);
4509 return pcum->aapcs_partial;
4512 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4515 if (NUM_ARG_REGS > nregs
4516 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4518 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4524 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4525 tree type, bool named)
4527 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4529 aapcs_layout_arg (pcum, mode, type, named);
4531 if (pcum->aapcs_cprc_slot >= 0)
4533 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4535 pcum->aapcs_cprc_slot = -1;
4538 /* Generic stuff. */
4539 pcum->aapcs_arg_processed = false;
4540 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4541 pcum->aapcs_reg = NULL_RTX;
4542 pcum->aapcs_partial = 0;
4547 if (arm_vector_mode_supported_p (mode)
4548 && pcum->named_count > pcum->nargs
4549 && TARGET_IWMMXT_ABI)
4550 pcum->iwmmxt_nregs += 1;
4552 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4556 /* Variable sized types are passed by reference. This is a GCC
4557 extension to the ARM ABI. */
4560 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4561 enum machine_mode mode ATTRIBUTE_UNUSED,
4562 const_tree type, bool named ATTRIBUTE_UNUSED)
4564 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4567 /* Encode the current state of the #pragma [no_]long_calls. */
4570 OFF, /* No #pragma [no_]long_calls is in effect. */
4571 LONG, /* #pragma long_calls is in effect. */
4572 SHORT /* #pragma no_long_calls is in effect. */
4575 static arm_pragma_enum arm_pragma_long_calls = OFF;
4578 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4580 arm_pragma_long_calls = LONG;
4584 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4586 arm_pragma_long_calls = SHORT;
4590 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4592 arm_pragma_long_calls = OFF;
4595 /* Handle an attribute requiring a FUNCTION_DECL;
4596 arguments as in struct attribute_spec.handler. */
4598 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4599 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4601 if (TREE_CODE (*node) != FUNCTION_DECL)
4603 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4605 *no_add_attrs = true;
4611 /* Handle an "interrupt" or "isr" attribute;
4612 arguments as in struct attribute_spec.handler. */
4614 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4619 if (TREE_CODE (*node) != FUNCTION_DECL)
4621 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4623 *no_add_attrs = true;
4625 /* FIXME: the argument if any is checked for type attributes;
4626 should it be checked for decl ones? */
4630 if (TREE_CODE (*node) == FUNCTION_TYPE
4631 || TREE_CODE (*node) == METHOD_TYPE)
4633 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4635 warning (OPT_Wattributes, "%qE attribute ignored",
4637 *no_add_attrs = true;
4640 else if (TREE_CODE (*node) == POINTER_TYPE
4641 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4642 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4643 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4645 *node = build_variant_type_copy (*node);
4646 TREE_TYPE (*node) = build_type_attribute_variant
4648 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4649 *no_add_attrs = true;
4653 /* Possibly pass this attribute on from the type to a decl. */
4654 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4655 | (int) ATTR_FLAG_FUNCTION_NEXT
4656 | (int) ATTR_FLAG_ARRAY_NEXT))
4658 *no_add_attrs = true;
4659 return tree_cons (name, args, NULL_TREE);
4663 warning (OPT_Wattributes, "%qE attribute ignored",
4672 /* Handle a "pcs" attribute; arguments as in struct
4673 attribute_spec.handler. */
4675 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4676 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4678 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4680 warning (OPT_Wattributes, "%qE attribute ignored", name);
4681 *no_add_attrs = true;
4686 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4687 /* Handle the "notshared" attribute. This attribute is another way of
4688 requesting hidden visibility. ARM's compiler supports
4689 "__declspec(notshared)"; we support the same thing via an
4693 arm_handle_notshared_attribute (tree *node,
4694 tree name ATTRIBUTE_UNUSED,
4695 tree args ATTRIBUTE_UNUSED,
4696 int flags ATTRIBUTE_UNUSED,
4699 tree decl = TYPE_NAME (*node);
4703 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4704 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4705 *no_add_attrs = false;
4711 /* Return 0 if the attributes for two types are incompatible, 1 if they
4712 are compatible, and 2 if they are nearly compatible (which causes a
4713 warning to be generated). */
4715 arm_comp_type_attributes (const_tree type1, const_tree type2)
4719 /* Check for mismatch of non-default calling convention. */
4720 if (TREE_CODE (type1) != FUNCTION_TYPE)
4723 /* Check for mismatched call attributes. */
4724 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4725 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4726 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4727 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4729 /* Only bother to check if an attribute is defined. */
4730 if (l1 | l2 | s1 | s2)
4732 /* If one type has an attribute, the other must have the same attribute. */
4733 if ((l1 != l2) || (s1 != s2))
4736 /* Disallow mixed attributes. */
4737 if ((l1 & s2) || (l2 & s1))
4741 /* Check for mismatched ISR attribute. */
4742 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4744 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4745 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4747 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4754 /* Assigns default attributes to newly defined type. This is used to
4755 set short_call/long_call attributes for function types of
4756 functions defined inside corresponding #pragma scopes. */
4758 arm_set_default_type_attributes (tree type)
4760 /* Add __attribute__ ((long_call)) to all functions, when
4761 inside #pragma long_calls or __attribute__ ((short_call)),
4762 when inside #pragma no_long_calls. */
4763 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4765 tree type_attr_list, attr_name;
4766 type_attr_list = TYPE_ATTRIBUTES (type);
4768 if (arm_pragma_long_calls == LONG)
4769 attr_name = get_identifier ("long_call");
4770 else if (arm_pragma_long_calls == SHORT)
4771 attr_name = get_identifier ("short_call");
4775 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4776 TYPE_ATTRIBUTES (type) = type_attr_list;
4780 /* Return true if DECL is known to be linked into section SECTION. */
4783 arm_function_in_section_p (tree decl, section *section)
4785 /* We can only be certain about functions defined in the same
4786 compilation unit. */
4787 if (!TREE_STATIC (decl))
4790 /* Make sure that SYMBOL always binds to the definition in this
4791 compilation unit. */
4792 if (!targetm.binds_local_p (decl))
4795 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4796 if (!DECL_SECTION_NAME (decl))
4798 /* Make sure that we will not create a unique section for DECL. */
4799 if (flag_function_sections || DECL_ONE_ONLY (decl))
4803 return function_section (decl) == section;
4806 /* Return nonzero if a 32-bit "long_call" should be generated for
4807 a call from the current function to DECL. We generate a long_call
4810 a. has an __attribute__((long call))
4811 or b. is within the scope of a #pragma long_calls
4812 or c. the -mlong-calls command line switch has been specified
4814 However we do not generate a long call if the function:
4816 d. has an __attribute__ ((short_call))
4817 or e. is inside the scope of a #pragma no_long_calls
4818 or f. is defined in the same section as the current function. */
4821 arm_is_long_call_p (tree decl)
4826 return TARGET_LONG_CALLS;
4828 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4829 if (lookup_attribute ("short_call", attrs))
4832 /* For "f", be conservative, and only cater for cases in which the
4833 whole of the current function is placed in the same section. */
4834 if (!flag_reorder_blocks_and_partition
4835 && TREE_CODE (decl) == FUNCTION_DECL
4836 && arm_function_in_section_p (decl, current_function_section ()))
4839 if (lookup_attribute ("long_call", attrs))
4842 return TARGET_LONG_CALLS;
4845 /* Return nonzero if it is ok to make a tail-call to DECL. */
4847 arm_function_ok_for_sibcall (tree decl, tree exp)
4849 unsigned long func_type;
4851 if (cfun->machine->sibcall_blocked)
4854 /* Never tailcall something for which we have no decl, or if we
4855 are generating code for Thumb-1. */
4856 if (decl == NULL || TARGET_THUMB1)
4859 /* The PIC register is live on entry to VxWorks PLT entries, so we
4860 must make the call before restoring the PIC register. */
4861 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4864 /* Cannot tail-call to long calls, since these are out of range of
4865 a branch instruction. */
4866 if (arm_is_long_call_p (decl))
4869 /* If we are interworking and the function is not declared static
4870 then we can't tail-call it unless we know that it exists in this
4871 compilation unit (since it might be a Thumb routine). */
4872 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4875 func_type = arm_current_func_type ();
4876 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4877 if (IS_INTERRUPT (func_type))
4880 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4882 /* Check that the return value locations are the same. For
4883 example that we aren't returning a value from the sibling in
4884 a VFP register but then need to transfer it to a core
4888 a = arm_function_value (TREE_TYPE (exp), decl, false);
4889 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4891 if (!rtx_equal_p (a, b))
4895 /* Never tailcall if function may be called with a misaligned SP. */
4896 if (IS_STACKALIGN (func_type))
4899 /* Everything else is ok. */
4904 /* Addressing mode support functions. */
4906 /* Return nonzero if X is a legitimate immediate operand when compiling
4907 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4909 legitimate_pic_operand_p (rtx x)
4911 if (GET_CODE (x) == SYMBOL_REF
4912 || (GET_CODE (x) == CONST
4913 && GET_CODE (XEXP (x, 0)) == PLUS
4914 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4920 /* Record that the current function needs a PIC register. Initialize
4921 cfun->machine->pic_reg if we have not already done so. */
4924 require_pic_register (void)
4926 /* A lot of the logic here is made obscure by the fact that this
4927 routine gets called as part of the rtx cost estimation process.
4928 We don't want those calls to affect any assumptions about the real
4929 function; and further, we can't call entry_of_function() until we
4930 start the real expansion process. */
4931 if (!crtl->uses_pic_offset_table)
4933 gcc_assert (can_create_pseudo_p ());
4934 if (arm_pic_register != INVALID_REGNUM)
4936 if (!cfun->machine->pic_reg)
4937 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4939 /* Play games to avoid marking the function as needing pic
4940 if we are being called as part of the cost-estimation
4942 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4943 crtl->uses_pic_offset_table = 1;
4949 if (!cfun->machine->pic_reg)
4950 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4952 /* Play games to avoid marking the function as needing pic
4953 if we are being called as part of the cost-estimation
4955 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4957 crtl->uses_pic_offset_table = 1;
4960 arm_load_pic_register (0UL);
4964 /* We can be called during expansion of PHI nodes, where
4965 we can't yet emit instructions directly in the final
4966 insn stream. Queue the insns on the entry edge, they will
4967 be committed after everything else is expanded. */
4968 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4975 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4977 if (GET_CODE (orig) == SYMBOL_REF
4978 || GET_CODE (orig) == LABEL_REF)
4984 gcc_assert (can_create_pseudo_p ());
4985 reg = gen_reg_rtx (Pmode);
4988 /* VxWorks does not impose a fixed gap between segments; the run-time
4989 gap can be different from the object-file gap. We therefore can't
4990 use GOTOFF unless we are absolutely sure that the symbol is in the
4991 same segment as the GOT. Unfortunately, the flexibility of linker
4992 scripts means that we can't be sure of that in general, so assume
4993 that GOTOFF is never valid on VxWorks. */
4994 if ((GET_CODE (orig) == LABEL_REF
4995 || (GET_CODE (orig) == SYMBOL_REF &&
4996 SYMBOL_REF_LOCAL_P (orig)))
4998 && !TARGET_VXWORKS_RTP)
4999 insn = arm_pic_static_addr (orig, reg);
5005 /* If this function doesn't have a pic register, create one now. */
5006 require_pic_register ();
5008 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5010 /* Make the MEM as close to a constant as possible. */
5011 mem = SET_SRC (pat);
5012 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5013 MEM_READONLY_P (mem) = 1;
5014 MEM_NOTRAP_P (mem) = 1;
5016 insn = emit_insn (pat);
5019 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5021 set_unique_reg_note (insn, REG_EQUAL, orig);
5025 else if (GET_CODE (orig) == CONST)
5029 if (GET_CODE (XEXP (orig, 0)) == PLUS
5030 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5033 /* Handle the case where we have: const (UNSPEC_TLS). */
5034 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5035 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5038 /* Handle the case where we have:
5039 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5041 if (GET_CODE (XEXP (orig, 0)) == PLUS
5042 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5043 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5045 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5051 gcc_assert (can_create_pseudo_p ());
5052 reg = gen_reg_rtx (Pmode);
5055 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5057 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5058 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5059 base == reg ? 0 : reg);
5061 if (GET_CODE (offset) == CONST_INT)
5063 /* The base register doesn't really matter, we only want to
5064 test the index for the appropriate mode. */
5065 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5067 gcc_assert (can_create_pseudo_p ());
5068 offset = force_reg (Pmode, offset);
5071 if (GET_CODE (offset) == CONST_INT)
5072 return plus_constant (base, INTVAL (offset));
5075 if (GET_MODE_SIZE (mode) > 4
5076 && (GET_MODE_CLASS (mode) == MODE_INT
5077 || TARGET_SOFT_FLOAT))
5079 emit_insn (gen_addsi3 (reg, base, offset));
5083 return gen_rtx_PLUS (Pmode, base, offset);
5090 /* Find a spare register to use during the prolog of a function. */
5093 thumb_find_work_register (unsigned long pushed_regs_mask)
5097 /* Check the argument registers first as these are call-used. The
5098 register allocation order means that sometimes r3 might be used
5099 but earlier argument registers might not, so check them all. */
5100 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5101 if (!df_regs_ever_live_p (reg))
5104 /* Before going on to check the call-saved registers we can try a couple
5105 more ways of deducing that r3 is available. The first is when we are
5106 pushing anonymous arguments onto the stack and we have less than 4
5107 registers worth of fixed arguments(*). In this case r3 will be part of
5108 the variable argument list and so we can be sure that it will be
5109 pushed right at the start of the function. Hence it will be available
5110 for the rest of the prologue.
5111 (*): ie crtl->args.pretend_args_size is greater than 0. */
5112 if (cfun->machine->uses_anonymous_args
5113 && crtl->args.pretend_args_size > 0)
5114 return LAST_ARG_REGNUM;
5116 /* The other case is when we have fixed arguments but less than 4 registers
5117 worth. In this case r3 might be used in the body of the function, but
5118 it is not being used to convey an argument into the function. In theory
5119 we could just check crtl->args.size to see how many bytes are
5120 being passed in argument registers, but it seems that it is unreliable.
5121 Sometimes it will have the value 0 when in fact arguments are being
5122 passed. (See testcase execute/20021111-1.c for an example). So we also
5123 check the args_info.nregs field as well. The problem with this field is
5124 that it makes no allowances for arguments that are passed to the
5125 function but which are not used. Hence we could miss an opportunity
5126 when a function has an unused argument in r3. But it is better to be
5127 safe than to be sorry. */
5128 if (! cfun->machine->uses_anonymous_args
5129 && crtl->args.size >= 0
5130 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5131 && crtl->args.info.nregs < 4)
5132 return LAST_ARG_REGNUM;
5134 /* Otherwise look for a call-saved register that is going to be pushed. */
5135 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5136 if (pushed_regs_mask & (1 << reg))
5141 /* Thumb-2 can use high regs. */
5142 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5143 if (pushed_regs_mask & (1 << reg))
5146 /* Something went wrong - thumb_compute_save_reg_mask()
5147 should have arranged for a suitable register to be pushed. */
5151 static GTY(()) int pic_labelno;
5153 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5157 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5159 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5161 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5164 gcc_assert (flag_pic);
5166 pic_reg = cfun->machine->pic_reg;
5167 if (TARGET_VXWORKS_RTP)
5169 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5170 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5171 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5173 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5175 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5176 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5180 /* We use an UNSPEC rather than a LABEL_REF because this label
5181 never appears in the code stream. */
5183 labelno = GEN_INT (pic_labelno++);
5184 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5185 l1 = gen_rtx_CONST (VOIDmode, l1);
5187 /* On the ARM the PC register contains 'dot + 8' at the time of the
5188 addition, on the Thumb it is 'dot + 4'. */
5189 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5190 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5192 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5196 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5198 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5200 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5202 else /* TARGET_THUMB1 */
5204 if (arm_pic_register != INVALID_REGNUM
5205 && REGNO (pic_reg) > LAST_LO_REGNUM)
5207 /* We will have pushed the pic register, so we should always be
5208 able to find a work register. */
5209 pic_tmp = gen_rtx_REG (SImode,
5210 thumb_find_work_register (saved_regs));
5211 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5212 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5215 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5216 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5220 /* Need to emit this whether or not we obey regdecls,
5221 since setjmp/longjmp can cause life info to screw up. */
5225 /* Generate code to load the address of a static var when flag_pic is set. */
5227 arm_pic_static_addr (rtx orig, rtx reg)
5229 rtx l1, labelno, offset_rtx, insn;
5231 gcc_assert (flag_pic);
5233 /* We use an UNSPEC rather than a LABEL_REF because this label
5234 never appears in the code stream. */
5235 labelno = GEN_INT (pic_labelno++);
5236 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5237 l1 = gen_rtx_CONST (VOIDmode, l1);
5239 /* On the ARM the PC register contains 'dot + 8' at the time of the
5240 addition, on the Thumb it is 'dot + 4'. */
5241 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5242 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5243 UNSPEC_SYMBOL_OFFSET);
5244 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5248 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5250 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5252 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5254 else /* TARGET_THUMB1 */
5256 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5257 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5263 /* Return nonzero if X is valid as an ARM state addressing register. */
5265 arm_address_register_rtx_p (rtx x, int strict_p)
5269 if (GET_CODE (x) != REG)
5275 return ARM_REGNO_OK_FOR_BASE_P (regno);
5277 return (regno <= LAST_ARM_REGNUM
5278 || regno >= FIRST_PSEUDO_REGISTER
5279 || regno == FRAME_POINTER_REGNUM
5280 || regno == ARG_POINTER_REGNUM);
5283 /* Return TRUE if this rtx is the difference of a symbol and a label,
5284 and will reduce to a PC-relative relocation in the object file.
5285 Expressions like this can be left alone when generating PIC, rather
5286 than forced through the GOT. */
5288 pcrel_constant_p (rtx x)
5290 if (GET_CODE (x) == MINUS)
5291 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5296 /* Return true if X will surely end up in an index register after next
5299 will_be_in_index_register (const_rtx x)
5301 /* arm.md: calculate_pic_address will split this into a register. */
5302 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5305 /* Return nonzero if X is a valid ARM state address operand. */
5307 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5311 enum rtx_code code = GET_CODE (x);
5313 if (arm_address_register_rtx_p (x, strict_p))
5316 use_ldrd = (TARGET_LDRD
5318 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5320 if (code == POST_INC || code == PRE_DEC
5321 || ((code == PRE_INC || code == POST_DEC)
5322 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5323 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5325 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5326 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5327 && GET_CODE (XEXP (x, 1)) == PLUS
5328 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5330 rtx addend = XEXP (XEXP (x, 1), 1);
5332 /* Don't allow ldrd post increment by register because it's hard
5333 to fixup invalid register choices. */
5335 && GET_CODE (x) == POST_MODIFY
5336 && GET_CODE (addend) == REG)
5339 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5340 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5343 /* After reload constants split into minipools will have addresses
5344 from a LABEL_REF. */
5345 else if (reload_completed
5346 && (code == LABEL_REF
5348 && GET_CODE (XEXP (x, 0)) == PLUS
5349 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5350 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5353 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5356 else if (code == PLUS)
5358 rtx xop0 = XEXP (x, 0);
5359 rtx xop1 = XEXP (x, 1);
5361 return ((arm_address_register_rtx_p (xop0, strict_p)
5362 && ((GET_CODE(xop1) == CONST_INT
5363 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5364 || (!strict_p && will_be_in_index_register (xop1))))
5365 || (arm_address_register_rtx_p (xop1, strict_p)
5366 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5370 /* Reload currently can't handle MINUS, so disable this for now */
5371 else if (GET_CODE (x) == MINUS)
5373 rtx xop0 = XEXP (x, 0);
5374 rtx xop1 = XEXP (x, 1);
5376 return (arm_address_register_rtx_p (xop0, strict_p)
5377 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5381 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5382 && code == SYMBOL_REF
5383 && CONSTANT_POOL_ADDRESS_P (x)
5385 && symbol_mentioned_p (get_pool_constant (x))
5386 && ! pcrel_constant_p (get_pool_constant (x))))
5392 /* Return nonzero if X is a valid Thumb-2 address operand. */
5394 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5397 enum rtx_code code = GET_CODE (x);
5399 if (arm_address_register_rtx_p (x, strict_p))
5402 use_ldrd = (TARGET_LDRD
5404 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5406 if (code == POST_INC || code == PRE_DEC
5407 || ((code == PRE_INC || code == POST_DEC)
5408 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5409 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5411 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5412 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5413 && GET_CODE (XEXP (x, 1)) == PLUS
5414 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5416 /* Thumb-2 only has autoincrement by constant. */
5417 rtx addend = XEXP (XEXP (x, 1), 1);
5418 HOST_WIDE_INT offset;
5420 if (GET_CODE (addend) != CONST_INT)
5423 offset = INTVAL(addend);
5424 if (GET_MODE_SIZE (mode) <= 4)
5425 return (offset > -256 && offset < 256);
5427 return (use_ldrd && offset > -1024 && offset < 1024
5428 && (offset & 3) == 0);
5431 /* After reload constants split into minipools will have addresses
5432 from a LABEL_REF. */
5433 else if (reload_completed
5434 && (code == LABEL_REF
5436 && GET_CODE (XEXP (x, 0)) == PLUS
5437 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5438 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5441 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5444 else if (code == PLUS)
5446 rtx xop0 = XEXP (x, 0);
5447 rtx xop1 = XEXP (x, 1);
5449 return ((arm_address_register_rtx_p (xop0, strict_p)
5450 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5451 || (!strict_p && will_be_in_index_register (xop1))))
5452 || (arm_address_register_rtx_p (xop1, strict_p)
5453 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5456 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5457 && code == SYMBOL_REF
5458 && CONSTANT_POOL_ADDRESS_P (x)
5460 && symbol_mentioned_p (get_pool_constant (x))
5461 && ! pcrel_constant_p (get_pool_constant (x))))
5467 /* Return nonzero if INDEX is valid for an address index operand in
5470 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5473 HOST_WIDE_INT range;
5474 enum rtx_code code = GET_CODE (index);
5476 /* Standard coprocessor addressing modes. */
5477 if (TARGET_HARD_FLOAT
5478 && (TARGET_FPA || TARGET_MAVERICK)
5479 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5480 || (TARGET_MAVERICK && mode == DImode)))
5481 return (code == CONST_INT && INTVAL (index) < 1024
5482 && INTVAL (index) > -1024
5483 && (INTVAL (index) & 3) == 0);
5486 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5487 return (code == CONST_INT
5488 && INTVAL (index) < 1016
5489 && INTVAL (index) > -1024
5490 && (INTVAL (index) & 3) == 0);
5492 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5493 return (code == CONST_INT
5494 && INTVAL (index) < 1024
5495 && INTVAL (index) > -1024
5496 && (INTVAL (index) & 3) == 0);
5498 if (arm_address_register_rtx_p (index, strict_p)
5499 && (GET_MODE_SIZE (mode) <= 4))
5502 if (mode == DImode || mode == DFmode)
5504 if (code == CONST_INT)
5506 HOST_WIDE_INT val = INTVAL (index);
5509 return val > -256 && val < 256;
5511 return val > -4096 && val < 4092;
5514 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5517 if (GET_MODE_SIZE (mode) <= 4
5521 || (mode == QImode && outer == SIGN_EXTEND))))
5525 rtx xiop0 = XEXP (index, 0);
5526 rtx xiop1 = XEXP (index, 1);
5528 return ((arm_address_register_rtx_p (xiop0, strict_p)
5529 && power_of_two_operand (xiop1, SImode))
5530 || (arm_address_register_rtx_p (xiop1, strict_p)
5531 && power_of_two_operand (xiop0, SImode)));
5533 else if (code == LSHIFTRT || code == ASHIFTRT
5534 || code == ASHIFT || code == ROTATERT)
5536 rtx op = XEXP (index, 1);
5538 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5539 && GET_CODE (op) == CONST_INT
5541 && INTVAL (op) <= 31);
5545 /* For ARM v4 we may be doing a sign-extend operation during the
5551 || (outer == SIGN_EXTEND && mode == QImode))
5557 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5559 return (code == CONST_INT
5560 && INTVAL (index) < range
5561 && INTVAL (index) > -range);
5564 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5565 index operand. i.e. 1, 2, 4 or 8. */
5567 thumb2_index_mul_operand (rtx op)
5571 if (GET_CODE(op) != CONST_INT)
5575 return (val == 1 || val == 2 || val == 4 || val == 8);
5578 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5580 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5582 enum rtx_code code = GET_CODE (index);
5584 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5585 /* Standard coprocessor addressing modes. */
5586 if (TARGET_HARD_FLOAT
5587 && (TARGET_FPA || TARGET_MAVERICK)
5588 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5589 || (TARGET_MAVERICK && mode == DImode)))
5590 return (code == CONST_INT && INTVAL (index) < 1024
5591 && INTVAL (index) > -1024
5592 && (INTVAL (index) & 3) == 0);
5594 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5596 /* For DImode assume values will usually live in core regs
5597 and only allow LDRD addressing modes. */
5598 if (!TARGET_LDRD || mode != DImode)
5599 return (code == CONST_INT
5600 && INTVAL (index) < 1024
5601 && INTVAL (index) > -1024
5602 && (INTVAL (index) & 3) == 0);
5606 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5607 return (code == CONST_INT
5608 && INTVAL (index) < 1016
5609 && INTVAL (index) > -1024
5610 && (INTVAL (index) & 3) == 0);
5612 if (arm_address_register_rtx_p (index, strict_p)
5613 && (GET_MODE_SIZE (mode) <= 4))
5616 if (mode == DImode || mode == DFmode)
5618 if (code == CONST_INT)
5620 HOST_WIDE_INT val = INTVAL (index);
5621 /* ??? Can we assume ldrd for thumb2? */
5622 /* Thumb-2 ldrd only has reg+const addressing modes. */
5623 /* ldrd supports offsets of +-1020.
5624 However the ldr fallback does not. */
5625 return val > -256 && val < 256 && (val & 3) == 0;
5633 rtx xiop0 = XEXP (index, 0);
5634 rtx xiop1 = XEXP (index, 1);
5636 return ((arm_address_register_rtx_p (xiop0, strict_p)
5637 && thumb2_index_mul_operand (xiop1))
5638 || (arm_address_register_rtx_p (xiop1, strict_p)
5639 && thumb2_index_mul_operand (xiop0)));
5641 else if (code == ASHIFT)
5643 rtx op = XEXP (index, 1);
5645 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5646 && GET_CODE (op) == CONST_INT
5648 && INTVAL (op) <= 3);
5651 return (code == CONST_INT
5652 && INTVAL (index) < 4096
5653 && INTVAL (index) > -256);
5656 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5658 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5662 if (GET_CODE (x) != REG)
5668 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5670 return (regno <= LAST_LO_REGNUM
5671 || regno > LAST_VIRTUAL_REGISTER
5672 || regno == FRAME_POINTER_REGNUM
5673 || (GET_MODE_SIZE (mode) >= 4
5674 && (regno == STACK_POINTER_REGNUM
5675 || regno >= FIRST_PSEUDO_REGISTER
5676 || x == hard_frame_pointer_rtx
5677 || x == arg_pointer_rtx)));
5680 /* Return nonzero if x is a legitimate index register. This is the case
5681 for any base register that can access a QImode object. */
5683 thumb1_index_register_rtx_p (rtx x, int strict_p)
5685 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5688 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5690 The AP may be eliminated to either the SP or the FP, so we use the
5691 least common denominator, e.g. SImode, and offsets from 0 to 64.
5693 ??? Verify whether the above is the right approach.
5695 ??? Also, the FP may be eliminated to the SP, so perhaps that
5696 needs special handling also.
5698 ??? Look at how the mips16 port solves this problem. It probably uses
5699 better ways to solve some of these problems.
5701 Although it is not incorrect, we don't accept QImode and HImode
5702 addresses based on the frame pointer or arg pointer until the
5703 reload pass starts. This is so that eliminating such addresses
5704 into stack based ones won't produce impossible code. */
5706 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5708 /* ??? Not clear if this is right. Experiment. */
5709 if (GET_MODE_SIZE (mode) < 4
5710 && !(reload_in_progress || reload_completed)
5711 && (reg_mentioned_p (frame_pointer_rtx, x)
5712 || reg_mentioned_p (arg_pointer_rtx, x)
5713 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5714 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5715 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5716 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5719 /* Accept any base register. SP only in SImode or larger. */
5720 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5723 /* This is PC relative data before arm_reorg runs. */
5724 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5725 && GET_CODE (x) == SYMBOL_REF
5726 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5729 /* This is PC relative data after arm_reorg runs. */
5730 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5732 && (GET_CODE (x) == LABEL_REF
5733 || (GET_CODE (x) == CONST
5734 && GET_CODE (XEXP (x, 0)) == PLUS
5735 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5736 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5739 /* Post-inc indexing only supported for SImode and larger. */
5740 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5741 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5744 else if (GET_CODE (x) == PLUS)
5746 /* REG+REG address can be any two index registers. */
5747 /* We disallow FRAME+REG addressing since we know that FRAME
5748 will be replaced with STACK, and SP relative addressing only
5749 permits SP+OFFSET. */
5750 if (GET_MODE_SIZE (mode) <= 4
5751 && XEXP (x, 0) != frame_pointer_rtx
5752 && XEXP (x, 1) != frame_pointer_rtx
5753 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5754 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5755 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5758 /* REG+const has 5-7 bit offset for non-SP registers. */
5759 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5760 || XEXP (x, 0) == arg_pointer_rtx)
5761 && GET_CODE (XEXP (x, 1)) == CONST_INT
5762 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5765 /* REG+const has 10-bit offset for SP, but only SImode and
5766 larger is supported. */
5767 /* ??? Should probably check for DI/DFmode overflow here
5768 just like GO_IF_LEGITIMATE_OFFSET does. */
5769 else if (GET_CODE (XEXP (x, 0)) == REG
5770 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5771 && GET_MODE_SIZE (mode) >= 4
5772 && GET_CODE (XEXP (x, 1)) == CONST_INT
5773 && INTVAL (XEXP (x, 1)) >= 0
5774 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5775 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5778 else if (GET_CODE (XEXP (x, 0)) == REG
5779 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5780 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5781 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5782 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5783 && GET_MODE_SIZE (mode) >= 4
5784 && GET_CODE (XEXP (x, 1)) == CONST_INT
5785 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5789 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5790 && GET_MODE_SIZE (mode) == 4
5791 && GET_CODE (x) == SYMBOL_REF
5792 && CONSTANT_POOL_ADDRESS_P (x)
5794 && symbol_mentioned_p (get_pool_constant (x))
5795 && ! pcrel_constant_p (get_pool_constant (x))))
5801 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5802 instruction of mode MODE. */
5804 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5806 switch (GET_MODE_SIZE (mode))
5809 return val >= 0 && val < 32;
5812 return val >= 0 && val < 64 && (val & 1) == 0;
5816 && (val + GET_MODE_SIZE (mode)) <= 128
5822 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5825 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5826 else if (TARGET_THUMB2)
5827 return thumb2_legitimate_address_p (mode, x, strict_p);
5828 else /* if (TARGET_THUMB1) */
5829 return thumb1_legitimate_address_p (mode, x, strict_p);
5832 /* Build the SYMBOL_REF for __tls_get_addr. */
5834 static GTY(()) rtx tls_get_addr_libfunc;
5837 get_tls_get_addr (void)
5839 if (!tls_get_addr_libfunc)
5840 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5841 return tls_get_addr_libfunc;
5845 arm_load_tp (rtx target)
5848 target = gen_reg_rtx (SImode);
5852 /* Can return in any reg. */
5853 emit_insn (gen_load_tp_hard (target));
5857 /* Always returned in r0. Immediately copy the result into a pseudo,
5858 otherwise other uses of r0 (e.g. setting up function arguments) may
5859 clobber the value. */
5863 emit_insn (gen_load_tp_soft ());
5865 tmp = gen_rtx_REG (SImode, 0);
5866 emit_move_insn (target, tmp);
5872 load_tls_operand (rtx x, rtx reg)
5876 if (reg == NULL_RTX)
5877 reg = gen_reg_rtx (SImode);
5879 tmp = gen_rtx_CONST (SImode, x);
5881 emit_move_insn (reg, tmp);
5887 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5889 rtx insns, label, labelno, sum;
5893 labelno = GEN_INT (pic_labelno++);
5894 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5895 label = gen_rtx_CONST (VOIDmode, label);
5897 sum = gen_rtx_UNSPEC (Pmode,
5898 gen_rtvec (4, x, GEN_INT (reloc), label,
5899 GEN_INT (TARGET_ARM ? 8 : 4)),
5901 reg = load_tls_operand (sum, reg);
5904 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5905 else if (TARGET_THUMB2)
5906 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5907 else /* TARGET_THUMB1 */
5908 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5910 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5911 Pmode, 1, reg, Pmode);
5913 insns = get_insns ();
5920 legitimize_tls_address (rtx x, rtx reg)
5922 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5923 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5927 case TLS_MODEL_GLOBAL_DYNAMIC:
5928 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5929 dest = gen_reg_rtx (Pmode);
5930 emit_libcall_block (insns, dest, ret, x);
5933 case TLS_MODEL_LOCAL_DYNAMIC:
5934 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5936 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5937 share the LDM result with other LD model accesses. */
5938 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5940 dest = gen_reg_rtx (Pmode);
5941 emit_libcall_block (insns, dest, ret, eqv);
5943 /* Load the addend. */
5944 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5946 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5947 return gen_rtx_PLUS (Pmode, dest, addend);
5949 case TLS_MODEL_INITIAL_EXEC:
5950 labelno = GEN_INT (pic_labelno++);
5951 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5952 label = gen_rtx_CONST (VOIDmode, label);
5953 sum = gen_rtx_UNSPEC (Pmode,
5954 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5955 GEN_INT (TARGET_ARM ? 8 : 4)),
5957 reg = load_tls_operand (sum, reg);
5960 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5961 else if (TARGET_THUMB2)
5962 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5965 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5966 emit_move_insn (reg, gen_const_mem (SImode, reg));
5969 tp = arm_load_tp (NULL_RTX);
5971 return gen_rtx_PLUS (Pmode, tp, reg);
5973 case TLS_MODEL_LOCAL_EXEC:
5974 tp = arm_load_tp (NULL_RTX);
5976 reg = gen_rtx_UNSPEC (Pmode,
5977 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5979 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5981 return gen_rtx_PLUS (Pmode, tp, reg);
5988 /* Try machine-dependent ways of modifying an illegitimate address
5989 to be legitimate. If we find one, return the new, valid address. */
5991 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5995 /* TODO: legitimize_address for Thumb2. */
5998 return thumb_legitimize_address (x, orig_x, mode);
6001 if (arm_tls_symbol_p (x))
6002 return legitimize_tls_address (x, NULL_RTX);
6004 if (GET_CODE (x) == PLUS)
6006 rtx xop0 = XEXP (x, 0);
6007 rtx xop1 = XEXP (x, 1);
6009 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6010 xop0 = force_reg (SImode, xop0);
6012 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6013 xop1 = force_reg (SImode, xop1);
6015 if (ARM_BASE_REGISTER_RTX_P (xop0)
6016 && GET_CODE (xop1) == CONST_INT)
6018 HOST_WIDE_INT n, low_n;
6022 /* VFP addressing modes actually allow greater offsets, but for
6023 now we just stick with the lowest common denominator. */
6025 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6037 low_n = ((mode) == TImode ? 0
6038 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6042 base_reg = gen_reg_rtx (SImode);
6043 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6044 emit_move_insn (base_reg, val);
6045 x = plus_constant (base_reg, low_n);
6047 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6048 x = gen_rtx_PLUS (SImode, xop0, xop1);
6051 /* XXX We don't allow MINUS any more -- see comment in
6052 arm_legitimate_address_outer_p (). */
6053 else if (GET_CODE (x) == MINUS)
6055 rtx xop0 = XEXP (x, 0);
6056 rtx xop1 = XEXP (x, 1);
6058 if (CONSTANT_P (xop0))
6059 xop0 = force_reg (SImode, xop0);
6061 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6062 xop1 = force_reg (SImode, xop1);
6064 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6065 x = gen_rtx_MINUS (SImode, xop0, xop1);
6068 /* Make sure to take full advantage of the pre-indexed addressing mode
6069 with absolute addresses which often allows for the base register to
6070 be factorized for multiple adjacent memory references, and it might
6071 even allows for the mini pool to be avoided entirely. */
6072 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6075 HOST_WIDE_INT mask, base, index;
6078 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6079 use a 8-bit index. So let's use a 12-bit index for SImode only and
6080 hope that arm_gen_constant will enable ldrb to use more bits. */
6081 bits = (mode == SImode) ? 12 : 8;
6082 mask = (1 << bits) - 1;
6083 base = INTVAL (x) & ~mask;
6084 index = INTVAL (x) & mask;
6085 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6087 /* It'll most probably be more efficient to generate the base
6088 with more bits set and use a negative index instead. */
6092 base_reg = force_reg (SImode, GEN_INT (base));
6093 x = plus_constant (base_reg, index);
6098 /* We need to find and carefully transform any SYMBOL and LABEL
6099 references; so go back to the original address expression. */
6100 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6102 if (new_x != orig_x)
6110 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6111 to be legitimate. If we find one, return the new, valid address. */
6113 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6115 if (arm_tls_symbol_p (x))
6116 return legitimize_tls_address (x, NULL_RTX);
6118 if (GET_CODE (x) == PLUS
6119 && GET_CODE (XEXP (x, 1)) == CONST_INT
6120 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6121 || INTVAL (XEXP (x, 1)) < 0))
6123 rtx xop0 = XEXP (x, 0);
6124 rtx xop1 = XEXP (x, 1);
6125 HOST_WIDE_INT offset = INTVAL (xop1);
6127 /* Try and fold the offset into a biasing of the base register and
6128 then offsetting that. Don't do this when optimizing for space
6129 since it can cause too many CSEs. */
6130 if (optimize_size && offset >= 0
6131 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6133 HOST_WIDE_INT delta;
6136 delta = offset - (256 - GET_MODE_SIZE (mode));
6137 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6138 delta = 31 * GET_MODE_SIZE (mode);
6140 delta = offset & (~31 * GET_MODE_SIZE (mode));
6142 xop0 = force_operand (plus_constant (xop0, offset - delta),
6144 x = plus_constant (xop0, delta);
6146 else if (offset < 0 && offset > -256)
6147 /* Small negative offsets are best done with a subtract before the
6148 dereference, forcing these into a register normally takes two
6150 x = force_operand (x, NULL_RTX);
6153 /* For the remaining cases, force the constant into a register. */
6154 xop1 = force_reg (SImode, xop1);
6155 x = gen_rtx_PLUS (SImode, xop0, xop1);
6158 else if (GET_CODE (x) == PLUS
6159 && s_register_operand (XEXP (x, 1), SImode)
6160 && !s_register_operand (XEXP (x, 0), SImode))
6162 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6164 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6169 /* We need to find and carefully transform any SYMBOL and LABEL
6170 references; so go back to the original address expression. */
6171 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6173 if (new_x != orig_x)
6181 thumb_legitimize_reload_address (rtx *x_p,
6182 enum machine_mode mode,
6183 int opnum, int type,
6184 int ind_levels ATTRIBUTE_UNUSED)
6188 if (GET_CODE (x) == PLUS
6189 && GET_MODE_SIZE (mode) < 4
6190 && REG_P (XEXP (x, 0))
6191 && XEXP (x, 0) == stack_pointer_rtx
6192 && GET_CODE (XEXP (x, 1)) == CONST_INT
6193 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6198 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6199 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6203 /* If both registers are hi-regs, then it's better to reload the
6204 entire expression rather than each register individually. That
6205 only requires one reload register rather than two. */
6206 if (GET_CODE (x) == PLUS
6207 && REG_P (XEXP (x, 0))
6208 && REG_P (XEXP (x, 1))
6209 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6210 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6215 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6216 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6223 /* Test for various thread-local symbols. */
6225 /* Return TRUE if X is a thread-local symbol. */
6228 arm_tls_symbol_p (rtx x)
6230 if (! TARGET_HAVE_TLS)
6233 if (GET_CODE (x) != SYMBOL_REF)
6236 return SYMBOL_REF_TLS_MODEL (x) != 0;
6239 /* Helper for arm_tls_referenced_p. */
6242 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6244 if (GET_CODE (*x) == SYMBOL_REF)
6245 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6247 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6248 TLS offsets, not real symbol references. */
6249 if (GET_CODE (*x) == UNSPEC
6250 && XINT (*x, 1) == UNSPEC_TLS)
6256 /* Return TRUE if X contains any TLS symbol references. */
6259 arm_tls_referenced_p (rtx x)
6261 if (! TARGET_HAVE_TLS)
6264 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6267 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6270 arm_cannot_force_const_mem (rtx x)
6274 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6276 split_const (x, &base, &offset);
6277 if (GET_CODE (base) == SYMBOL_REF
6278 && !offset_within_block_p (base, INTVAL (offset)))
6281 return arm_tls_referenced_p (x);
6284 #define REG_OR_SUBREG_REG(X) \
6285 (GET_CODE (X) == REG \
6286 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6288 #define REG_OR_SUBREG_RTX(X) \
6289 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6291 #ifndef COSTS_N_INSNS
6292 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6295 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6297 enum machine_mode mode = GET_MODE (x);
6311 return COSTS_N_INSNS (1);
6314 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6317 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6324 return COSTS_N_INSNS (2) + cycles;
6326 return COSTS_N_INSNS (1) + 16;
6329 return (COSTS_N_INSNS (1)
6330 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6331 + GET_CODE (SET_DEST (x)) == MEM));
6336 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6338 if (thumb_shiftable_const (INTVAL (x)))
6339 return COSTS_N_INSNS (2);
6340 return COSTS_N_INSNS (3);
6342 else if ((outer == PLUS || outer == COMPARE)
6343 && INTVAL (x) < 256 && INTVAL (x) > -256)
6345 else if ((outer == IOR || outer == XOR || outer == AND)
6346 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6347 return COSTS_N_INSNS (1);
6348 else if (outer == AND)
6351 /* This duplicates the tests in the andsi3 expander. */
6352 for (i = 9; i <= 31; i++)
6353 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6354 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6355 return COSTS_N_INSNS (2);
6357 else if (outer == ASHIFT || outer == ASHIFTRT
6358 || outer == LSHIFTRT)
6360 return COSTS_N_INSNS (2);
6366 return COSTS_N_INSNS (3);
6384 /* XXX another guess. */
6385 /* Memory costs quite a lot for the first word, but subsequent words
6386 load at the equivalent of a single insn each. */
6387 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6388 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6393 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6399 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6400 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6406 return total + COSTS_N_INSNS (1);
6408 /* Assume a two-shift sequence. Increase the cost slightly so
6409 we prefer actual shifts over an extend operation. */
6410 return total + 1 + COSTS_N_INSNS (2);
6418 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6420 enum machine_mode mode = GET_MODE (x);
6421 enum rtx_code subcode;
6423 enum rtx_code code = GET_CODE (x);
6429 /* Memory costs quite a lot for the first word, but subsequent words
6430 load at the equivalent of a single insn each. */
6431 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6438 if (TARGET_HARD_FLOAT && mode == SFmode)
6439 *total = COSTS_N_INSNS (2);
6440 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6441 *total = COSTS_N_INSNS (4);
6443 *total = COSTS_N_INSNS (20);
6447 if (GET_CODE (XEXP (x, 1)) == REG)
6448 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6449 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6450 *total = rtx_cost (XEXP (x, 1), code, speed);
6456 *total += COSTS_N_INSNS (4);
6461 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6462 *total += rtx_cost (XEXP (x, 0), code, speed);
6465 *total += COSTS_N_INSNS (3);
6469 *total += COSTS_N_INSNS (1);
6470 /* Increase the cost of complex shifts because they aren't any faster,
6471 and reduce dual issue opportunities. */
6472 if (arm_tune_cortex_a9
6473 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6481 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6482 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6483 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6485 *total += rtx_cost (XEXP (x, 1), code, speed);
6489 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6490 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6492 *total += rtx_cost (XEXP (x, 0), code, speed);
6499 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6501 if (TARGET_HARD_FLOAT
6503 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6505 *total = COSTS_N_INSNS (1);
6506 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6507 && arm_const_double_rtx (XEXP (x, 0)))
6509 *total += rtx_cost (XEXP (x, 1), code, speed);
6513 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6514 && arm_const_double_rtx (XEXP (x, 1)))
6516 *total += rtx_cost (XEXP (x, 0), code, speed);
6522 *total = COSTS_N_INSNS (20);
6526 *total = COSTS_N_INSNS (1);
6527 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6528 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6530 *total += rtx_cost (XEXP (x, 1), code, speed);
6534 subcode = GET_CODE (XEXP (x, 1));
6535 if (subcode == ASHIFT || subcode == ASHIFTRT
6536 || subcode == LSHIFTRT
6537 || subcode == ROTATE || subcode == ROTATERT)
6539 *total += rtx_cost (XEXP (x, 0), code, speed);
6540 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6544 /* A shift as a part of RSB costs no more than RSB itself. */
6545 if (GET_CODE (XEXP (x, 0)) == MULT
6546 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6548 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6549 *total += rtx_cost (XEXP (x, 1), code, speed);
6554 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6556 *total += rtx_cost (XEXP (x, 0), code, speed);
6557 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6561 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6562 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6564 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6565 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6566 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6567 *total += COSTS_N_INSNS (1);
6575 if (code == PLUS && arm_arch6 && mode == SImode
6576 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6577 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6579 *total = COSTS_N_INSNS (1);
6580 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6582 *total += rtx_cost (XEXP (x, 1), code, speed);
6586 /* MLA: All arguments must be registers. We filter out
6587 multiplication by a power of two, so that we fall down into
6589 if (GET_CODE (XEXP (x, 0)) == MULT
6590 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6592 /* The cost comes from the cost of the multiply. */
6596 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6598 if (TARGET_HARD_FLOAT
6600 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6602 *total = COSTS_N_INSNS (1);
6603 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6604 && arm_const_double_rtx (XEXP (x, 1)))
6606 *total += rtx_cost (XEXP (x, 0), code, speed);
6613 *total = COSTS_N_INSNS (20);
6617 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6618 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6620 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6621 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6622 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6623 *total += COSTS_N_INSNS (1);
6629 case AND: case XOR: case IOR:
6631 /* Normally the frame registers will be spilt into reg+const during
6632 reload, so it is a bad idea to combine them with other instructions,
6633 since then they might not be moved outside of loops. As a compromise
6634 we allow integration with ops that have a constant as their second
6636 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6637 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6638 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6639 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6640 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6645 *total += COSTS_N_INSNS (2);
6646 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6647 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6649 *total += rtx_cost (XEXP (x, 0), code, speed);
6656 *total += COSTS_N_INSNS (1);
6657 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6658 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6660 *total += rtx_cost (XEXP (x, 0), code, speed);
6663 subcode = GET_CODE (XEXP (x, 0));
6664 if (subcode == ASHIFT || subcode == ASHIFTRT
6665 || subcode == LSHIFTRT
6666 || subcode == ROTATE || subcode == ROTATERT)
6668 *total += rtx_cost (XEXP (x, 1), code, speed);
6669 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6674 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6676 *total += rtx_cost (XEXP (x, 1), code, speed);
6677 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6681 if (subcode == UMIN || subcode == UMAX
6682 || subcode == SMIN || subcode == SMAX)
6684 *total = COSTS_N_INSNS (3);
6691 /* This should have been handled by the CPU specific routines. */
6695 if (arm_arch3m && mode == SImode
6696 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6697 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6698 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6699 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6700 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6701 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6703 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6706 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6710 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6712 if (TARGET_HARD_FLOAT
6714 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6716 *total = COSTS_N_INSNS (1);
6719 *total = COSTS_N_INSNS (2);
6725 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6726 if (mode == SImode && code == NOT)
6728 subcode = GET_CODE (XEXP (x, 0));
6729 if (subcode == ASHIFT || subcode == ASHIFTRT
6730 || subcode == LSHIFTRT
6731 || subcode == ROTATE || subcode == ROTATERT
6733 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6735 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6736 /* Register shifts cost an extra cycle. */
6737 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6738 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6747 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6749 *total = COSTS_N_INSNS (4);
6753 operand = XEXP (x, 0);
6755 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6756 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6757 && GET_CODE (XEXP (operand, 0)) == REG
6758 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6759 *total += COSTS_N_INSNS (1);
6760 *total += (rtx_cost (XEXP (x, 1), code, speed)
6761 + rtx_cost (XEXP (x, 2), code, speed));
6765 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6767 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6773 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6774 && mode == SImode && XEXP (x, 1) == const0_rtx)
6776 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6782 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6783 && mode == SImode && XEXP (x, 1) == const0_rtx)
6785 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6805 /* SCC insns. In the case where the comparison has already been
6806 performed, then they cost 2 instructions. Otherwise they need
6807 an additional comparison before them. */
6808 *total = COSTS_N_INSNS (2);
6809 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6816 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6822 *total += COSTS_N_INSNS (1);
6823 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6824 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6826 *total += rtx_cost (XEXP (x, 0), code, speed);
6830 subcode = GET_CODE (XEXP (x, 0));
6831 if (subcode == ASHIFT || subcode == ASHIFTRT
6832 || subcode == LSHIFTRT
6833 || subcode == ROTATE || subcode == ROTATERT)
6835 *total += rtx_cost (XEXP (x, 1), code, speed);
6836 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6841 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6843 *total += rtx_cost (XEXP (x, 1), code, speed);
6844 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6854 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6855 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6856 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6857 *total += rtx_cost (XEXP (x, 1), code, speed);
6861 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6863 if (TARGET_HARD_FLOAT
6865 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6867 *total = COSTS_N_INSNS (1);
6870 *total = COSTS_N_INSNS (20);
6873 *total = COSTS_N_INSNS (1);
6875 *total += COSTS_N_INSNS (3);
6881 if (GET_MODE_CLASS (mode) == MODE_INT)
6883 rtx op = XEXP (x, 0);
6884 enum machine_mode opmode = GET_MODE (op);
6887 *total += COSTS_N_INSNS (1);
6889 if (opmode != SImode)
6893 /* If !arm_arch4, we use one of the extendhisi2_mem
6894 or movhi_bytes patterns for HImode. For a QImode
6895 sign extension, we first zero-extend from memory
6896 and then perform a shift sequence. */
6897 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6898 *total += COSTS_N_INSNS (2);
6901 *total += COSTS_N_INSNS (1);
6903 /* We don't have the necessary insn, so we need to perform some
6905 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6906 /* An and with constant 255. */
6907 *total += COSTS_N_INSNS (1);
6909 /* A shift sequence. Increase costs slightly to avoid
6910 combining two shifts into an extend operation. */
6911 *total += COSTS_N_INSNS (2) + 1;
6917 switch (GET_MODE (XEXP (x, 0)))
6924 *total = COSTS_N_INSNS (1);
6934 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6938 if (const_ok_for_arm (INTVAL (x))
6939 || const_ok_for_arm (~INTVAL (x)))
6940 *total = COSTS_N_INSNS (1);
6942 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6943 INTVAL (x), NULL_RTX,
6950 *total = COSTS_N_INSNS (3);
6954 *total = COSTS_N_INSNS (1);
6958 *total = COSTS_N_INSNS (1);
6959 *total += rtx_cost (XEXP (x, 0), code, speed);
6963 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6964 && (mode == SFmode || !TARGET_VFP_SINGLE))
6965 *total = COSTS_N_INSNS (1);
6967 *total = COSTS_N_INSNS (4);
6971 *total = COSTS_N_INSNS (4);
6976 /* Estimates the size cost of thumb1 instructions.
6977 For now most of the code is copied from thumb1_rtx_costs. We need more
6978 fine grain tuning when we have more related test cases. */
6980 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6982 enum machine_mode mode = GET_MODE (x);
6995 return COSTS_N_INSNS (1);
6998 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7000 /* Thumb1 mul instruction can't operate on const. We must Load it
7001 into a register first. */
7002 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7003 return COSTS_N_INSNS (1) + const_size;
7005 return COSTS_N_INSNS (1);
7008 return (COSTS_N_INSNS (1)
7009 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7010 + GET_CODE (SET_DEST (x)) == MEM));
7015 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7017 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7018 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7019 return COSTS_N_INSNS (2);
7020 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7021 if (thumb_shiftable_const (INTVAL (x)))
7022 return COSTS_N_INSNS (2);
7023 return COSTS_N_INSNS (3);
7025 else if ((outer == PLUS || outer == COMPARE)
7026 && INTVAL (x) < 256 && INTVAL (x) > -256)
7028 else if ((outer == IOR || outer == XOR || outer == AND)
7029 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7030 return COSTS_N_INSNS (1);
7031 else if (outer == AND)
7034 /* This duplicates the tests in the andsi3 expander. */
7035 for (i = 9; i <= 31; i++)
7036 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7037 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7038 return COSTS_N_INSNS (2);
7040 else if (outer == ASHIFT || outer == ASHIFTRT
7041 || outer == LSHIFTRT)
7043 return COSTS_N_INSNS (2);
7049 return COSTS_N_INSNS (3);
7067 /* XXX another guess. */
7068 /* Memory costs quite a lot for the first word, but subsequent words
7069 load at the equivalent of a single insn each. */
7070 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7071 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7076 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7081 /* XXX still guessing. */
7082 switch (GET_MODE (XEXP (x, 0)))
7085 return (1 + (mode == DImode ? 4 : 0)
7086 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7089 return (4 + (mode == DImode ? 4 : 0)
7090 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7093 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7104 /* RTX costs when optimizing for size. */
7106 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7109 enum machine_mode mode = GET_MODE (x);
7112 *total = thumb1_size_rtx_costs (x, code, outer_code);
7116 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7120 /* A memory access costs 1 insn if the mode is small, or the address is
7121 a single register, otherwise it costs one insn per word. */
7122 if (REG_P (XEXP (x, 0)))
7123 *total = COSTS_N_INSNS (1);
7125 && GET_CODE (XEXP (x, 0)) == PLUS
7126 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7127 /* This will be split into two instructions.
7128 See arm.md:calculate_pic_address. */
7129 *total = COSTS_N_INSNS (2);
7131 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7138 /* Needs a libcall, so it costs about this. */
7139 *total = COSTS_N_INSNS (2);
7143 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7145 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7153 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7155 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7158 else if (mode == SImode)
7160 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7161 /* Slightly disparage register shifts, but not by much. */
7162 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7163 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7167 /* Needs a libcall. */
7168 *total = COSTS_N_INSNS (2);
7172 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7173 && (mode == SFmode || !TARGET_VFP_SINGLE))
7175 *total = COSTS_N_INSNS (1);
7181 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7182 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7184 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7185 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7186 || subcode1 == ROTATE || subcode1 == ROTATERT
7187 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7188 || subcode1 == ASHIFTRT)
7190 /* It's just the cost of the two operands. */
7195 *total = COSTS_N_INSNS (1);
7199 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7203 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7204 && (mode == SFmode || !TARGET_VFP_SINGLE))
7206 *total = COSTS_N_INSNS (1);
7210 /* A shift as a part of ADD costs nothing. */
7211 if (GET_CODE (XEXP (x, 0)) == MULT
7212 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7214 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7215 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7216 *total += rtx_cost (XEXP (x, 1), code, false);
7221 case AND: case XOR: case IOR:
7224 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7226 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7227 || subcode == LSHIFTRT || subcode == ASHIFTRT
7228 || (code == AND && subcode == NOT))
7230 /* It's just the cost of the two operands. */
7236 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7240 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7244 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7245 && (mode == SFmode || !TARGET_VFP_SINGLE))
7247 *total = COSTS_N_INSNS (1);
7253 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7262 if (cc_register (XEXP (x, 0), VOIDmode))
7265 *total = COSTS_N_INSNS (1);
7269 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7270 && (mode == SFmode || !TARGET_VFP_SINGLE))
7271 *total = COSTS_N_INSNS (1);
7273 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7278 return arm_rtx_costs_1 (x, outer_code, total, 0);
7281 if (const_ok_for_arm (INTVAL (x)))
7282 /* A multiplication by a constant requires another instruction
7283 to load the constant to a register. */
7284 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7286 else if (const_ok_for_arm (~INTVAL (x)))
7287 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7288 else if (const_ok_for_arm (-INTVAL (x)))
7290 if (outer_code == COMPARE || outer_code == PLUS
7291 || outer_code == MINUS)
7294 *total = COSTS_N_INSNS (1);
7297 *total = COSTS_N_INSNS (2);
7303 *total = COSTS_N_INSNS (2);
7307 *total = COSTS_N_INSNS (4);
7312 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7313 cost of these slightly. */
7314 *total = COSTS_N_INSNS (1) + 1;
7318 if (mode != VOIDmode)
7319 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7321 *total = COSTS_N_INSNS (4); /* How knows? */
7326 /* RTX costs when optimizing for size. */
7328 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7332 return arm_size_rtx_costs (x, (enum rtx_code) code,
7333 (enum rtx_code) outer_code, total);
7335 return current_tune->rtx_costs (x, (enum rtx_code) code,
7336 (enum rtx_code) outer_code,
7340 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7341 supported on any "slowmul" cores, so it can be ignored. */
7344 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7345 int *total, bool speed)
7347 enum machine_mode mode = GET_MODE (x);
7351 *total = thumb1_rtx_costs (x, code, outer_code);
7358 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7361 *total = COSTS_N_INSNS (20);
7365 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7367 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7368 & (unsigned HOST_WIDE_INT) 0xffffffff);
7369 int cost, const_ok = const_ok_for_arm (i);
7370 int j, booth_unit_size;
7372 /* Tune as appropriate. */
7373 cost = const_ok ? 4 : 8;
7374 booth_unit_size = 2;
7375 for (j = 0; i && j < 32; j += booth_unit_size)
7377 i >>= booth_unit_size;
7381 *total = COSTS_N_INSNS (cost);
7382 *total += rtx_cost (XEXP (x, 0), code, speed);
7386 *total = COSTS_N_INSNS (20);
7390 return arm_rtx_costs_1 (x, outer_code, total, speed);;
7395 /* RTX cost for cores with a fast multiply unit (M variants). */
7398 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7399 int *total, bool speed)
7401 enum machine_mode mode = GET_MODE (x);
7405 *total = thumb1_rtx_costs (x, code, outer_code);
7409 /* ??? should thumb2 use different costs? */
7413 /* There is no point basing this on the tuning, since it is always the
7414 fast variant if it exists at all. */
7416 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7417 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7418 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7420 *total = COSTS_N_INSNS(2);
7427 *total = COSTS_N_INSNS (5);
7431 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7433 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7434 & (unsigned HOST_WIDE_INT) 0xffffffff);
7435 int cost, const_ok = const_ok_for_arm (i);
7436 int j, booth_unit_size;
7438 /* Tune as appropriate. */
7439 cost = const_ok ? 4 : 8;
7440 booth_unit_size = 8;
7441 for (j = 0; i && j < 32; j += booth_unit_size)
7443 i >>= booth_unit_size;
7447 *total = COSTS_N_INSNS(cost);
7453 *total = COSTS_N_INSNS (4);
7457 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7459 if (TARGET_HARD_FLOAT
7461 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7463 *total = COSTS_N_INSNS (1);
7468 /* Requires a lib call */
7469 *total = COSTS_N_INSNS (20);
7473 return arm_rtx_costs_1 (x, outer_code, total, speed);
7478 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7479 so it can be ignored. */
7482 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7483 int *total, bool speed)
7485 enum machine_mode mode = GET_MODE (x);
7489 *total = thumb1_rtx_costs (x, code, outer_code);
7496 if (GET_CODE (XEXP (x, 0)) != MULT)
7497 return arm_rtx_costs_1 (x, outer_code, total, speed);
7499 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7500 will stall until the multiplication is complete. */
7501 *total = COSTS_N_INSNS (3);
7505 /* There is no point basing this on the tuning, since it is always the
7506 fast variant if it exists at all. */
7508 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7509 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7510 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7512 *total = COSTS_N_INSNS (2);
7519 *total = COSTS_N_INSNS (5);
7523 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7525 /* If operand 1 is a constant we can more accurately
7526 calculate the cost of the multiply. The multiplier can
7527 retire 15 bits on the first cycle and a further 12 on the
7528 second. We do, of course, have to load the constant into
7529 a register first. */
7530 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7531 /* There's a general overhead of one cycle. */
7533 unsigned HOST_WIDE_INT masked_const;
7538 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7540 masked_const = i & 0xffff8000;
7541 if (masked_const != 0)
7544 masked_const = i & 0xf8000000;
7545 if (masked_const != 0)
7548 *total = COSTS_N_INSNS (cost);
7554 *total = COSTS_N_INSNS (3);
7558 /* Requires a lib call */
7559 *total = COSTS_N_INSNS (20);
7563 return arm_rtx_costs_1 (x, outer_code, total, speed);
7568 /* RTX costs for 9e (and later) cores. */
7571 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7572 int *total, bool speed)
7574 enum machine_mode mode = GET_MODE (x);
7581 *total = COSTS_N_INSNS (3);
7585 *total = thumb1_rtx_costs (x, code, outer_code);
7593 /* There is no point basing this on the tuning, since it is always the
7594 fast variant if it exists at all. */
7596 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7597 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7598 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7600 *total = COSTS_N_INSNS (2);
7607 *total = COSTS_N_INSNS (5);
7613 *total = COSTS_N_INSNS (2);
7617 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7619 if (TARGET_HARD_FLOAT
7621 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7623 *total = COSTS_N_INSNS (1);
7628 *total = COSTS_N_INSNS (20);
7632 return arm_rtx_costs_1 (x, outer_code, total, speed);
7635 /* All address computations that can be done are free, but rtx cost returns
7636 the same for practically all of them. So we weight the different types
7637 of address here in the order (most pref first):
7638 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7640 arm_arm_address_cost (rtx x)
7642 enum rtx_code c = GET_CODE (x);
7644 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7646 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7651 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7654 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7664 arm_thumb_address_cost (rtx x)
7666 enum rtx_code c = GET_CODE (x);
7671 && GET_CODE (XEXP (x, 0)) == REG
7672 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7679 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7681 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7685 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7689 /* Some true dependencies can have a higher cost depending
7690 on precisely how certain input operands are used. */
7692 && REG_NOTE_KIND (link) == 0
7693 && recog_memoized (insn) >= 0
7694 && recog_memoized (dep) >= 0)
7696 int shift_opnum = get_attr_shift (insn);
7697 enum attr_type attr_type = get_attr_type (dep);
7699 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7700 operand for INSN. If we have a shifted input operand and the
7701 instruction we depend on is another ALU instruction, then we may
7702 have to account for an additional stall. */
7703 if (shift_opnum != 0
7704 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7706 rtx shifted_operand;
7709 /* Get the shifted operand. */
7710 extract_insn (insn);
7711 shifted_operand = recog_data.operand[shift_opnum];
7713 /* Iterate over all the operands in DEP. If we write an operand
7714 that overlaps with SHIFTED_OPERAND, then we have increase the
7715 cost of this dependency. */
7717 preprocess_constraints ();
7718 for (opno = 0; opno < recog_data.n_operands; opno++)
7720 /* We can ignore strict inputs. */
7721 if (recog_data.operand_type[opno] == OP_IN)
7724 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7731 /* XXX This is not strictly true for the FPA. */
7732 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7733 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7736 /* Call insns don't incur a stall, even if they follow a load. */
7737 if (REG_NOTE_KIND (link) == 0
7738 && GET_CODE (insn) == CALL_INSN)
7741 if ((i_pat = single_set (insn)) != NULL
7742 && GET_CODE (SET_SRC (i_pat)) == MEM
7743 && (d_pat = single_set (dep)) != NULL
7744 && GET_CODE (SET_DEST (d_pat)) == MEM)
7746 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7747 /* This is a load after a store, there is no conflict if the load reads
7748 from a cached area. Assume that loads from the stack, and from the
7749 constant pool are cached, and that others will miss. This is a
7752 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7753 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7754 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7755 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7762 static int fp_consts_inited = 0;
7764 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7765 static const char * const strings_fp[8] =
7768 "4", "5", "0.5", "10"
7771 static REAL_VALUE_TYPE values_fp[8];
7774 init_fp_table (void)
7780 fp_consts_inited = 1;
7782 fp_consts_inited = 8;
7784 for (i = 0; i < fp_consts_inited; i++)
7786 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7791 /* Return TRUE if rtx X is a valid immediate FP constant. */
7793 arm_const_double_rtx (rtx x)
7798 if (!fp_consts_inited)
7801 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7802 if (REAL_VALUE_MINUS_ZERO (r))
7805 for (i = 0; i < fp_consts_inited; i++)
7806 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7812 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7814 neg_const_double_rtx_ok_for_fpa (rtx x)
7819 if (!fp_consts_inited)
7822 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7823 r = real_value_negate (&r);
7824 if (REAL_VALUE_MINUS_ZERO (r))
7827 for (i = 0; i < 8; i++)
7828 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7835 /* VFPv3 has a fairly wide range of representable immediates, formed from
7836 "quarter-precision" floating-point values. These can be evaluated using this
7837 formula (with ^ for exponentiation):
7841 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7842 16 <= n <= 31 and 0 <= r <= 7.
7844 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7846 - A (most-significant) is the sign bit.
7847 - BCD are the exponent (encoded as r XOR 3).
7848 - EFGH are the mantissa (encoded as n - 16).
7851 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7852 fconst[sd] instruction, or -1 if X isn't suitable. */
7854 vfp3_const_double_index (rtx x)
7856 REAL_VALUE_TYPE r, m;
7858 unsigned HOST_WIDE_INT mantissa, mant_hi;
7859 unsigned HOST_WIDE_INT mask;
7860 HOST_WIDE_INT m1, m2;
7861 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7863 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7866 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7868 /* We can't represent these things, so detect them first. */
7869 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7872 /* Extract sign, exponent and mantissa. */
7873 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7874 r = real_value_abs (&r);
7875 exponent = REAL_EXP (&r);
7876 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7877 highest (sign) bit, with a fixed binary point at bit point_pos.
7878 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7879 bits for the mantissa, this may fail (low bits would be lost). */
7880 real_ldexp (&m, &r, point_pos - exponent);
7881 REAL_VALUE_TO_INT (&m1, &m2, m);
7885 /* If there are bits set in the low part of the mantissa, we can't
7886 represent this value. */
7890 /* Now make it so that mantissa contains the most-significant bits, and move
7891 the point_pos to indicate that the least-significant bits have been
7893 point_pos -= HOST_BITS_PER_WIDE_INT;
7896 /* We can permit four significant bits of mantissa only, plus a high bit
7897 which is always 1. */
7898 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7899 if ((mantissa & mask) != 0)
7902 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7903 mantissa >>= point_pos - 5;
7905 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7906 floating-point immediate zero with Neon using an integer-zero load, but
7907 that case is handled elsewhere.) */
7911 gcc_assert (mantissa >= 16 && mantissa <= 31);
7913 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7914 normalized significands are in the range [1, 2). (Our mantissa is shifted
7915 left 4 places at this point relative to normalized IEEE754 values). GCC
7916 internally uses [0.5, 1) (see real.c), so the exponent returned from
7917 REAL_EXP must be altered. */
7918 exponent = 5 - exponent;
7920 if (exponent < 0 || exponent > 7)
7923 /* Sign, mantissa and exponent are now in the correct form to plug into the
7924 formula described in the comment above. */
7925 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7928 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7930 vfp3_const_double_rtx (rtx x)
7935 return vfp3_const_double_index (x) != -1;
7938 /* Recognize immediates which can be used in various Neon instructions. Legal
7939 immediates are described by the following table (for VMVN variants, the
7940 bitwise inverse of the constant shown is recognized. In either case, VMOV
7941 is output and the correct instruction to use for a given constant is chosen
7942 by the assembler). The constant shown is replicated across all elements of
7943 the destination vector.
7945 insn elems variant constant (binary)
7946 ---- ----- ------- -----------------
7947 vmov i32 0 00000000 00000000 00000000 abcdefgh
7948 vmov i32 1 00000000 00000000 abcdefgh 00000000
7949 vmov i32 2 00000000 abcdefgh 00000000 00000000
7950 vmov i32 3 abcdefgh 00000000 00000000 00000000
7951 vmov i16 4 00000000 abcdefgh
7952 vmov i16 5 abcdefgh 00000000
7953 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7954 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7955 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7956 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7957 vmvn i16 10 00000000 abcdefgh
7958 vmvn i16 11 abcdefgh 00000000
7959 vmov i32 12 00000000 00000000 abcdefgh 11111111
7960 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7961 vmov i32 14 00000000 abcdefgh 11111111 11111111
7962 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7964 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7965 eeeeeeee ffffffff gggggggg hhhhhhhh
7966 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7968 For case 18, B = !b. Representable values are exactly those accepted by
7969 vfp3_const_double_index, but are output as floating-point numbers rather
7972 Variants 0-5 (inclusive) may also be used as immediates for the second
7973 operand of VORR/VBIC instructions.
7975 The INVERSE argument causes the bitwise inverse of the given operand to be
7976 recognized instead (used for recognizing legal immediates for the VAND/VORN
7977 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7978 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7979 output, rather than the real insns vbic/vorr).
7981 INVERSE makes no difference to the recognition of float vectors.
7983 The return value is the variant of immediate as shown in the above table, or
7984 -1 if the given value doesn't match any of the listed patterns.
7987 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7988 rtx *modconst, int *elementwidth)
7990 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7992 for (i = 0; i < idx; i += (STRIDE)) \
7997 immtype = (CLASS); \
7998 elsize = (ELSIZE); \
8002 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8003 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8004 unsigned char bytes[16];
8005 int immtype = -1, matches;
8006 unsigned int invmask = inverse ? 0xff : 0;
8008 /* Vectors of float constants. */
8009 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8011 rtx el0 = CONST_VECTOR_ELT (op, 0);
8014 if (!vfp3_const_double_rtx (el0))
8017 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8019 for (i = 1; i < n_elts; i++)
8021 rtx elt = CONST_VECTOR_ELT (op, i);
8024 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8026 if (!REAL_VALUES_EQUAL (r0, re))
8031 *modconst = CONST_VECTOR_ELT (op, 0);
8039 /* Splat vector constant out into a byte vector. */
8040 for (i = 0; i < n_elts; i++)
8042 rtx el = CONST_VECTOR_ELT (op, i);
8043 unsigned HOST_WIDE_INT elpart;
8044 unsigned int part, parts;
8046 if (GET_CODE (el) == CONST_INT)
8048 elpart = INTVAL (el);
8051 else if (GET_CODE (el) == CONST_DOUBLE)
8053 elpart = CONST_DOUBLE_LOW (el);
8059 for (part = 0; part < parts; part++)
8062 for (byte = 0; byte < innersize; byte++)
8064 bytes[idx++] = (elpart & 0xff) ^ invmask;
8065 elpart >>= BITS_PER_UNIT;
8067 if (GET_CODE (el) == CONST_DOUBLE)
8068 elpart = CONST_DOUBLE_HIGH (el);
8073 gcc_assert (idx == GET_MODE_SIZE (mode));
8077 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8078 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8080 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8081 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8083 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8084 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8086 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8087 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8089 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8091 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8093 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8094 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8096 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8097 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8099 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8100 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8102 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8103 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8105 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8107 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8109 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8110 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8112 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8113 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8115 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8116 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8118 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8119 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8121 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8123 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8124 && bytes[i] == bytes[(i + 8) % idx]);
8132 *elementwidth = elsize;
8136 unsigned HOST_WIDE_INT imm = 0;
8138 /* Un-invert bytes of recognized vector, if necessary. */
8140 for (i = 0; i < idx; i++)
8141 bytes[i] ^= invmask;
8145 /* FIXME: Broken on 32-bit H_W_I hosts. */
8146 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8148 for (i = 0; i < 8; i++)
8149 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8150 << (i * BITS_PER_UNIT);
8152 *modconst = GEN_INT (imm);
8156 unsigned HOST_WIDE_INT imm = 0;
8158 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8159 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8161 *modconst = GEN_INT (imm);
8169 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8170 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8171 float elements), and a modified constant (whatever should be output for a
8172 VMOV) in *MODCONST. */
8175 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8176 rtx *modconst, int *elementwidth)
8180 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8186 *modconst = tmpconst;
8189 *elementwidth = tmpwidth;
8194 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8195 the immediate is valid, write a constant suitable for using as an operand
8196 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8197 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8200 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8201 rtx *modconst, int *elementwidth)
8205 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8207 if (retval < 0 || retval > 5)
8211 *modconst = tmpconst;
8214 *elementwidth = tmpwidth;
8219 /* Return a string suitable for output of Neon immediate logic operation
8223 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8224 int inverse, int quad)
8226 int width, is_valid;
8227 static char templ[40];
8229 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8231 gcc_assert (is_valid != 0);
8234 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8236 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8241 /* Output a sequence of pairwise operations to implement a reduction.
8242 NOTE: We do "too much work" here, because pairwise operations work on two
8243 registers-worth of operands in one go. Unfortunately we can't exploit those
8244 extra calculations to do the full operation in fewer steps, I don't think.
8245 Although all vector elements of the result but the first are ignored, we
8246 actually calculate the same result in each of the elements. An alternative
8247 such as initially loading a vector with zero to use as each of the second
8248 operands would use up an additional register and take an extra instruction,
8249 for no particular gain. */
8252 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8253 rtx (*reduc) (rtx, rtx, rtx))
8255 enum machine_mode inner = GET_MODE_INNER (mode);
8256 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8259 for (i = parts / 2; i >= 1; i /= 2)
8261 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8262 emit_insn (reduc (dest, tmpsum, tmpsum));
8267 /* If VALS is a vector constant that can be loaded into a register
8268 using VDUP, generate instructions to do so and return an RTX to
8269 assign to the register. Otherwise return NULL_RTX. */
8272 neon_vdup_constant (rtx vals)
8274 enum machine_mode mode = GET_MODE (vals);
8275 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8276 int n_elts = GET_MODE_NUNITS (mode);
8277 bool all_same = true;
8281 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8284 for (i = 0; i < n_elts; ++i)
8286 x = XVECEXP (vals, 0, i);
8287 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8292 /* The elements are not all the same. We could handle repeating
8293 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8294 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8298 /* We can load this constant by using VDUP and a constant in a
8299 single ARM register. This will be cheaper than a vector
8302 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8303 return gen_rtx_VEC_DUPLICATE (mode, x);
8306 /* Generate code to load VALS, which is a PARALLEL containing only
8307 constants (for vec_init) or CONST_VECTOR, efficiently into a
8308 register. Returns an RTX to copy into the register, or NULL_RTX
8309 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8312 neon_make_constant (rtx vals)
8314 enum machine_mode mode = GET_MODE (vals);
8316 rtx const_vec = NULL_RTX;
8317 int n_elts = GET_MODE_NUNITS (mode);
8321 if (GET_CODE (vals) == CONST_VECTOR)
8323 else if (GET_CODE (vals) == PARALLEL)
8325 /* A CONST_VECTOR must contain only CONST_INTs and
8326 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8327 Only store valid constants in a CONST_VECTOR. */
8328 for (i = 0; i < n_elts; ++i)
8330 rtx x = XVECEXP (vals, 0, i);
8331 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8334 if (n_const == n_elts)
8335 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8340 if (const_vec != NULL
8341 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8342 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8344 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8345 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8346 pipeline cycle; creating the constant takes one or two ARM
8349 else if (const_vec != NULL_RTX)
8350 /* Load from constant pool. On Cortex-A8 this takes two cycles
8351 (for either double or quad vectors). We can not take advantage
8352 of single-cycle VLD1 because we need a PC-relative addressing
8356 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8357 We can not construct an initializer. */
8361 /* Initialize vector TARGET to VALS. */
8364 neon_expand_vector_init (rtx target, rtx vals)
8366 enum machine_mode mode = GET_MODE (target);
8367 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8368 int n_elts = GET_MODE_NUNITS (mode);
8369 int n_var = 0, one_var = -1;
8370 bool all_same = true;
8374 for (i = 0; i < n_elts; ++i)
8376 x = XVECEXP (vals, 0, i);
8377 if (!CONSTANT_P (x))
8378 ++n_var, one_var = i;
8380 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8386 rtx constant = neon_make_constant (vals);
8387 if (constant != NULL_RTX)
8389 emit_move_insn (target, constant);
8394 /* Splat a single non-constant element if we can. */
8395 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8397 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8398 emit_insn (gen_rtx_SET (VOIDmode, target,
8399 gen_rtx_VEC_DUPLICATE (mode, x)));
8403 /* One field is non-constant. Load constant then overwrite varying
8404 field. This is more efficient than using the stack. */
8407 rtx copy = copy_rtx (vals);
8408 rtx index = GEN_INT (one_var);
8410 /* Load constant part of vector, substitute neighboring value for
8412 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8413 neon_expand_vector_init (target, copy);
8415 /* Insert variable. */
8416 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8420 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8423 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8426 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8429 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8432 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8435 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8438 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8441 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8444 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8452 /* Construct the vector in memory one field at a time
8453 and load the whole vector. */
8454 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8455 for (i = 0; i < n_elts; i++)
8456 emit_move_insn (adjust_address_nv (mem, inner_mode,
8457 i * GET_MODE_SIZE (inner_mode)),
8458 XVECEXP (vals, 0, i));
8459 emit_move_insn (target, mem);
8462 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8463 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8464 reported source locations are bogus. */
8467 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8472 gcc_assert (GET_CODE (operand) == CONST_INT);
8474 lane = INTVAL (operand);
8476 if (lane < low || lane >= high)
8480 /* Bounds-check lanes. */
8483 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8485 bounds_check (operand, low, high, "lane out of range");
8488 /* Bounds-check constants. */
8491 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8493 bounds_check (operand, low, high, "constant out of range");
8497 neon_element_bits (enum machine_mode mode)
8500 return GET_MODE_BITSIZE (mode);
8502 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8506 /* Predicates for `match_operand' and `match_operator'. */
8508 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8510 cirrus_memory_offset (rtx op)
8512 /* Reject eliminable registers. */
8513 if (! (reload_in_progress || reload_completed)
8514 && ( reg_mentioned_p (frame_pointer_rtx, op)
8515 || reg_mentioned_p (arg_pointer_rtx, op)
8516 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8517 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8518 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8519 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8522 if (GET_CODE (op) == MEM)
8528 /* Match: (mem (reg)). */
8529 if (GET_CODE (ind) == REG)
8535 if (GET_CODE (ind) == PLUS
8536 && GET_CODE (XEXP (ind, 0)) == REG
8537 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8538 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8545 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8546 WB is true if full writeback address modes are allowed and is false
8547 if limited writeback address modes (POST_INC and PRE_DEC) are
8551 arm_coproc_mem_operand (rtx op, bool wb)
8555 /* Reject eliminable registers. */
8556 if (! (reload_in_progress || reload_completed)
8557 && ( reg_mentioned_p (frame_pointer_rtx, op)
8558 || reg_mentioned_p (arg_pointer_rtx, op)
8559 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8560 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8561 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8562 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8565 /* Constants are converted into offsets from labels. */
8566 if (GET_CODE (op) != MEM)
8571 if (reload_completed
8572 && (GET_CODE (ind) == LABEL_REF
8573 || (GET_CODE (ind) == CONST
8574 && GET_CODE (XEXP (ind, 0)) == PLUS
8575 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8576 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8579 /* Match: (mem (reg)). */
8580 if (GET_CODE (ind) == REG)
8581 return arm_address_register_rtx_p (ind, 0);
8583 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8584 acceptable in any case (subject to verification by
8585 arm_address_register_rtx_p). We need WB to be true to accept
8586 PRE_INC and POST_DEC. */
8587 if (GET_CODE (ind) == POST_INC
8588 || GET_CODE (ind) == PRE_DEC
8590 && (GET_CODE (ind) == PRE_INC
8591 || GET_CODE (ind) == POST_DEC)))
8592 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8595 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8596 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8597 && GET_CODE (XEXP (ind, 1)) == PLUS
8598 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8599 ind = XEXP (ind, 1);
8604 if (GET_CODE (ind) == PLUS
8605 && GET_CODE (XEXP (ind, 0)) == REG
8606 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8607 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8608 && INTVAL (XEXP (ind, 1)) > -1024
8609 && INTVAL (XEXP (ind, 1)) < 1024
8610 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8616 /* Return TRUE if OP is a memory operand which we can load or store a vector
8617 to/from. TYPE is one of the following values:
8618 0 - Vector load/stor (vldr)
8619 1 - Core registers (ldm)
8620 2 - Element/structure loads (vld1)
8623 neon_vector_mem_operand (rtx op, int type)
8627 /* Reject eliminable registers. */
8628 if (! (reload_in_progress || reload_completed)
8629 && ( reg_mentioned_p (frame_pointer_rtx, op)
8630 || reg_mentioned_p (arg_pointer_rtx, op)
8631 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8632 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8633 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8634 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8637 /* Constants are converted into offsets from labels. */
8638 if (GET_CODE (op) != MEM)
8643 if (reload_completed
8644 && (GET_CODE (ind) == LABEL_REF
8645 || (GET_CODE (ind) == CONST
8646 && GET_CODE (XEXP (ind, 0)) == PLUS
8647 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8648 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8651 /* Match: (mem (reg)). */
8652 if (GET_CODE (ind) == REG)
8653 return arm_address_register_rtx_p (ind, 0);
8655 /* Allow post-increment with Neon registers. */
8656 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8657 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8659 /* FIXME: vld1 allows register post-modify. */
8665 && GET_CODE (ind) == PLUS
8666 && GET_CODE (XEXP (ind, 0)) == REG
8667 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8668 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8669 && INTVAL (XEXP (ind, 1)) > -1024
8670 && INTVAL (XEXP (ind, 1)) < 1016
8671 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8677 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8680 neon_struct_mem_operand (rtx op)
8684 /* Reject eliminable registers. */
8685 if (! (reload_in_progress || reload_completed)
8686 && ( reg_mentioned_p (frame_pointer_rtx, op)
8687 || reg_mentioned_p (arg_pointer_rtx, op)
8688 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8689 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8690 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8691 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8694 /* Constants are converted into offsets from labels. */
8695 if (GET_CODE (op) != MEM)
8700 if (reload_completed
8701 && (GET_CODE (ind) == LABEL_REF
8702 || (GET_CODE (ind) == CONST
8703 && GET_CODE (XEXP (ind, 0)) == PLUS
8704 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8705 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8708 /* Match: (mem (reg)). */
8709 if (GET_CODE (ind) == REG)
8710 return arm_address_register_rtx_p (ind, 0);
8715 /* Return true if X is a register that will be eliminated later on. */
8717 arm_eliminable_register (rtx x)
8719 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8720 || REGNO (x) == ARG_POINTER_REGNUM
8721 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8722 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8725 /* Return GENERAL_REGS if a scratch register required to reload x to/from
8726 coprocessor registers. Otherwise return NO_REGS. */
8729 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8733 if (!TARGET_NEON_FP16)
8734 return GENERAL_REGS;
8735 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8737 return GENERAL_REGS;
8741 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8742 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8743 && neon_vector_mem_operand (x, 0))
8746 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8749 return GENERAL_REGS;
8752 /* Values which must be returned in the most-significant end of the return
8756 arm_return_in_msb (const_tree valtype)
8758 return (TARGET_AAPCS_BASED
8760 && (AGGREGATE_TYPE_P (valtype)
8761 || TREE_CODE (valtype) == COMPLEX_TYPE));
8764 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8765 Use by the Cirrus Maverick code which has to workaround
8766 a hardware bug triggered by such instructions. */
8768 arm_memory_load_p (rtx insn)
8770 rtx body, lhs, rhs;;
8772 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8775 body = PATTERN (insn);
8777 if (GET_CODE (body) != SET)
8780 lhs = XEXP (body, 0);
8781 rhs = XEXP (body, 1);
8783 lhs = REG_OR_SUBREG_RTX (lhs);
8785 /* If the destination is not a general purpose
8786 register we do not have to worry. */
8787 if (GET_CODE (lhs) != REG
8788 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8791 /* As well as loads from memory we also have to react
8792 to loads of invalid constants which will be turned
8793 into loads from the minipool. */
8794 return (GET_CODE (rhs) == MEM
8795 || GET_CODE (rhs) == SYMBOL_REF
8796 || note_invalid_constants (insn, -1, false));
8799 /* Return TRUE if INSN is a Cirrus instruction. */
8801 arm_cirrus_insn_p (rtx insn)
8803 enum attr_cirrus attr;
8805 /* get_attr cannot accept USE or CLOBBER. */
8807 || GET_CODE (insn) != INSN
8808 || GET_CODE (PATTERN (insn)) == USE
8809 || GET_CODE (PATTERN (insn)) == CLOBBER)
8812 attr = get_attr_cirrus (insn);
8814 return attr != CIRRUS_NOT;
8817 /* Cirrus reorg for invalid instruction combinations. */
8819 cirrus_reorg (rtx first)
8821 enum attr_cirrus attr;
8822 rtx body = PATTERN (first);
8826 /* Any branch must be followed by 2 non Cirrus instructions. */
8827 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8830 t = next_nonnote_insn (first);
8832 if (arm_cirrus_insn_p (t))
8835 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8839 emit_insn_after (gen_nop (), first);
8844 /* (float (blah)) is in parallel with a clobber. */
8845 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8846 body = XVECEXP (body, 0, 0);
8848 if (GET_CODE (body) == SET)
8850 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8852 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8853 be followed by a non Cirrus insn. */
8854 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8856 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8857 emit_insn_after (gen_nop (), first);
8861 else if (arm_memory_load_p (first))
8863 unsigned int arm_regno;
8865 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8866 ldr/cfmv64hr combination where the Rd field is the same
8867 in both instructions must be split with a non Cirrus
8874 /* Get Arm register number for ldr insn. */
8875 if (GET_CODE (lhs) == REG)
8876 arm_regno = REGNO (lhs);
8879 gcc_assert (GET_CODE (rhs) == REG);
8880 arm_regno = REGNO (rhs);
8884 first = next_nonnote_insn (first);
8886 if (! arm_cirrus_insn_p (first))
8889 body = PATTERN (first);
8891 /* (float (blah)) is in parallel with a clobber. */
8892 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8893 body = XVECEXP (body, 0, 0);
8895 if (GET_CODE (body) == FLOAT)
8896 body = XEXP (body, 0);
8898 if (get_attr_cirrus (first) == CIRRUS_MOVE
8899 && GET_CODE (XEXP (body, 1)) == REG
8900 && arm_regno == REGNO (XEXP (body, 1)))
8901 emit_insn_after (gen_nop (), first);
8907 /* get_attr cannot accept USE or CLOBBER. */
8909 || GET_CODE (first) != INSN
8910 || GET_CODE (PATTERN (first)) == USE
8911 || GET_CODE (PATTERN (first)) == CLOBBER)
8914 attr = get_attr_cirrus (first);
8916 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8917 must be followed by a non-coprocessor instruction. */
8918 if (attr == CIRRUS_COMPARE)
8922 t = next_nonnote_insn (first);
8924 if (arm_cirrus_insn_p (t))
8927 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8931 emit_insn_after (gen_nop (), first);
8937 /* Return TRUE if X references a SYMBOL_REF. */
8939 symbol_mentioned_p (rtx x)
8944 if (GET_CODE (x) == SYMBOL_REF)
8947 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8948 are constant offsets, not symbols. */
8949 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8952 fmt = GET_RTX_FORMAT (GET_CODE (x));
8954 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8960 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8961 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8964 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8971 /* Return TRUE if X references a LABEL_REF. */
8973 label_mentioned_p (rtx x)
8978 if (GET_CODE (x) == LABEL_REF)
8981 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8982 instruction, but they are constant offsets, not symbols. */
8983 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8986 fmt = GET_RTX_FORMAT (GET_CODE (x));
8987 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8993 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8994 if (label_mentioned_p (XVECEXP (x, i, j)))
8997 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9005 tls_mentioned_p (rtx x)
9007 switch (GET_CODE (x))
9010 return tls_mentioned_p (XEXP (x, 0));
9013 if (XINT (x, 1) == UNSPEC_TLS)
9021 /* Must not copy any rtx that uses a pc-relative address. */
9024 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
9026 if (GET_CODE (*x) == UNSPEC
9027 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9033 arm_cannot_copy_insn_p (rtx insn)
9035 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9041 enum rtx_code code = GET_CODE (x);
9058 /* Return 1 if memory locations are adjacent. */
9060 adjacent_mem_locations (rtx a, rtx b)
9062 /* We don't guarantee to preserve the order of these memory refs. */
9063 if (volatile_refs_p (a) || volatile_refs_p (b))
9066 if ((GET_CODE (XEXP (a, 0)) == REG
9067 || (GET_CODE (XEXP (a, 0)) == PLUS
9068 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9069 && (GET_CODE (XEXP (b, 0)) == REG
9070 || (GET_CODE (XEXP (b, 0)) == PLUS
9071 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9073 HOST_WIDE_INT val0 = 0, val1 = 0;
9077 if (GET_CODE (XEXP (a, 0)) == PLUS)
9079 reg0 = XEXP (XEXP (a, 0), 0);
9080 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9085 if (GET_CODE (XEXP (b, 0)) == PLUS)
9087 reg1 = XEXP (XEXP (b, 0), 0);
9088 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9093 /* Don't accept any offset that will require multiple
9094 instructions to handle, since this would cause the
9095 arith_adjacentmem pattern to output an overlong sequence. */
9096 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9099 /* Don't allow an eliminable register: register elimination can make
9100 the offset too large. */
9101 if (arm_eliminable_register (reg0))
9104 val_diff = val1 - val0;
9108 /* If the target has load delay slots, then there's no benefit
9109 to using an ldm instruction unless the offset is zero and
9110 we are optimizing for size. */
9111 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9112 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9113 && (val_diff == 4 || val_diff == -4));
9116 return ((REGNO (reg0) == REGNO (reg1))
9117 && (val_diff == 4 || val_diff == -4));
9123 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9124 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9125 instruction. ADD_OFFSET is nonzero if the base address register needs
9126 to be modified with an add instruction before we can use it. */
9129 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9130 int nops, HOST_WIDE_INT add_offset)
9132 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9133 if the offset isn't small enough. The reason 2 ldrs are faster
9134 is because these ARMs are able to do more than one cache access
9135 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9136 whilst the ARM8 has a double bandwidth cache. This means that
9137 these cores can do both an instruction fetch and a data fetch in
9138 a single cycle, so the trick of calculating the address into a
9139 scratch register (one of the result regs) and then doing a load
9140 multiple actually becomes slower (and no smaller in code size).
9141 That is the transformation
9143 ldr rd1, [rbase + offset]
9144 ldr rd2, [rbase + offset + 4]
9148 add rd1, rbase, offset
9149 ldmia rd1, {rd1, rd2}
9151 produces worse code -- '3 cycles + any stalls on rd2' instead of
9152 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9153 access per cycle, the first sequence could never complete in less
9154 than 6 cycles, whereas the ldm sequence would only take 5 and
9155 would make better use of sequential accesses if not hitting the
9158 We cheat here and test 'arm_ld_sched' which we currently know to
9159 only be true for the ARM8, ARM9 and StrongARM. If this ever
9160 changes, then the test below needs to be reworked. */
9161 if (nops == 2 && arm_ld_sched && add_offset != 0)
9167 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9168 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9169 an array ORDER which describes the sequence to use when accessing the
9170 offsets that produces an ascending order. In this sequence, each
9171 offset must be larger by exactly 4 than the previous one. ORDER[0]
9172 must have been filled in with the lowest offset by the caller.
9173 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9174 we use to verify that ORDER produces an ascending order of registers.
9175 Return true if it was possible to construct such an order, false if
9179 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9183 for (i = 1; i < nops; i++)
9187 order[i] = order[i - 1];
9188 for (j = 0; j < nops; j++)
9189 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9191 /* We must find exactly one offset that is higher than the
9192 previous one by 4. */
9193 if (order[i] != order[i - 1])
9197 if (order[i] == order[i - 1])
9199 /* The register numbers must be ascending. */
9200 if (unsorted_regs != NULL
9201 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9208 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9209 HOST_WIDE_INT *load_offset)
9211 int unsorted_regs[MAX_LDM_STM_OPS];
9212 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9213 int order[MAX_LDM_STM_OPS];
9217 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9218 easily extended if required. */
9219 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9221 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9223 /* Loop over the operands and check that the memory references are
9224 suitable (i.e. immediate offsets from the same base register). At
9225 the same time, extract the target register, and the memory
9227 for (i = 0; i < nops; i++)
9232 /* Convert a subreg of a mem into the mem itself. */
9233 if (GET_CODE (operands[nops + i]) == SUBREG)
9234 operands[nops + i] = alter_subreg (operands + (nops + i));
9236 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9238 /* Don't reorder volatile memory references; it doesn't seem worth
9239 looking for the case where the order is ok anyway. */
9240 if (MEM_VOLATILE_P (operands[nops + i]))
9243 offset = const0_rtx;
9245 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9246 || (GET_CODE (reg) == SUBREG
9247 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9248 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9249 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9251 || (GET_CODE (reg) == SUBREG
9252 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9253 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9257 base_reg = REGNO (reg);
9260 if (base_reg != (int) REGNO (reg))
9261 /* Not addressed from the same base register. */
9264 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9265 ? REGNO (operands[i])
9266 : REGNO (SUBREG_REG (operands[i])));
9268 /* If it isn't an integer register, or if it overwrites the
9269 base register but isn't the last insn in the list, then
9270 we can't do this. */
9271 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9272 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9275 unsorted_offsets[i] = INTVAL (offset);
9276 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9280 /* Not a suitable memory address. */
9284 /* All the useful information has now been extracted from the
9285 operands into unsorted_regs and unsorted_offsets; additionally,
9286 order[0] has been set to the lowest offset in the list. Sort
9287 the offsets into order, verifying that they are adjacent, and
9288 check that the register numbers are ascending. */
9289 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9296 for (i = 0; i < nops; i++)
9297 regs[i] = unsorted_regs[order[i]];
9299 *load_offset = unsorted_offsets[order[0]];
9302 if (unsorted_offsets[order[0]] == 0)
9303 ldm_case = 1; /* ldmia */
9304 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9305 ldm_case = 2; /* ldmib */
9306 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9307 ldm_case = 3; /* ldmda */
9308 else if (unsorted_offsets[order[nops - 1]] == -4)
9309 ldm_case = 4; /* ldmdb */
9310 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9311 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9316 if (!multiple_operation_profitable_p (false, nops,
9318 ? unsorted_offsets[order[0]] : 0))
9325 emit_ldm_seq (rtx *operands, int nops)
9327 int regs[MAX_LDM_STM_OPS];
9329 HOST_WIDE_INT offset;
9333 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9336 strcpy (buf, "ldm%(ia%)\t");
9340 strcpy (buf, "ldm%(ib%)\t");
9344 strcpy (buf, "ldm%(da%)\t");
9348 strcpy (buf, "ldm%(db%)\t");
9353 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9354 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9357 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9358 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9360 output_asm_insn (buf, operands);
9362 strcpy (buf, "ldm%(ia%)\t");
9369 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9370 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9372 for (i = 1; i < nops; i++)
9373 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9374 reg_names[regs[i]]);
9376 strcat (buf, "}\t%@ phole ldm");
9378 output_asm_insn (buf, operands);
9383 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9384 HOST_WIDE_INT * load_offset)
9386 int unsorted_regs[MAX_LDM_STM_OPS];
9387 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9388 int order[MAX_LDM_STM_OPS];
9392 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9393 easily extended if required. */
9394 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9396 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9398 /* Loop over the operands and check that the memory references are
9399 suitable (i.e. immediate offsets from the same base register). At
9400 the same time, extract the target register, and the memory
9402 for (i = 0; i < nops; i++)
9407 /* Convert a subreg of a mem into the mem itself. */
9408 if (GET_CODE (operands[nops + i]) == SUBREG)
9409 operands[nops + i] = alter_subreg (operands + (nops + i));
9411 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9413 /* Don't reorder volatile memory references; it doesn't seem worth
9414 looking for the case where the order is ok anyway. */
9415 if (MEM_VOLATILE_P (operands[nops + i]))
9418 offset = const0_rtx;
9420 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9421 || (GET_CODE (reg) == SUBREG
9422 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9423 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9424 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9426 || (GET_CODE (reg) == SUBREG
9427 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9428 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9431 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9432 ? REGNO (operands[i])
9433 : REGNO (SUBREG_REG (operands[i])));
9435 base_reg = REGNO (reg);
9436 else if (base_reg != (int) REGNO (reg))
9437 /* Not addressed from the same base register. */
9440 /* If it isn't an integer register, then we can't do this. */
9441 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9444 unsorted_offsets[i] = INTVAL (offset);
9445 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9449 /* Not a suitable memory address. */
9453 /* All the useful information has now been extracted from the
9454 operands into unsorted_regs and unsorted_offsets; additionally,
9455 order[0] has been set to the lowest offset in the list. Sort
9456 the offsets into order, verifying that they are adjacent, and
9457 check that the register numbers are ascending. */
9458 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9465 for (i = 0; i < nops; i++)
9466 regs[i] = unsorted_regs[order[i]];
9468 *load_offset = unsorted_offsets[order[0]];
9471 if (unsorted_offsets[order[0]] == 0)
9472 stm_case = 1; /* stmia */
9473 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9474 stm_case = 2; /* stmib */
9475 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9476 stm_case = 3; /* stmda */
9477 else if (unsorted_offsets[order[nops - 1]] == -4)
9478 stm_case = 4; /* stmdb */
9482 if (!multiple_operation_profitable_p (false, nops, 0))
9489 emit_stm_seq (rtx *operands, int nops)
9491 int regs[MAX_LDM_STM_OPS];
9493 HOST_WIDE_INT offset;
9497 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9500 strcpy (buf, "stm%(ia%)\t");
9504 strcpy (buf, "stm%(ib%)\t");
9508 strcpy (buf, "stm%(da%)\t");
9512 strcpy (buf, "stm%(db%)\t");
9519 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9520 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9522 for (i = 1; i < nops; i++)
9523 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9524 reg_names[regs[i]]);
9526 strcat (buf, "}\t%@ phole stm");
9528 output_asm_insn (buf, operands);
9532 /* Routines for use in generating RTL. */
9535 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9536 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9538 HOST_WIDE_INT offset = *offsetp;
9541 int sign = up ? 1 : -1;
9544 /* XScale has load-store double instructions, but they have stricter
9545 alignment requirements than load-store multiple, so we cannot
9548 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9549 the pipeline until completion.
9557 An ldr instruction takes 1-3 cycles, but does not block the
9566 Best case ldr will always win. However, the more ldr instructions
9567 we issue, the less likely we are to be able to schedule them well.
9568 Using ldr instructions also increases code size.
9570 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9571 for counts of 3 or 4 regs. */
9572 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9578 for (i = 0; i < count; i++)
9580 addr = plus_constant (from, i * 4 * sign);
9581 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9582 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9588 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9598 result = gen_rtx_PARALLEL (VOIDmode,
9599 rtvec_alloc (count + (write_back ? 1 : 0)));
9602 XVECEXP (result, 0, 0)
9603 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9608 for (j = 0; i < count; i++, j++)
9610 addr = plus_constant (from, j * 4 * sign);
9611 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9612 XVECEXP (result, 0, i)
9613 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9624 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9625 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9627 HOST_WIDE_INT offset = *offsetp;
9630 int sign = up ? 1 : -1;
9633 /* See arm_gen_load_multiple for discussion of
9634 the pros/cons of ldm/stm usage for XScale. */
9635 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9641 for (i = 0; i < count; i++)
9643 addr = plus_constant (to, i * 4 * sign);
9644 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9645 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9651 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9661 result = gen_rtx_PARALLEL (VOIDmode,
9662 rtvec_alloc (count + (write_back ? 1 : 0)));
9665 XVECEXP (result, 0, 0)
9666 = gen_rtx_SET (VOIDmode, to,
9667 plus_constant (to, count * 4 * sign));
9672 for (j = 0; i < count; i++, j++)
9674 addr = plus_constant (to, j * 4 * sign);
9675 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9676 XVECEXP (result, 0, i)
9677 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9688 arm_gen_movmemqi (rtx *operands)
9690 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9691 HOST_WIDE_INT srcoffset, dstoffset;
9693 rtx src, dst, srcbase, dstbase;
9694 rtx part_bytes_reg = NULL;
9697 if (GET_CODE (operands[2]) != CONST_INT
9698 || GET_CODE (operands[3]) != CONST_INT
9699 || INTVAL (operands[2]) > 64
9700 || INTVAL (operands[3]) & 3)
9703 dstbase = operands[0];
9704 srcbase = operands[1];
9706 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9707 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9709 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9710 out_words_to_go = INTVAL (operands[2]) / 4;
9711 last_bytes = INTVAL (operands[2]) & 3;
9712 dstoffset = srcoffset = 0;
9714 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9715 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9717 for (i = 0; in_words_to_go >= 2; i+=4)
9719 if (in_words_to_go > 4)
9720 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9721 srcbase, &srcoffset));
9723 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9724 FALSE, srcbase, &srcoffset));
9726 if (out_words_to_go)
9728 if (out_words_to_go > 4)
9729 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9730 dstbase, &dstoffset));
9731 else if (out_words_to_go != 1)
9732 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9736 dstbase, &dstoffset));
9739 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9740 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9741 if (last_bytes != 0)
9743 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9749 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9750 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9753 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9754 if (out_words_to_go)
9758 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9759 sreg = copy_to_reg (mem);
9761 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9762 emit_move_insn (mem, sreg);
9765 gcc_assert (!in_words_to_go); /* Sanity check */
9770 gcc_assert (in_words_to_go > 0);
9772 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9773 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9776 gcc_assert (!last_bytes || part_bytes_reg);
9778 if (BYTES_BIG_ENDIAN && last_bytes)
9780 rtx tmp = gen_reg_rtx (SImode);
9782 /* The bytes we want are in the top end of the word. */
9783 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9784 GEN_INT (8 * (4 - last_bytes))));
9785 part_bytes_reg = tmp;
9789 mem = adjust_automodify_address (dstbase, QImode,
9790 plus_constant (dst, last_bytes - 1),
9791 dstoffset + last_bytes - 1);
9792 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9796 tmp = gen_reg_rtx (SImode);
9797 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9798 part_bytes_reg = tmp;
9807 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9808 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9812 rtx tmp = gen_reg_rtx (SImode);
9813 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9814 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9815 part_bytes_reg = tmp;
9822 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9823 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9830 /* Select a dominance comparison mode if possible for a test of the general
9831 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9832 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9833 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9834 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9835 In all cases OP will be either EQ or NE, but we don't need to know which
9836 here. If we are unable to support a dominance comparison we return
9837 CC mode. This will then fail to match for the RTL expressions that
9838 generate this call. */
9840 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9842 enum rtx_code cond1, cond2;
9845 /* Currently we will probably get the wrong result if the individual
9846 comparisons are not simple. This also ensures that it is safe to
9847 reverse a comparison if necessary. */
9848 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9850 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9854 /* The if_then_else variant of this tests the second condition if the
9855 first passes, but is true if the first fails. Reverse the first
9856 condition to get a true "inclusive-or" expression. */
9857 if (cond_or == DOM_CC_NX_OR_Y)
9858 cond1 = reverse_condition (cond1);
9860 /* If the comparisons are not equal, and one doesn't dominate the other,
9861 then we can't do this. */
9863 && !comparison_dominates_p (cond1, cond2)
9864 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9869 enum rtx_code temp = cond1;
9877 if (cond_or == DOM_CC_X_AND_Y)
9882 case EQ: return CC_DEQmode;
9883 case LE: return CC_DLEmode;
9884 case LEU: return CC_DLEUmode;
9885 case GE: return CC_DGEmode;
9886 case GEU: return CC_DGEUmode;
9887 default: gcc_unreachable ();
9891 if (cond_or == DOM_CC_X_AND_Y)
9907 if (cond_or == DOM_CC_X_AND_Y)
9923 if (cond_or == DOM_CC_X_AND_Y)
9939 if (cond_or == DOM_CC_X_AND_Y)
9954 /* The remaining cases only occur when both comparisons are the
9957 gcc_assert (cond1 == cond2);
9961 gcc_assert (cond1 == cond2);
9965 gcc_assert (cond1 == cond2);
9969 gcc_assert (cond1 == cond2);
9973 gcc_assert (cond1 == cond2);
9982 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9984 /* All floating point compares return CCFP if it is an equality
9985 comparison, and CCFPE otherwise. */
9986 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10006 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10011 gcc_unreachable ();
10015 /* A compare with a shifted operand. Because of canonicalization, the
10016 comparison will have to be swapped when we emit the assembler. */
10017 if (GET_MODE (y) == SImode
10018 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10019 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10020 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10021 || GET_CODE (x) == ROTATERT))
10024 /* This operation is performed swapped, but since we only rely on the Z
10025 flag we don't need an additional mode. */
10026 if (GET_MODE (y) == SImode
10027 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10028 && GET_CODE (x) == NEG
10029 && (op == EQ || op == NE))
10032 /* This is a special case that is used by combine to allow a
10033 comparison of a shifted byte load to be split into a zero-extend
10034 followed by a comparison of the shifted integer (only valid for
10035 equalities and unsigned inequalities). */
10036 if (GET_MODE (x) == SImode
10037 && GET_CODE (x) == ASHIFT
10038 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10039 && GET_CODE (XEXP (x, 0)) == SUBREG
10040 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10041 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10042 && (op == EQ || op == NE
10043 || op == GEU || op == GTU || op == LTU || op == LEU)
10044 && GET_CODE (y) == CONST_INT)
10047 /* A construct for a conditional compare, if the false arm contains
10048 0, then both conditions must be true, otherwise either condition
10049 must be true. Not all conditions are possible, so CCmode is
10050 returned if it can't be done. */
10051 if (GET_CODE (x) == IF_THEN_ELSE
10052 && (XEXP (x, 2) == const0_rtx
10053 || XEXP (x, 2) == const1_rtx)
10054 && COMPARISON_P (XEXP (x, 0))
10055 && COMPARISON_P (XEXP (x, 1)))
10056 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10057 INTVAL (XEXP (x, 2)));
10059 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10060 if (GET_CODE (x) == AND
10061 && COMPARISON_P (XEXP (x, 0))
10062 && COMPARISON_P (XEXP (x, 1)))
10063 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10066 if (GET_CODE (x) == IOR
10067 && COMPARISON_P (XEXP (x, 0))
10068 && COMPARISON_P (XEXP (x, 1)))
10069 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10072 /* An operation (on Thumb) where we want to test for a single bit.
10073 This is done by shifting that bit up into the top bit of a
10074 scratch register; we can then branch on the sign bit. */
10076 && GET_MODE (x) == SImode
10077 && (op == EQ || op == NE)
10078 && GET_CODE (x) == ZERO_EXTRACT
10079 && XEXP (x, 1) == const1_rtx)
10082 /* An operation that sets the condition codes as a side-effect, the
10083 V flag is not set correctly, so we can only use comparisons where
10084 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10086 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10087 if (GET_MODE (x) == SImode
10089 && (op == EQ || op == NE || op == LT || op == GE)
10090 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10091 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10092 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10093 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10094 || GET_CODE (x) == LSHIFTRT
10095 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10096 || GET_CODE (x) == ROTATERT
10097 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10098 return CC_NOOVmode;
10100 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10103 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10104 && GET_CODE (x) == PLUS
10105 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10108 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10110 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10112 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10119 /* A DImode comparison against zero can be implemented by
10120 or'ing the two halves together. */
10121 if (y == const0_rtx)
10124 /* We can do an equality test in three Thumb instructions. */
10134 /* DImode unsigned comparisons can be implemented by cmp +
10135 cmpeq without a scratch register. Not worth doing in
10146 /* DImode signed and unsigned comparisons can be implemented
10147 by cmp + sbcs with a scratch register, but that does not
10148 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10149 gcc_assert (op != EQ && op != NE);
10153 gcc_unreachable ();
10160 /* X and Y are two things to compare using CODE. Emit the compare insn and
10161 return the rtx for register 0 in the proper mode. FP means this is a
10162 floating point compare: I don't think that it is needed on the arm. */
10164 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10166 enum machine_mode mode;
10168 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10170 /* We might have X as a constant, Y as a register because of the predicates
10171 used for cmpdi. If so, force X to a register here. */
10172 if (dimode_comparison && !REG_P (x))
10173 x = force_reg (DImode, x);
10175 mode = SELECT_CC_MODE (code, x, y);
10176 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10178 if (dimode_comparison
10179 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10180 && mode != CC_CZmode)
10184 /* To compare two non-zero values for equality, XOR them and
10185 then compare against zero. Not used for ARM mode; there
10186 CC_CZmode is cheaper. */
10187 if (mode == CC_Zmode && y != const0_rtx)
10189 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10192 /* A scratch register is required. */
10193 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10194 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10195 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10198 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10203 /* Generate a sequence of insns that will generate the correct return
10204 address mask depending on the physical architecture that the program
10207 arm_gen_return_addr_mask (void)
10209 rtx reg = gen_reg_rtx (Pmode);
10211 emit_insn (gen_return_addr_mask (reg));
10216 arm_reload_in_hi (rtx *operands)
10218 rtx ref = operands[1];
10220 HOST_WIDE_INT offset = 0;
10222 if (GET_CODE (ref) == SUBREG)
10224 offset = SUBREG_BYTE (ref);
10225 ref = SUBREG_REG (ref);
10228 if (GET_CODE (ref) == REG)
10230 /* We have a pseudo which has been spilt onto the stack; there
10231 are two cases here: the first where there is a simple
10232 stack-slot replacement and a second where the stack-slot is
10233 out of range, or is used as a subreg. */
10234 if (reg_equiv_mem[REGNO (ref)])
10236 ref = reg_equiv_mem[REGNO (ref)];
10237 base = find_replacement (&XEXP (ref, 0));
10240 /* The slot is out of range, or was dressed up in a SUBREG. */
10241 base = reg_equiv_address[REGNO (ref)];
10244 base = find_replacement (&XEXP (ref, 0));
10246 /* Handle the case where the address is too complex to be offset by 1. */
10247 if (GET_CODE (base) == MINUS
10248 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10250 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10252 emit_set_insn (base_plus, base);
10255 else if (GET_CODE (base) == PLUS)
10257 /* The addend must be CONST_INT, or we would have dealt with it above. */
10258 HOST_WIDE_INT hi, lo;
10260 offset += INTVAL (XEXP (base, 1));
10261 base = XEXP (base, 0);
10263 /* Rework the address into a legal sequence of insns. */
10264 /* Valid range for lo is -4095 -> 4095 */
10267 : -((-offset) & 0xfff));
10269 /* Corner case, if lo is the max offset then we would be out of range
10270 once we have added the additional 1 below, so bump the msb into the
10271 pre-loading insn(s). */
10275 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10276 ^ (HOST_WIDE_INT) 0x80000000)
10277 - (HOST_WIDE_INT) 0x80000000);
10279 gcc_assert (hi + lo == offset);
10283 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10285 /* Get the base address; addsi3 knows how to handle constants
10286 that require more than one insn. */
10287 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10293 /* Operands[2] may overlap operands[0] (though it won't overlap
10294 operands[1]), that's why we asked for a DImode reg -- so we can
10295 use the bit that does not overlap. */
10296 if (REGNO (operands[2]) == REGNO (operands[0]))
10297 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10299 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10301 emit_insn (gen_zero_extendqisi2 (scratch,
10302 gen_rtx_MEM (QImode,
10303 plus_constant (base,
10305 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10306 gen_rtx_MEM (QImode,
10307 plus_constant (base,
10309 if (!BYTES_BIG_ENDIAN)
10310 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10311 gen_rtx_IOR (SImode,
10314 gen_rtx_SUBREG (SImode, operands[0], 0),
10318 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10319 gen_rtx_IOR (SImode,
10320 gen_rtx_ASHIFT (SImode, scratch,
10322 gen_rtx_SUBREG (SImode, operands[0], 0)));
10325 /* Handle storing a half-word to memory during reload by synthesizing as two
10326 byte stores. Take care not to clobber the input values until after we
10327 have moved them somewhere safe. This code assumes that if the DImode
10328 scratch in operands[2] overlaps either the input value or output address
10329 in some way, then that value must die in this insn (we absolutely need
10330 two scratch registers for some corner cases). */
10332 arm_reload_out_hi (rtx *operands)
10334 rtx ref = operands[0];
10335 rtx outval = operands[1];
10337 HOST_WIDE_INT offset = 0;
10339 if (GET_CODE (ref) == SUBREG)
10341 offset = SUBREG_BYTE (ref);
10342 ref = SUBREG_REG (ref);
10345 if (GET_CODE (ref) == REG)
10347 /* We have a pseudo which has been spilt onto the stack; there
10348 are two cases here: the first where there is a simple
10349 stack-slot replacement and a second where the stack-slot is
10350 out of range, or is used as a subreg. */
10351 if (reg_equiv_mem[REGNO (ref)])
10353 ref = reg_equiv_mem[REGNO (ref)];
10354 base = find_replacement (&XEXP (ref, 0));
10357 /* The slot is out of range, or was dressed up in a SUBREG. */
10358 base = reg_equiv_address[REGNO (ref)];
10361 base = find_replacement (&XEXP (ref, 0));
10363 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10365 /* Handle the case where the address is too complex to be offset by 1. */
10366 if (GET_CODE (base) == MINUS
10367 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10369 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10371 /* Be careful not to destroy OUTVAL. */
10372 if (reg_overlap_mentioned_p (base_plus, outval))
10374 /* Updating base_plus might destroy outval, see if we can
10375 swap the scratch and base_plus. */
10376 if (!reg_overlap_mentioned_p (scratch, outval))
10379 scratch = base_plus;
10384 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10386 /* Be conservative and copy OUTVAL into the scratch now,
10387 this should only be necessary if outval is a subreg
10388 of something larger than a word. */
10389 /* XXX Might this clobber base? I can't see how it can,
10390 since scratch is known to overlap with OUTVAL, and
10391 must be wider than a word. */
10392 emit_insn (gen_movhi (scratch_hi, outval));
10393 outval = scratch_hi;
10397 emit_set_insn (base_plus, base);
10400 else if (GET_CODE (base) == PLUS)
10402 /* The addend must be CONST_INT, or we would have dealt with it above. */
10403 HOST_WIDE_INT hi, lo;
10405 offset += INTVAL (XEXP (base, 1));
10406 base = XEXP (base, 0);
10408 /* Rework the address into a legal sequence of insns. */
10409 /* Valid range for lo is -4095 -> 4095 */
10412 : -((-offset) & 0xfff));
10414 /* Corner case, if lo is the max offset then we would be out of range
10415 once we have added the additional 1 below, so bump the msb into the
10416 pre-loading insn(s). */
10420 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10421 ^ (HOST_WIDE_INT) 0x80000000)
10422 - (HOST_WIDE_INT) 0x80000000);
10424 gcc_assert (hi + lo == offset);
10428 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10430 /* Be careful not to destroy OUTVAL. */
10431 if (reg_overlap_mentioned_p (base_plus, outval))
10433 /* Updating base_plus might destroy outval, see if we
10434 can swap the scratch and base_plus. */
10435 if (!reg_overlap_mentioned_p (scratch, outval))
10438 scratch = base_plus;
10443 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10445 /* Be conservative and copy outval into scratch now,
10446 this should only be necessary if outval is a
10447 subreg of something larger than a word. */
10448 /* XXX Might this clobber base? I can't see how it
10449 can, since scratch is known to overlap with
10451 emit_insn (gen_movhi (scratch_hi, outval));
10452 outval = scratch_hi;
10456 /* Get the base address; addsi3 knows how to handle constants
10457 that require more than one insn. */
10458 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10464 if (BYTES_BIG_ENDIAN)
10466 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10467 plus_constant (base, offset + 1)),
10468 gen_lowpart (QImode, outval)));
10469 emit_insn (gen_lshrsi3 (scratch,
10470 gen_rtx_SUBREG (SImode, outval, 0),
10472 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10473 gen_lowpart (QImode, scratch)));
10477 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10478 gen_lowpart (QImode, outval)));
10479 emit_insn (gen_lshrsi3 (scratch,
10480 gen_rtx_SUBREG (SImode, outval, 0),
10482 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10483 plus_constant (base, offset + 1)),
10484 gen_lowpart (QImode, scratch)));
10488 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10489 (padded to the size of a word) should be passed in a register. */
10492 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10494 if (TARGET_AAPCS_BASED)
10495 return must_pass_in_stack_var_size (mode, type);
10497 return must_pass_in_stack_var_size_or_pad (mode, type);
10501 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10502 Return true if an argument passed on the stack should be padded upwards,
10503 i.e. if the least-significant byte has useful data.
10504 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10505 aggregate types are placed in the lowest memory address. */
10508 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10510 if (!TARGET_AAPCS_BASED)
10511 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10513 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10520 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10521 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10522 byte of the register has useful data, and return the opposite if the
10523 most significant byte does.
10524 For AAPCS, small aggregates and small complex types are always padded
10528 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10529 tree type, int first ATTRIBUTE_UNUSED)
10531 if (TARGET_AAPCS_BASED
10532 && BYTES_BIG_ENDIAN
10533 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10534 && int_size_in_bytes (type) <= 4)
10537 /* Otherwise, use default padding. */
10538 return !BYTES_BIG_ENDIAN;
10542 /* Print a symbolic form of X to the debug file, F. */
10544 arm_print_value (FILE *f, rtx x)
10546 switch (GET_CODE (x))
10549 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10553 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10561 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10563 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10564 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10572 fprintf (f, "\"%s\"", XSTR (x, 0));
10576 fprintf (f, "`%s'", XSTR (x, 0));
10580 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10584 arm_print_value (f, XEXP (x, 0));
10588 arm_print_value (f, XEXP (x, 0));
10590 arm_print_value (f, XEXP (x, 1));
10598 fprintf (f, "????");
10603 /* Routines for manipulation of the constant pool. */
10605 /* Arm instructions cannot load a large constant directly into a
10606 register; they have to come from a pc relative load. The constant
10607 must therefore be placed in the addressable range of the pc
10608 relative load. Depending on the precise pc relative load
10609 instruction the range is somewhere between 256 bytes and 4k. This
10610 means that we often have to dump a constant inside a function, and
10611 generate code to branch around it.
10613 It is important to minimize this, since the branches will slow
10614 things down and make the code larger.
10616 Normally we can hide the table after an existing unconditional
10617 branch so that there is no interruption of the flow, but in the
10618 worst case the code looks like this:
10636 We fix this by performing a scan after scheduling, which notices
10637 which instructions need to have their operands fetched from the
10638 constant table and builds the table.
10640 The algorithm starts by building a table of all the constants that
10641 need fixing up and all the natural barriers in the function (places
10642 where a constant table can be dropped without breaking the flow).
10643 For each fixup we note how far the pc-relative replacement will be
10644 able to reach and the offset of the instruction into the function.
10646 Having built the table we then group the fixes together to form
10647 tables that are as large as possible (subject to addressing
10648 constraints) and emit each table of constants after the last
10649 barrier that is within range of all the instructions in the group.
10650 If a group does not contain a barrier, then we forcibly create one
10651 by inserting a jump instruction into the flow. Once the table has
10652 been inserted, the insns are then modified to reference the
10653 relevant entry in the pool.
10655 Possible enhancements to the algorithm (not implemented) are:
10657 1) For some processors and object formats, there may be benefit in
10658 aligning the pools to the start of cache lines; this alignment
10659 would need to be taken into account when calculating addressability
10662 /* These typedefs are located at the start of this file, so that
10663 they can be used in the prototypes there. This comment is to
10664 remind readers of that fact so that the following structures
10665 can be understood more easily.
10667 typedef struct minipool_node Mnode;
10668 typedef struct minipool_fixup Mfix; */
10670 struct minipool_node
10672 /* Doubly linked chain of entries. */
10675 /* The maximum offset into the code that this entry can be placed. While
10676 pushing fixes for forward references, all entries are sorted in order
10677 of increasing max_address. */
10678 HOST_WIDE_INT max_address;
10679 /* Similarly for an entry inserted for a backwards ref. */
10680 HOST_WIDE_INT min_address;
10681 /* The number of fixes referencing this entry. This can become zero
10682 if we "unpush" an entry. In this case we ignore the entry when we
10683 come to emit the code. */
10685 /* The offset from the start of the minipool. */
10686 HOST_WIDE_INT offset;
10687 /* The value in table. */
10689 /* The mode of value. */
10690 enum machine_mode mode;
10691 /* The size of the value. With iWMMXt enabled
10692 sizes > 4 also imply an alignment of 8-bytes. */
10696 struct minipool_fixup
10700 HOST_WIDE_INT address;
10702 enum machine_mode mode;
10706 HOST_WIDE_INT forwards;
10707 HOST_WIDE_INT backwards;
10710 /* Fixes less than a word need padding out to a word boundary. */
10711 #define MINIPOOL_FIX_SIZE(mode) \
10712 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
10714 static Mnode * minipool_vector_head;
10715 static Mnode * minipool_vector_tail;
10716 static rtx minipool_vector_label;
10717 static int minipool_pad;
10719 /* The linked list of all minipool fixes required for this function. */
10720 Mfix * minipool_fix_head;
10721 Mfix * minipool_fix_tail;
10722 /* The fix entry for the current minipool, once it has been placed. */
10723 Mfix * minipool_barrier;
10725 /* Determines if INSN is the start of a jump table. Returns the end
10726 of the TABLE or NULL_RTX. */
10728 is_jump_table (rtx insn)
10732 if (GET_CODE (insn) == JUMP_INSN
10733 && JUMP_LABEL (insn) != NULL
10734 && ((table = next_real_insn (JUMP_LABEL (insn)))
10735 == next_real_insn (insn))
10737 && GET_CODE (table) == JUMP_INSN
10738 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10739 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10745 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10746 #define JUMP_TABLES_IN_TEXT_SECTION 0
10749 static HOST_WIDE_INT
10750 get_jump_table_size (rtx insn)
10752 /* ADDR_VECs only take room if read-only data does into the text
10754 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10756 rtx body = PATTERN (insn);
10757 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10758 HOST_WIDE_INT size;
10759 HOST_WIDE_INT modesize;
10761 modesize = GET_MODE_SIZE (GET_MODE (body));
10762 size = modesize * XVECLEN (body, elt);
10766 /* Round up size of TBB table to a halfword boundary. */
10767 size = (size + 1) & ~(HOST_WIDE_INT)1;
10770 /* No padding necessary for TBH. */
10773 /* Add two bytes for alignment on Thumb. */
10778 gcc_unreachable ();
10786 /* Move a minipool fix MP from its current location to before MAX_MP.
10787 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10788 constraints may need updating. */
10790 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10791 HOST_WIDE_INT max_address)
10793 /* The code below assumes these are different. */
10794 gcc_assert (mp != max_mp);
10796 if (max_mp == NULL)
10798 if (max_address < mp->max_address)
10799 mp->max_address = max_address;
10803 if (max_address > max_mp->max_address - mp->fix_size)
10804 mp->max_address = max_mp->max_address - mp->fix_size;
10806 mp->max_address = max_address;
10808 /* Unlink MP from its current position. Since max_mp is non-null,
10809 mp->prev must be non-null. */
10810 mp->prev->next = mp->next;
10811 if (mp->next != NULL)
10812 mp->next->prev = mp->prev;
10814 minipool_vector_tail = mp->prev;
10816 /* Re-insert it before MAX_MP. */
10818 mp->prev = max_mp->prev;
10821 if (mp->prev != NULL)
10822 mp->prev->next = mp;
10824 minipool_vector_head = mp;
10827 /* Save the new entry. */
10830 /* Scan over the preceding entries and adjust their addresses as
10832 while (mp->prev != NULL
10833 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10835 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10842 /* Add a constant to the minipool for a forward reference. Returns the
10843 node added or NULL if the constant will not fit in this pool. */
10845 add_minipool_forward_ref (Mfix *fix)
10847 /* If set, max_mp is the first pool_entry that has a lower
10848 constraint than the one we are trying to add. */
10849 Mnode * max_mp = NULL;
10850 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10853 /* If the minipool starts before the end of FIX->INSN then this FIX
10854 can not be placed into the current pool. Furthermore, adding the
10855 new constant pool entry may cause the pool to start FIX_SIZE bytes
10857 if (minipool_vector_head &&
10858 (fix->address + get_attr_length (fix->insn)
10859 >= minipool_vector_head->max_address - fix->fix_size))
10862 /* Scan the pool to see if a constant with the same value has
10863 already been added. While we are doing this, also note the
10864 location where we must insert the constant if it doesn't already
10866 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10868 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10869 && fix->mode == mp->mode
10870 && (GET_CODE (fix->value) != CODE_LABEL
10871 || (CODE_LABEL_NUMBER (fix->value)
10872 == CODE_LABEL_NUMBER (mp->value)))
10873 && rtx_equal_p (fix->value, mp->value))
10875 /* More than one fix references this entry. */
10877 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10880 /* Note the insertion point if necessary. */
10882 && mp->max_address > max_address)
10885 /* If we are inserting an 8-bytes aligned quantity and
10886 we have not already found an insertion point, then
10887 make sure that all such 8-byte aligned quantities are
10888 placed at the start of the pool. */
10889 if (ARM_DOUBLEWORD_ALIGN
10891 && fix->fix_size >= 8
10892 && mp->fix_size < 8)
10895 max_address = mp->max_address;
10899 /* The value is not currently in the minipool, so we need to create
10900 a new entry for it. If MAX_MP is NULL, the entry will be put on
10901 the end of the list since the placement is less constrained than
10902 any existing entry. Otherwise, we insert the new fix before
10903 MAX_MP and, if necessary, adjust the constraints on the other
10906 mp->fix_size = fix->fix_size;
10907 mp->mode = fix->mode;
10908 mp->value = fix->value;
10910 /* Not yet required for a backwards ref. */
10911 mp->min_address = -65536;
10913 if (max_mp == NULL)
10915 mp->max_address = max_address;
10917 mp->prev = minipool_vector_tail;
10919 if (mp->prev == NULL)
10921 minipool_vector_head = mp;
10922 minipool_vector_label = gen_label_rtx ();
10925 mp->prev->next = mp;
10927 minipool_vector_tail = mp;
10931 if (max_address > max_mp->max_address - mp->fix_size)
10932 mp->max_address = max_mp->max_address - mp->fix_size;
10934 mp->max_address = max_address;
10937 mp->prev = max_mp->prev;
10939 if (mp->prev != NULL)
10940 mp->prev->next = mp;
10942 minipool_vector_head = mp;
10945 /* Save the new entry. */
10948 /* Scan over the preceding entries and adjust their addresses as
10950 while (mp->prev != NULL
10951 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10953 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10961 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10962 HOST_WIDE_INT min_address)
10964 HOST_WIDE_INT offset;
10966 /* The code below assumes these are different. */
10967 gcc_assert (mp != min_mp);
10969 if (min_mp == NULL)
10971 if (min_address > mp->min_address)
10972 mp->min_address = min_address;
10976 /* We will adjust this below if it is too loose. */
10977 mp->min_address = min_address;
10979 /* Unlink MP from its current position. Since min_mp is non-null,
10980 mp->next must be non-null. */
10981 mp->next->prev = mp->prev;
10982 if (mp->prev != NULL)
10983 mp->prev->next = mp->next;
10985 minipool_vector_head = mp->next;
10987 /* Reinsert it after MIN_MP. */
10989 mp->next = min_mp->next;
10991 if (mp->next != NULL)
10992 mp->next->prev = mp;
10994 minipool_vector_tail = mp;
11000 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11002 mp->offset = offset;
11003 if (mp->refcount > 0)
11004 offset += mp->fix_size;
11006 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11007 mp->next->min_address = mp->min_address + mp->fix_size;
11013 /* Add a constant to the minipool for a backward reference. Returns the
11014 node added or NULL if the constant will not fit in this pool.
11016 Note that the code for insertion for a backwards reference can be
11017 somewhat confusing because the calculated offsets for each fix do
11018 not take into account the size of the pool (which is still under
11021 add_minipool_backward_ref (Mfix *fix)
11023 /* If set, min_mp is the last pool_entry that has a lower constraint
11024 than the one we are trying to add. */
11025 Mnode *min_mp = NULL;
11026 /* This can be negative, since it is only a constraint. */
11027 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11030 /* If we can't reach the current pool from this insn, or if we can't
11031 insert this entry at the end of the pool without pushing other
11032 fixes out of range, then we don't try. This ensures that we
11033 can't fail later on. */
11034 if (min_address >= minipool_barrier->address
11035 || (minipool_vector_tail->min_address + fix->fix_size
11036 >= minipool_barrier->address))
11039 /* Scan the pool to see if a constant with the same value has
11040 already been added. While we are doing this, also note the
11041 location where we must insert the constant if it doesn't already
11043 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11045 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11046 && fix->mode == mp->mode
11047 && (GET_CODE (fix->value) != CODE_LABEL
11048 || (CODE_LABEL_NUMBER (fix->value)
11049 == CODE_LABEL_NUMBER (mp->value)))
11050 && rtx_equal_p (fix->value, mp->value)
11051 /* Check that there is enough slack to move this entry to the
11052 end of the table (this is conservative). */
11053 && (mp->max_address
11054 > (minipool_barrier->address
11055 + minipool_vector_tail->offset
11056 + minipool_vector_tail->fix_size)))
11059 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11062 if (min_mp != NULL)
11063 mp->min_address += fix->fix_size;
11066 /* Note the insertion point if necessary. */
11067 if (mp->min_address < min_address)
11069 /* For now, we do not allow the insertion of 8-byte alignment
11070 requiring nodes anywhere but at the start of the pool. */
11071 if (ARM_DOUBLEWORD_ALIGN
11072 && fix->fix_size >= 8 && mp->fix_size < 8)
11077 else if (mp->max_address
11078 < minipool_barrier->address + mp->offset + fix->fix_size)
11080 /* Inserting before this entry would push the fix beyond
11081 its maximum address (which can happen if we have
11082 re-located a forwards fix); force the new fix to come
11084 if (ARM_DOUBLEWORD_ALIGN
11085 && fix->fix_size >= 8 && mp->fix_size < 8)
11090 min_address = mp->min_address + fix->fix_size;
11093 /* Do not insert a non-8-byte aligned quantity before 8-byte
11094 aligned quantities. */
11095 else if (ARM_DOUBLEWORD_ALIGN
11096 && fix->fix_size < 8
11097 && mp->fix_size >= 8)
11100 min_address = mp->min_address + fix->fix_size;
11105 /* We need to create a new entry. */
11107 mp->fix_size = fix->fix_size;
11108 mp->mode = fix->mode;
11109 mp->value = fix->value;
11111 mp->max_address = minipool_barrier->address + 65536;
11113 mp->min_address = min_address;
11115 if (min_mp == NULL)
11118 mp->next = minipool_vector_head;
11120 if (mp->next == NULL)
11122 minipool_vector_tail = mp;
11123 minipool_vector_label = gen_label_rtx ();
11126 mp->next->prev = mp;
11128 minipool_vector_head = mp;
11132 mp->next = min_mp->next;
11136 if (mp->next != NULL)
11137 mp->next->prev = mp;
11139 minipool_vector_tail = mp;
11142 /* Save the new entry. */
11150 /* Scan over the following entries and adjust their offsets. */
11151 while (mp->next != NULL)
11153 if (mp->next->min_address < mp->min_address + mp->fix_size)
11154 mp->next->min_address = mp->min_address + mp->fix_size;
11157 mp->next->offset = mp->offset + mp->fix_size;
11159 mp->next->offset = mp->offset;
11168 assign_minipool_offsets (Mfix *barrier)
11170 HOST_WIDE_INT offset = 0;
11173 minipool_barrier = barrier;
11175 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11177 mp->offset = offset;
11179 if (mp->refcount > 0)
11180 offset += mp->fix_size;
11184 /* Output the literal table */
11186 dump_minipool (rtx scan)
11192 if (ARM_DOUBLEWORD_ALIGN)
11193 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11194 if (mp->refcount > 0 && mp->fix_size >= 8)
11201 fprintf (dump_file,
11202 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11203 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11205 scan = emit_label_after (gen_label_rtx (), scan);
11206 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11207 scan = emit_label_after (minipool_vector_label, scan);
11209 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11211 if (mp->refcount > 0)
11215 fprintf (dump_file,
11216 ";; Offset %u, min %ld, max %ld ",
11217 (unsigned) mp->offset, (unsigned long) mp->min_address,
11218 (unsigned long) mp->max_address);
11219 arm_print_value (dump_file, mp->value);
11220 fputc ('\n', dump_file);
11223 switch (mp->fix_size)
11225 #ifdef HAVE_consttable_1
11227 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11231 #ifdef HAVE_consttable_2
11233 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11237 #ifdef HAVE_consttable_4
11239 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11243 #ifdef HAVE_consttable_8
11245 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11249 #ifdef HAVE_consttable_16
11251 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11256 gcc_unreachable ();
11264 minipool_vector_head = minipool_vector_tail = NULL;
11265 scan = emit_insn_after (gen_consttable_end (), scan);
11266 scan = emit_barrier_after (scan);
11269 /* Return the cost of forcibly inserting a barrier after INSN. */
11271 arm_barrier_cost (rtx insn)
11273 /* Basing the location of the pool on the loop depth is preferable,
11274 but at the moment, the basic block information seems to be
11275 corrupt by this stage of the compilation. */
11276 int base_cost = 50;
11277 rtx next = next_nonnote_insn (insn);
11279 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11282 switch (GET_CODE (insn))
11285 /* It will always be better to place the table before the label, rather
11294 return base_cost - 10;
11297 return base_cost + 10;
11301 /* Find the best place in the insn stream in the range
11302 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11303 Create the barrier by inserting a jump and add a new fix entry for
11306 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11308 HOST_WIDE_INT count = 0;
11310 rtx from = fix->insn;
11311 /* The instruction after which we will insert the jump. */
11312 rtx selected = NULL;
11314 /* The address at which the jump instruction will be placed. */
11315 HOST_WIDE_INT selected_address;
11317 HOST_WIDE_INT max_count = max_address - fix->address;
11318 rtx label = gen_label_rtx ();
11320 selected_cost = arm_barrier_cost (from);
11321 selected_address = fix->address;
11323 while (from && count < max_count)
11328 /* This code shouldn't have been called if there was a natural barrier
11330 gcc_assert (GET_CODE (from) != BARRIER);
11332 /* Count the length of this insn. */
11333 count += get_attr_length (from);
11335 /* If there is a jump table, add its length. */
11336 tmp = is_jump_table (from);
11339 count += get_jump_table_size (tmp);
11341 /* Jump tables aren't in a basic block, so base the cost on
11342 the dispatch insn. If we select this location, we will
11343 still put the pool after the table. */
11344 new_cost = arm_barrier_cost (from);
11346 if (count < max_count
11347 && (!selected || new_cost <= selected_cost))
11350 selected_cost = new_cost;
11351 selected_address = fix->address + count;
11354 /* Continue after the dispatch table. */
11355 from = NEXT_INSN (tmp);
11359 new_cost = arm_barrier_cost (from);
11361 if (count < max_count
11362 && (!selected || new_cost <= selected_cost))
11365 selected_cost = new_cost;
11366 selected_address = fix->address + count;
11369 from = NEXT_INSN (from);
11372 /* Make sure that we found a place to insert the jump. */
11373 gcc_assert (selected);
11375 /* Create a new JUMP_INSN that branches around a barrier. */
11376 from = emit_jump_insn_after (gen_jump (label), selected);
11377 JUMP_LABEL (from) = label;
11378 barrier = emit_barrier_after (from);
11379 emit_label_after (label, barrier);
11381 /* Create a minipool barrier entry for the new barrier. */
11382 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11383 new_fix->insn = barrier;
11384 new_fix->address = selected_address;
11385 new_fix->next = fix->next;
11386 fix->next = new_fix;
11391 /* Record that there is a natural barrier in the insn stream at
11394 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11396 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11399 fix->address = address;
11402 if (minipool_fix_head != NULL)
11403 minipool_fix_tail->next = fix;
11405 minipool_fix_head = fix;
11407 minipool_fix_tail = fix;
11410 /* Record INSN, which will need fixing up to load a value from the
11411 minipool. ADDRESS is the offset of the insn since the start of the
11412 function; LOC is a pointer to the part of the insn which requires
11413 fixing; VALUE is the constant that must be loaded, which is of type
11416 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11417 enum machine_mode mode, rtx value)
11419 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11422 fix->address = address;
11425 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11426 fix->value = value;
11427 fix->forwards = get_attr_pool_range (insn);
11428 fix->backwards = get_attr_neg_pool_range (insn);
11429 fix->minipool = NULL;
11431 /* If an insn doesn't have a range defined for it, then it isn't
11432 expecting to be reworked by this code. Better to stop now than
11433 to generate duff assembly code. */
11434 gcc_assert (fix->forwards || fix->backwards);
11436 /* If an entry requires 8-byte alignment then assume all constant pools
11437 require 4 bytes of padding. Trying to do this later on a per-pool
11438 basis is awkward because existing pool entries have to be modified. */
11439 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11444 fprintf (dump_file,
11445 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11446 GET_MODE_NAME (mode),
11447 INSN_UID (insn), (unsigned long) address,
11448 -1 * (long)fix->backwards, (long)fix->forwards);
11449 arm_print_value (dump_file, fix->value);
11450 fprintf (dump_file, "\n");
11453 /* Add it to the chain of fixes. */
11456 if (minipool_fix_head != NULL)
11457 minipool_fix_tail->next = fix;
11459 minipool_fix_head = fix;
11461 minipool_fix_tail = fix;
11464 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11465 Returns the number of insns needed, or 99 if we don't know how to
11468 arm_const_double_inline_cost (rtx val)
11470 rtx lowpart, highpart;
11471 enum machine_mode mode;
11473 mode = GET_MODE (val);
11475 if (mode == VOIDmode)
11478 gcc_assert (GET_MODE_SIZE (mode) == 8);
11480 lowpart = gen_lowpart (SImode, val);
11481 highpart = gen_highpart_mode (SImode, mode, val);
11483 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11484 gcc_assert (GET_CODE (highpart) == CONST_INT);
11486 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11487 NULL_RTX, NULL_RTX, 0, 0)
11488 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11489 NULL_RTX, NULL_RTX, 0, 0));
11492 /* Return true if it is worthwhile to split a 64-bit constant into two
11493 32-bit operations. This is the case if optimizing for size, or
11494 if we have load delay slots, or if one 32-bit part can be done with
11495 a single data operation. */
11497 arm_const_double_by_parts (rtx val)
11499 enum machine_mode mode = GET_MODE (val);
11502 if (optimize_size || arm_ld_sched)
11505 if (mode == VOIDmode)
11508 part = gen_highpart_mode (SImode, mode, val);
11510 gcc_assert (GET_CODE (part) == CONST_INT);
11512 if (const_ok_for_arm (INTVAL (part))
11513 || const_ok_for_arm (~INTVAL (part)))
11516 part = gen_lowpart (SImode, val);
11518 gcc_assert (GET_CODE (part) == CONST_INT);
11520 if (const_ok_for_arm (INTVAL (part))
11521 || const_ok_for_arm (~INTVAL (part)))
11527 /* Return true if it is possible to inline both the high and low parts
11528 of a 64-bit constant into 32-bit data processing instructions. */
11530 arm_const_double_by_immediates (rtx val)
11532 enum machine_mode mode = GET_MODE (val);
11535 if (mode == VOIDmode)
11538 part = gen_highpart_mode (SImode, mode, val);
11540 gcc_assert (GET_CODE (part) == CONST_INT);
11542 if (!const_ok_for_arm (INTVAL (part)))
11545 part = gen_lowpart (SImode, val);
11547 gcc_assert (GET_CODE (part) == CONST_INT);
11549 if (!const_ok_for_arm (INTVAL (part)))
11555 /* Scan INSN and note any of its operands that need fixing.
11556 If DO_PUSHES is false we do not actually push any of the fixups
11557 needed. The function returns TRUE if any fixups were needed/pushed.
11558 This is used by arm_memory_load_p() which needs to know about loads
11559 of constants that will be converted into minipool loads. */
11561 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11563 bool result = false;
11566 extract_insn (insn);
11568 if (!constrain_operands (1))
11569 fatal_insn_not_found (insn);
11571 if (recog_data.n_alternatives == 0)
11574 /* Fill in recog_op_alt with information about the constraints of
11576 preprocess_constraints ();
11578 for (opno = 0; opno < recog_data.n_operands; opno++)
11580 /* Things we need to fix can only occur in inputs. */
11581 if (recog_data.operand_type[opno] != OP_IN)
11584 /* If this alternative is a memory reference, then any mention
11585 of constants in this alternative is really to fool reload
11586 into allowing us to accept one there. We need to fix them up
11587 now so that we output the right code. */
11588 if (recog_op_alt[opno][which_alternative].memory_ok)
11590 rtx op = recog_data.operand[opno];
11592 if (CONSTANT_P (op))
11595 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11596 recog_data.operand_mode[opno], op);
11599 else if (GET_CODE (op) == MEM
11600 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11601 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11605 rtx cop = avoid_constant_pool_reference (op);
11607 /* Casting the address of something to a mode narrower
11608 than a word can cause avoid_constant_pool_reference()
11609 to return the pool reference itself. That's no good to
11610 us here. Lets just hope that we can use the
11611 constant pool value directly. */
11613 cop = get_pool_constant (XEXP (op, 0));
11615 push_minipool_fix (insn, address,
11616 recog_data.operand_loc[opno],
11617 recog_data.operand_mode[opno], cop);
11628 /* Convert instructions to their cc-clobbering variant if possible, since
11629 that allows us to use smaller encodings. */
11632 thumb2_reorg (void)
11637 INIT_REG_SET (&live);
11639 /* We are freeing block_for_insn in the toplev to keep compatibility
11640 with old MDEP_REORGS that are not CFG based. Recompute it now. */
11641 compute_bb_for_insn ();
11647 COPY_REG_SET (&live, DF_LR_OUT (bb));
11648 df_simulate_initialize_backwards (bb, &live);
11649 FOR_BB_INSNS_REVERSE (bb, insn)
11651 if (NONJUMP_INSN_P (insn)
11652 && !REGNO_REG_SET_P (&live, CC_REGNUM))
11654 rtx pat = PATTERN (insn);
11655 if (GET_CODE (pat) == SET
11656 && low_register_operand (XEXP (pat, 0), SImode)
11657 && thumb_16bit_operator (XEXP (pat, 1), SImode)
11658 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
11659 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
11661 rtx dst = XEXP (pat, 0);
11662 rtx src = XEXP (pat, 1);
11663 rtx op0 = XEXP (src, 0);
11664 if (rtx_equal_p (dst, op0)
11665 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
11667 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
11668 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
11669 rtvec vec = gen_rtvec (2, pat, clobber);
11670 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
11671 INSN_CODE (insn) = -1;
11675 if (NONDEBUG_INSN_P (insn))
11676 df_simulate_one_insn_backwards (bb, insn, &live);
11679 CLEAR_REG_SET (&live);
11682 /* Gcc puts the pool in the wrong place for ARM, since we can only
11683 load addresses a limited distance around the pc. We do some
11684 special munging to move the constant pool values to the correct
11685 point in the code. */
11690 HOST_WIDE_INT address = 0;
11696 minipool_fix_head = minipool_fix_tail = NULL;
11698 /* The first insn must always be a note, or the code below won't
11699 scan it properly. */
11700 insn = get_insns ();
11701 gcc_assert (GET_CODE (insn) == NOTE);
11704 /* Scan all the insns and record the operands that will need fixing. */
11705 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11707 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11708 && (arm_cirrus_insn_p (insn)
11709 || GET_CODE (insn) == JUMP_INSN
11710 || arm_memory_load_p (insn)))
11711 cirrus_reorg (insn);
11713 if (GET_CODE (insn) == BARRIER)
11714 push_minipool_barrier (insn, address);
11715 else if (INSN_P (insn))
11719 note_invalid_constants (insn, address, true);
11720 address += get_attr_length (insn);
11722 /* If the insn is a vector jump, add the size of the table
11723 and skip the table. */
11724 if ((table = is_jump_table (insn)) != NULL)
11726 address += get_jump_table_size (table);
11732 fix = minipool_fix_head;
11734 /* Now scan the fixups and perform the required changes. */
11739 Mfix * last_added_fix;
11740 Mfix * last_barrier = NULL;
11743 /* Skip any further barriers before the next fix. */
11744 while (fix && GET_CODE (fix->insn) == BARRIER)
11747 /* No more fixes. */
11751 last_added_fix = NULL;
11753 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11755 if (GET_CODE (ftmp->insn) == BARRIER)
11757 if (ftmp->address >= minipool_vector_head->max_address)
11760 last_barrier = ftmp;
11762 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11765 last_added_fix = ftmp; /* Keep track of the last fix added. */
11768 /* If we found a barrier, drop back to that; any fixes that we
11769 could have reached but come after the barrier will now go in
11770 the next mini-pool. */
11771 if (last_barrier != NULL)
11773 /* Reduce the refcount for those fixes that won't go into this
11775 for (fdel = last_barrier->next;
11776 fdel && fdel != ftmp;
11779 fdel->minipool->refcount--;
11780 fdel->minipool = NULL;
11783 ftmp = last_barrier;
11787 /* ftmp is first fix that we can't fit into this pool and
11788 there no natural barriers that we could use. Insert a
11789 new barrier in the code somewhere between the previous
11790 fix and this one, and arrange to jump around it. */
11791 HOST_WIDE_INT max_address;
11793 /* The last item on the list of fixes must be a barrier, so
11794 we can never run off the end of the list of fixes without
11795 last_barrier being set. */
11798 max_address = minipool_vector_head->max_address;
11799 /* Check that there isn't another fix that is in range that
11800 we couldn't fit into this pool because the pool was
11801 already too large: we need to put the pool before such an
11802 instruction. The pool itself may come just after the
11803 fix because create_fix_barrier also allows space for a
11804 jump instruction. */
11805 if (ftmp->address < max_address)
11806 max_address = ftmp->address + 1;
11808 last_barrier = create_fix_barrier (last_added_fix, max_address);
11811 assign_minipool_offsets (last_barrier);
11815 if (GET_CODE (ftmp->insn) != BARRIER
11816 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11823 /* Scan over the fixes we have identified for this pool, fixing them
11824 up and adding the constants to the pool itself. */
11825 for (this_fix = fix; this_fix && ftmp != this_fix;
11826 this_fix = this_fix->next)
11827 if (GET_CODE (this_fix->insn) != BARRIER)
11830 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11831 minipool_vector_label),
11832 this_fix->minipool->offset);
11833 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11836 dump_minipool (last_barrier->insn);
11840 /* From now on we must synthesize any constants that we can't handle
11841 directly. This can happen if the RTL gets split during final
11842 instruction generation. */
11843 after_arm_reorg = 1;
11845 /* Free the minipool memory. */
11846 obstack_free (&minipool_obstack, minipool_startobj);
11849 /* Routines to output assembly language. */
11851 /* If the rtx is the correct value then return the string of the number.
11852 In this way we can ensure that valid double constants are generated even
11853 when cross compiling. */
11855 fp_immediate_constant (rtx x)
11860 if (!fp_consts_inited)
11863 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11864 for (i = 0; i < 8; i++)
11865 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11866 return strings_fp[i];
11868 gcc_unreachable ();
11871 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11872 static const char *
11873 fp_const_from_val (REAL_VALUE_TYPE *r)
11877 if (!fp_consts_inited)
11880 for (i = 0; i < 8; i++)
11881 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11882 return strings_fp[i];
11884 gcc_unreachable ();
11887 /* Output the operands of a LDM/STM instruction to STREAM.
11888 MASK is the ARM register set mask of which only bits 0-15 are important.
11889 REG is the base register, either the frame pointer or the stack pointer,
11890 INSTR is the possibly suffixed load or store instruction.
11891 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11894 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11895 unsigned long mask, int rfe)
11898 bool not_first = FALSE;
11900 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11901 fputc ('\t', stream);
11902 asm_fprintf (stream, instr, reg);
11903 fputc ('{', stream);
11905 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11906 if (mask & (1 << i))
11909 fprintf (stream, ", ");
11911 asm_fprintf (stream, "%r", i);
11916 fprintf (stream, "}^\n");
11918 fprintf (stream, "}\n");
11922 /* Output a FLDMD instruction to STREAM.
11923 BASE if the register containing the address.
11924 REG and COUNT specify the register range.
11925 Extra registers may be added to avoid hardware bugs.
11927 We output FLDMD even for ARMv5 VFP implementations. Although
11928 FLDMD is technically not supported until ARMv6, it is believed
11929 that all VFP implementations support its use in this context. */
11932 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11936 /* Workaround ARM10 VFPr1 bug. */
11937 if (count == 2 && !arm_arch6)
11944 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11945 load into multiple parts if we have to handle more than 16 registers. */
11948 vfp_output_fldmd (stream, base, reg, 16);
11949 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11953 fputc ('\t', stream);
11954 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11956 for (i = reg; i < reg + count; i++)
11959 fputs (", ", stream);
11960 asm_fprintf (stream, "d%d", i);
11962 fputs ("}\n", stream);
11967 /* Output the assembly for a store multiple. */
11970 vfp_output_fstmd (rtx * operands)
11977 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11978 p = strlen (pattern);
11980 gcc_assert (GET_CODE (operands[1]) == REG);
11982 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11983 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11985 p += sprintf (&pattern[p], ", d%d", base + i);
11987 strcpy (&pattern[p], "}");
11989 output_asm_insn (pattern, operands);
11994 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11995 number of bytes pushed. */
11998 vfp_emit_fstmd (int base_reg, int count)
12005 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12006 register pairs are stored by a store multiple insn. We avoid this
12007 by pushing an extra pair. */
12008 if (count == 2 && !arm_arch6)
12010 if (base_reg == LAST_VFP_REGNUM - 3)
12015 /* FSTMD may not store more than 16 doubleword registers at once. Split
12016 larger stores into multiple parts (up to a maximum of two, in
12021 /* NOTE: base_reg is an internal register number, so each D register
12023 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12024 saved += vfp_emit_fstmd (base_reg, 16);
12028 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12029 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12031 reg = gen_rtx_REG (DFmode, base_reg);
12034 XVECEXP (par, 0, 0)
12035 = gen_rtx_SET (VOIDmode,
12038 gen_rtx_PRE_MODIFY (Pmode,
12041 (stack_pointer_rtx,
12044 gen_rtx_UNSPEC (BLKmode,
12045 gen_rtvec (1, reg),
12046 UNSPEC_PUSH_MULT));
12048 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12049 plus_constant (stack_pointer_rtx, -(count * 8)));
12050 RTX_FRAME_RELATED_P (tmp) = 1;
12051 XVECEXP (dwarf, 0, 0) = tmp;
12053 tmp = gen_rtx_SET (VOIDmode,
12054 gen_frame_mem (DFmode, stack_pointer_rtx),
12056 RTX_FRAME_RELATED_P (tmp) = 1;
12057 XVECEXP (dwarf, 0, 1) = tmp;
12059 for (i = 1; i < count; i++)
12061 reg = gen_rtx_REG (DFmode, base_reg);
12063 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12065 tmp = gen_rtx_SET (VOIDmode,
12066 gen_frame_mem (DFmode,
12067 plus_constant (stack_pointer_rtx,
12070 RTX_FRAME_RELATED_P (tmp) = 1;
12071 XVECEXP (dwarf, 0, i + 1) = tmp;
12074 par = emit_insn (par);
12075 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12076 RTX_FRAME_RELATED_P (par) = 1;
12081 /* Emit a call instruction with pattern PAT. ADDR is the address of
12082 the call target. */
12085 arm_emit_call_insn (rtx pat, rtx addr)
12089 insn = emit_call_insn (pat);
12091 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12092 If the call might use such an entry, add a use of the PIC register
12093 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12094 if (TARGET_VXWORKS_RTP
12096 && GET_CODE (addr) == SYMBOL_REF
12097 && (SYMBOL_REF_DECL (addr)
12098 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12099 : !SYMBOL_REF_LOCAL_P (addr)))
12101 require_pic_register ();
12102 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12106 /* Output a 'call' insn. */
12108 output_call (rtx *operands)
12110 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12112 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12113 if (REGNO (operands[0]) == LR_REGNUM)
12115 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12116 output_asm_insn ("mov%?\t%0, %|lr", operands);
12119 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12121 if (TARGET_INTERWORK || arm_arch4t)
12122 output_asm_insn ("bx%?\t%0", operands);
12124 output_asm_insn ("mov%?\t%|pc, %0", operands);
12129 /* Output a 'call' insn that is a reference in memory. This is
12130 disabled for ARMv5 and we prefer a blx instead because otherwise
12131 there's a significant performance overhead. */
12133 output_call_mem (rtx *operands)
12135 gcc_assert (!arm_arch5);
12136 if (TARGET_INTERWORK)
12138 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12139 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12140 output_asm_insn ("bx%?\t%|ip", operands);
12142 else if (regno_use_in (LR_REGNUM, operands[0]))
12144 /* LR is used in the memory address. We load the address in the
12145 first instruction. It's safe to use IP as the target of the
12146 load since the call will kill it anyway. */
12147 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12148 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12150 output_asm_insn ("bx%?\t%|ip", operands);
12152 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12156 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12157 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12164 /* Output a move from arm registers to an fpa registers.
12165 OPERANDS[0] is an fpa register.
12166 OPERANDS[1] is the first registers of an arm register pair. */
12168 output_mov_long_double_fpa_from_arm (rtx *operands)
12170 int arm_reg0 = REGNO (operands[1]);
12173 gcc_assert (arm_reg0 != IP_REGNUM);
12175 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12176 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12177 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12179 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12180 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12185 /* Output a move from an fpa register to arm registers.
12186 OPERANDS[0] is the first registers of an arm register pair.
12187 OPERANDS[1] is an fpa register. */
12189 output_mov_long_double_arm_from_fpa (rtx *operands)
12191 int arm_reg0 = REGNO (operands[0]);
12194 gcc_assert (arm_reg0 != IP_REGNUM);
12196 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12197 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12198 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12200 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12201 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12205 /* Output a move from arm registers to arm registers of a long double
12206 OPERANDS[0] is the destination.
12207 OPERANDS[1] is the source. */
12209 output_mov_long_double_arm_from_arm (rtx *operands)
12211 /* We have to be careful here because the two might overlap. */
12212 int dest_start = REGNO (operands[0]);
12213 int src_start = REGNO (operands[1]);
12217 if (dest_start < src_start)
12219 for (i = 0; i < 3; i++)
12221 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12222 ops[1] = gen_rtx_REG (SImode, src_start + i);
12223 output_asm_insn ("mov%?\t%0, %1", ops);
12228 for (i = 2; i >= 0; i--)
12230 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12231 ops[1] = gen_rtx_REG (SImode, src_start + i);
12232 output_asm_insn ("mov%?\t%0, %1", ops);
12240 arm_emit_movpair (rtx dest, rtx src)
12242 /* If the src is an immediate, simplify it. */
12243 if (CONST_INT_P (src))
12245 HOST_WIDE_INT val = INTVAL (src);
12246 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12247 if ((val >> 16) & 0x0000ffff)
12248 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12250 GEN_INT ((val >> 16) & 0x0000ffff));
12253 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12254 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12257 /* Output a move from arm registers to an fpa registers.
12258 OPERANDS[0] is an fpa register.
12259 OPERANDS[1] is the first registers of an arm register pair. */
12261 output_mov_double_fpa_from_arm (rtx *operands)
12263 int arm_reg0 = REGNO (operands[1]);
12266 gcc_assert (arm_reg0 != IP_REGNUM);
12268 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12269 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12270 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12271 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12275 /* Output a move from an fpa register to arm registers.
12276 OPERANDS[0] is the first registers of an arm register pair.
12277 OPERANDS[1] is an fpa register. */
12279 output_mov_double_arm_from_fpa (rtx *operands)
12281 int arm_reg0 = REGNO (operands[0]);
12284 gcc_assert (arm_reg0 != IP_REGNUM);
12286 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12287 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12288 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12289 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12293 /* Output a move between double words.
12294 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12295 or MEM<-REG and all MEMs must be offsettable addresses. */
12297 output_move_double (rtx *operands)
12299 enum rtx_code code0 = GET_CODE (operands[0]);
12300 enum rtx_code code1 = GET_CODE (operands[1]);
12305 unsigned int reg0 = REGNO (operands[0]);
12307 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12309 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12311 switch (GET_CODE (XEXP (operands[1], 0)))
12315 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12316 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12318 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12322 gcc_assert (TARGET_LDRD);
12323 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12328 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12330 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12335 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12337 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12341 gcc_assert (TARGET_LDRD);
12342 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12347 /* Autoicrement addressing modes should never have overlapping
12348 base and destination registers, and overlapping index registers
12349 are already prohibited, so this doesn't need to worry about
12351 otherops[0] = operands[0];
12352 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12353 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12355 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12357 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12359 /* Registers overlap so split out the increment. */
12360 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12361 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12365 /* Use a single insn if we can.
12366 FIXME: IWMMXT allows offsets larger than ldrd can
12367 handle, fix these up with a pair of ldr. */
12369 || GET_CODE (otherops[2]) != CONST_INT
12370 || (INTVAL (otherops[2]) > -256
12371 && INTVAL (otherops[2]) < 256))
12372 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12375 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12376 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12382 /* Use a single insn if we can.
12383 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12384 fix these up with a pair of ldr. */
12386 || GET_CODE (otherops[2]) != CONST_INT
12387 || (INTVAL (otherops[2]) > -256
12388 && INTVAL (otherops[2]) < 256))
12389 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12392 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12393 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12400 /* We might be able to use ldrd %0, %1 here. However the range is
12401 different to ldr/adr, and it is broken on some ARMv7-M
12402 implementations. */
12403 /* Use the second register of the pair to avoid problematic
12405 otherops[1] = operands[1];
12406 output_asm_insn ("adr%?\t%0, %1", otherops);
12407 operands[1] = otherops[0];
12409 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12411 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12414 /* ??? This needs checking for thumb2. */
12416 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12417 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12419 otherops[0] = operands[0];
12420 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12421 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12423 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12425 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12427 switch ((int) INTVAL (otherops[2]))
12430 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12435 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12440 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12444 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12445 operands[1] = otherops[0];
12447 && (GET_CODE (otherops[2]) == REG
12449 || (GET_CODE (otherops[2]) == CONST_INT
12450 && INTVAL (otherops[2]) > -256
12451 && INTVAL (otherops[2]) < 256)))
12453 if (reg_overlap_mentioned_p (operands[0],
12457 /* Swap base and index registers over to
12458 avoid a conflict. */
12460 otherops[1] = otherops[2];
12463 /* If both registers conflict, it will usually
12464 have been fixed by a splitter. */
12465 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12466 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12468 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12469 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12473 otherops[0] = operands[0];
12474 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12479 if (GET_CODE (otherops[2]) == CONST_INT)
12481 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12482 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12484 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12487 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12490 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12493 return "ldr%(d%)\t%0, [%1]";
12495 return "ldm%(ia%)\t%1, %M0";
12499 otherops[1] = adjust_address (operands[1], SImode, 4);
12500 /* Take care of overlapping base/data reg. */
12501 if (reg_mentioned_p (operands[0], operands[1]))
12503 output_asm_insn ("ldr%?\t%0, %1", otherops);
12504 output_asm_insn ("ldr%?\t%0, %1", operands);
12508 output_asm_insn ("ldr%?\t%0, %1", operands);
12509 output_asm_insn ("ldr%?\t%0, %1", otherops);
12516 /* Constraints should ensure this. */
12517 gcc_assert (code0 == MEM && code1 == REG);
12518 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12520 switch (GET_CODE (XEXP (operands[0], 0)))
12524 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12526 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12530 gcc_assert (TARGET_LDRD);
12531 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12536 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12538 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12543 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12545 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12549 gcc_assert (TARGET_LDRD);
12550 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12555 otherops[0] = operands[1];
12556 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12557 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12559 /* IWMMXT allows offsets larger than ldrd can handle,
12560 fix these up with a pair of ldr. */
12562 && GET_CODE (otherops[2]) == CONST_INT
12563 && (INTVAL(otherops[2]) <= -256
12564 || INTVAL(otherops[2]) >= 256))
12566 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12568 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12569 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12573 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12574 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12577 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12578 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12580 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12584 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12585 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12587 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12590 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12596 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12602 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12607 && (GET_CODE (otherops[2]) == REG
12609 || (GET_CODE (otherops[2]) == CONST_INT
12610 && INTVAL (otherops[2]) > -256
12611 && INTVAL (otherops[2]) < 256)))
12613 otherops[0] = operands[1];
12614 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12615 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12621 otherops[0] = adjust_address (operands[0], SImode, 4);
12622 otherops[1] = operands[1];
12623 output_asm_insn ("str%?\t%1, %0", operands);
12624 output_asm_insn ("str%?\t%H1, %0", otherops);
12631 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12632 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12635 output_move_quad (rtx *operands)
12637 if (REG_P (operands[0]))
12639 /* Load, or reg->reg move. */
12641 if (MEM_P (operands[1]))
12643 switch (GET_CODE (XEXP (operands[1], 0)))
12646 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12651 output_asm_insn ("adr%?\t%0, %1", operands);
12652 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12656 gcc_unreachable ();
12664 gcc_assert (REG_P (operands[1]));
12666 dest = REGNO (operands[0]);
12667 src = REGNO (operands[1]);
12669 /* This seems pretty dumb, but hopefully GCC won't try to do it
12672 for (i = 0; i < 4; i++)
12674 ops[0] = gen_rtx_REG (SImode, dest + i);
12675 ops[1] = gen_rtx_REG (SImode, src + i);
12676 output_asm_insn ("mov%?\t%0, %1", ops);
12679 for (i = 3; i >= 0; i--)
12681 ops[0] = gen_rtx_REG (SImode, dest + i);
12682 ops[1] = gen_rtx_REG (SImode, src + i);
12683 output_asm_insn ("mov%?\t%0, %1", ops);
12689 gcc_assert (MEM_P (operands[0]));
12690 gcc_assert (REG_P (operands[1]));
12691 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12693 switch (GET_CODE (XEXP (operands[0], 0)))
12696 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12700 gcc_unreachable ();
12707 /* Output a VFP load or store instruction. */
12710 output_move_vfp (rtx *operands)
12712 rtx reg, mem, addr, ops[2];
12713 int load = REG_P (operands[0]);
12714 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12715 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12718 enum machine_mode mode;
12720 reg = operands[!load];
12721 mem = operands[load];
12723 mode = GET_MODE (reg);
12725 gcc_assert (REG_P (reg));
12726 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12727 gcc_assert (mode == SFmode
12731 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12732 gcc_assert (MEM_P (mem));
12734 addr = XEXP (mem, 0);
12736 switch (GET_CODE (addr))
12739 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12740 ops[0] = XEXP (addr, 0);
12745 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12746 ops[0] = XEXP (addr, 0);
12751 templ = "f%s%c%%?\t%%%s0, %%1%s";
12757 sprintf (buff, templ,
12758 load ? "ld" : "st",
12761 integer_p ? "\t%@ int" : "");
12762 output_asm_insn (buff, ops);
12767 /* Output a Neon quad-word load or store, or a load or store for
12768 larger structure modes.
12770 WARNING: The ordering of elements is weird in big-endian mode,
12771 because we use VSTM, as required by the EABI. GCC RTL defines
12772 element ordering based on in-memory order. This can be differ
12773 from the architectural ordering of elements within a NEON register.
12774 The intrinsics defined in arm_neon.h use the NEON register element
12775 ordering, not the GCC RTL element ordering.
12777 For example, the in-memory ordering of a big-endian a quadword
12778 vector with 16-bit elements when stored from register pair {d0,d1}
12779 will be (lowest address first, d0[N] is NEON register element N):
12781 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12783 When necessary, quadword registers (dN, dN+1) are moved to ARM
12784 registers from rN in the order:
12786 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12788 So that STM/LDM can be used on vectors in ARM registers, and the
12789 same memory layout will result as if VSTM/VLDM were used. */
12792 output_move_neon (rtx *operands)
12794 rtx reg, mem, addr, ops[2];
12795 int regno, load = REG_P (operands[0]);
12798 enum machine_mode mode;
12800 reg = operands[!load];
12801 mem = operands[load];
12803 mode = GET_MODE (reg);
12805 gcc_assert (REG_P (reg));
12806 regno = REGNO (reg);
12807 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12808 || NEON_REGNO_OK_FOR_QUAD (regno));
12809 gcc_assert (VALID_NEON_DREG_MODE (mode)
12810 || VALID_NEON_QREG_MODE (mode)
12811 || VALID_NEON_STRUCT_MODE (mode));
12812 gcc_assert (MEM_P (mem));
12814 addr = XEXP (mem, 0);
12816 /* Strip off const from addresses like (const (plus (...))). */
12817 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12818 addr = XEXP (addr, 0);
12820 switch (GET_CODE (addr))
12823 templ = "v%smia%%?\t%%0!, %%h1";
12824 ops[0] = XEXP (addr, 0);
12829 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12830 templ = "v%smdb%%?\t%%0!, %%h1";
12831 ops[0] = XEXP (addr, 0);
12836 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12837 gcc_unreachable ();
12842 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12845 for (i = 0; i < nregs; i++)
12847 /* We're only using DImode here because it's a convenient size. */
12848 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12849 ops[1] = adjust_address (mem, DImode, 8 * i);
12850 if (reg_overlap_mentioned_p (ops[0], mem))
12852 gcc_assert (overlap == -1);
12857 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12858 output_asm_insn (buff, ops);
12863 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12864 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12865 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12866 output_asm_insn (buff, ops);
12873 templ = "v%smia%%?\t%%m0, %%h1";
12878 sprintf (buff, templ, load ? "ld" : "st");
12879 output_asm_insn (buff, ops);
12884 /* Compute and return the length of neon_mov<mode>, where <mode> is
12885 one of VSTRUCT modes: EI, OI, CI or XI. */
12887 arm_attr_length_move_neon (rtx insn)
12889 rtx reg, mem, addr;
12891 enum machine_mode mode;
12893 extract_insn_cached (insn);
12895 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
12897 mode = GET_MODE (recog_data.operand[0]);
12908 gcc_unreachable ();
12912 load = REG_P (recog_data.operand[0]);
12913 reg = recog_data.operand[!load];
12914 mem = recog_data.operand[load];
12916 gcc_assert (MEM_P (mem));
12918 mode = GET_MODE (reg);
12919 addr = XEXP (mem, 0);
12921 /* Strip off const from addresses like (const (plus (...))). */
12922 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12923 addr = XEXP (addr, 0);
12925 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
12927 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12934 /* Output an ADD r, s, #n where n may be too big for one instruction.
12935 If adding zero to one register, output nothing. */
12937 output_add_immediate (rtx *operands)
12939 HOST_WIDE_INT n = INTVAL (operands[2]);
12941 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12944 output_multi_immediate (operands,
12945 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12948 output_multi_immediate (operands,
12949 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12956 /* Output a multiple immediate operation.
12957 OPERANDS is the vector of operands referred to in the output patterns.
12958 INSTR1 is the output pattern to use for the first constant.
12959 INSTR2 is the output pattern to use for subsequent constants.
12960 IMMED_OP is the index of the constant slot in OPERANDS.
12961 N is the constant value. */
12962 static const char *
12963 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12964 int immed_op, HOST_WIDE_INT n)
12966 #if HOST_BITS_PER_WIDE_INT > 32
12972 /* Quick and easy output. */
12973 operands[immed_op] = const0_rtx;
12974 output_asm_insn (instr1, operands);
12979 const char * instr = instr1;
12981 /* Note that n is never zero here (which would give no output). */
12982 for (i = 0; i < 32; i += 2)
12986 operands[immed_op] = GEN_INT (n & (255 << i));
12987 output_asm_insn (instr, operands);
12997 /* Return the name of a shifter operation. */
12998 static const char *
12999 arm_shift_nmem(enum rtx_code code)
13004 return ARM_LSL_NAME;
13020 /* Return the appropriate ARM instruction for the operation code.
13021 The returned result should not be overwritten. OP is the rtx of the
13022 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13025 arithmetic_instr (rtx op, int shift_first_arg)
13027 switch (GET_CODE (op))
13033 return shift_first_arg ? "rsb" : "sub";
13048 return arm_shift_nmem(GET_CODE(op));
13051 gcc_unreachable ();
13055 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13056 for the operation code. The returned result should not be overwritten.
13057 OP is the rtx code of the shift.
13058 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13060 static const char *
13061 shift_op (rtx op, HOST_WIDE_INT *amountp)
13064 enum rtx_code code = GET_CODE (op);
13066 switch (GET_CODE (XEXP (op, 1)))
13074 *amountp = INTVAL (XEXP (op, 1));
13078 gcc_unreachable ();
13084 gcc_assert (*amountp != -1);
13085 *amountp = 32 - *amountp;
13088 /* Fall through. */
13094 mnem = arm_shift_nmem(code);
13098 /* We never have to worry about the amount being other than a
13099 power of 2, since this case can never be reloaded from a reg. */
13100 gcc_assert (*amountp != -1);
13101 *amountp = int_log2 (*amountp);
13102 return ARM_LSL_NAME;
13105 gcc_unreachable ();
13108 if (*amountp != -1)
13110 /* This is not 100% correct, but follows from the desire to merge
13111 multiplication by a power of 2 with the recognizer for a
13112 shift. >=32 is not a valid shift for "lsl", so we must try and
13113 output a shift that produces the correct arithmetical result.
13114 Using lsr #32 is identical except for the fact that the carry bit
13115 is not set correctly if we set the flags; but we never use the
13116 carry bit from such an operation, so we can ignore that. */
13117 if (code == ROTATERT)
13118 /* Rotate is just modulo 32. */
13120 else if (*amountp != (*amountp & 31))
13122 if (code == ASHIFT)
13127 /* Shifts of 0 are no-ops. */
13135 /* Obtain the shift from the POWER of two. */
13137 static HOST_WIDE_INT
13138 int_log2 (HOST_WIDE_INT power)
13140 HOST_WIDE_INT shift = 0;
13142 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13144 gcc_assert (shift <= 31);
13151 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13152 because /bin/as is horribly restrictive. The judgement about
13153 whether or not each character is 'printable' (and can be output as
13154 is) or not (and must be printed with an octal escape) must be made
13155 with reference to the *host* character set -- the situation is
13156 similar to that discussed in the comments above pp_c_char in
13157 c-pretty-print.c. */
13159 #define MAX_ASCII_LEN 51
13162 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13165 int len_so_far = 0;
13167 fputs ("\t.ascii\t\"", stream);
13169 for (i = 0; i < len; i++)
13173 if (len_so_far >= MAX_ASCII_LEN)
13175 fputs ("\"\n\t.ascii\t\"", stream);
13181 if (c == '\\' || c == '\"')
13183 putc ('\\', stream);
13191 fprintf (stream, "\\%03o", c);
13196 fputs ("\"\n", stream);
13199 /* Compute the register save mask for registers 0 through 12
13200 inclusive. This code is used by arm_compute_save_reg_mask. */
13202 static unsigned long
13203 arm_compute_save_reg0_reg12_mask (void)
13205 unsigned long func_type = arm_current_func_type ();
13206 unsigned long save_reg_mask = 0;
13209 if (IS_INTERRUPT (func_type))
13211 unsigned int max_reg;
13212 /* Interrupt functions must not corrupt any registers,
13213 even call clobbered ones. If this is a leaf function
13214 we can just examine the registers used by the RTL, but
13215 otherwise we have to assume that whatever function is
13216 called might clobber anything, and so we have to save
13217 all the call-clobbered registers as well. */
13218 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13219 /* FIQ handlers have registers r8 - r12 banked, so
13220 we only need to check r0 - r7, Normal ISRs only
13221 bank r14 and r15, so we must check up to r12.
13222 r13 is the stack pointer which is always preserved,
13223 so we do not need to consider it here. */
13228 for (reg = 0; reg <= max_reg; reg++)
13229 if (df_regs_ever_live_p (reg)
13230 || (! current_function_is_leaf && call_used_regs[reg]))
13231 save_reg_mask |= (1 << reg);
13233 /* Also save the pic base register if necessary. */
13235 && !TARGET_SINGLE_PIC_BASE
13236 && arm_pic_register != INVALID_REGNUM
13237 && crtl->uses_pic_offset_table)
13238 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13240 else if (IS_VOLATILE(func_type))
13242 /* For noreturn functions we historically omitted register saves
13243 altogether. However this really messes up debugging. As a
13244 compromise save just the frame pointers. Combined with the link
13245 register saved elsewhere this should be sufficient to get
13247 if (frame_pointer_needed)
13248 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13249 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13250 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13251 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13252 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13256 /* In the normal case we only need to save those registers
13257 which are call saved and which are used by this function. */
13258 for (reg = 0; reg <= 11; reg++)
13259 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13260 save_reg_mask |= (1 << reg);
13262 /* Handle the frame pointer as a special case. */
13263 if (frame_pointer_needed)
13264 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13266 /* If we aren't loading the PIC register,
13267 don't stack it even though it may be live. */
13269 && !TARGET_SINGLE_PIC_BASE
13270 && arm_pic_register != INVALID_REGNUM
13271 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13272 || crtl->uses_pic_offset_table))
13273 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13275 /* The prologue will copy SP into R0, so save it. */
13276 if (IS_STACKALIGN (func_type))
13277 save_reg_mask |= 1;
13280 /* Save registers so the exception handler can modify them. */
13281 if (crtl->calls_eh_return)
13287 reg = EH_RETURN_DATA_REGNO (i);
13288 if (reg == INVALID_REGNUM)
13290 save_reg_mask |= 1 << reg;
13294 return save_reg_mask;
13298 /* Compute the number of bytes used to store the static chain register on the
13299 stack, above the stack frame. We need to know this accurately to get the
13300 alignment of the rest of the stack frame correct. */
13302 static int arm_compute_static_chain_stack_bytes (void)
13304 unsigned long func_type = arm_current_func_type ();
13305 int static_chain_stack_bytes = 0;
13307 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13308 IS_NESTED (func_type) &&
13309 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13310 static_chain_stack_bytes = 4;
13312 return static_chain_stack_bytes;
13316 /* Compute a bit mask of which registers need to be
13317 saved on the stack for the current function.
13318 This is used by arm_get_frame_offsets, which may add extra registers. */
13320 static unsigned long
13321 arm_compute_save_reg_mask (void)
13323 unsigned int save_reg_mask = 0;
13324 unsigned long func_type = arm_current_func_type ();
13327 if (IS_NAKED (func_type))
13328 /* This should never really happen. */
13331 /* If we are creating a stack frame, then we must save the frame pointer,
13332 IP (which will hold the old stack pointer), LR and the PC. */
13333 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13335 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13338 | (1 << PC_REGNUM);
13340 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13342 /* Decide if we need to save the link register.
13343 Interrupt routines have their own banked link register,
13344 so they never need to save it.
13345 Otherwise if we do not use the link register we do not need to save
13346 it. If we are pushing other registers onto the stack however, we
13347 can save an instruction in the epilogue by pushing the link register
13348 now and then popping it back into the PC. This incurs extra memory
13349 accesses though, so we only do it when optimizing for size, and only
13350 if we know that we will not need a fancy return sequence. */
13351 if (df_regs_ever_live_p (LR_REGNUM)
13354 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13355 && !crtl->calls_eh_return))
13356 save_reg_mask |= 1 << LR_REGNUM;
13358 if (cfun->machine->lr_save_eliminated)
13359 save_reg_mask &= ~ (1 << LR_REGNUM);
13361 if (TARGET_REALLY_IWMMXT
13362 && ((bit_count (save_reg_mask)
13363 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13364 arm_compute_static_chain_stack_bytes())
13367 /* The total number of registers that are going to be pushed
13368 onto the stack is odd. We need to ensure that the stack
13369 is 64-bit aligned before we start to save iWMMXt registers,
13370 and also before we start to create locals. (A local variable
13371 might be a double or long long which we will load/store using
13372 an iWMMXt instruction). Therefore we need to push another
13373 ARM register, so that the stack will be 64-bit aligned. We
13374 try to avoid using the arg registers (r0 -r3) as they might be
13375 used to pass values in a tail call. */
13376 for (reg = 4; reg <= 12; reg++)
13377 if ((save_reg_mask & (1 << reg)) == 0)
13381 save_reg_mask |= (1 << reg);
13384 cfun->machine->sibcall_blocked = 1;
13385 save_reg_mask |= (1 << 3);
13389 /* We may need to push an additional register for use initializing the
13390 PIC base register. */
13391 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13392 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13394 reg = thumb_find_work_register (1 << 4);
13395 if (!call_used_regs[reg])
13396 save_reg_mask |= (1 << reg);
13399 return save_reg_mask;
13403 /* Compute a bit mask of which registers need to be
13404 saved on the stack for the current function. */
13405 static unsigned long
13406 thumb1_compute_save_reg_mask (void)
13408 unsigned long mask;
13412 for (reg = 0; reg < 12; reg ++)
13413 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13417 && !TARGET_SINGLE_PIC_BASE
13418 && arm_pic_register != INVALID_REGNUM
13419 && crtl->uses_pic_offset_table)
13420 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13422 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13423 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13424 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13426 /* LR will also be pushed if any lo regs are pushed. */
13427 if (mask & 0xff || thumb_force_lr_save ())
13428 mask |= (1 << LR_REGNUM);
13430 /* Make sure we have a low work register if we need one.
13431 We will need one if we are going to push a high register,
13432 but we are not currently intending to push a low register. */
13433 if ((mask & 0xff) == 0
13434 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13436 /* Use thumb_find_work_register to choose which register
13437 we will use. If the register is live then we will
13438 have to push it. Use LAST_LO_REGNUM as our fallback
13439 choice for the register to select. */
13440 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13441 /* Make sure the register returned by thumb_find_work_register is
13442 not part of the return value. */
13443 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13444 reg = LAST_LO_REGNUM;
13446 if (! call_used_regs[reg])
13450 /* The 504 below is 8 bytes less than 512 because there are two possible
13451 alignment words. We can't tell here if they will be present or not so we
13452 have to play it safe and assume that they are. */
13453 if ((CALLER_INTERWORKING_SLOT_SIZE +
13454 ROUND_UP_WORD (get_frame_size ()) +
13455 crtl->outgoing_args_size) >= 504)
13457 /* This is the same as the code in thumb1_expand_prologue() which
13458 determines which register to use for stack decrement. */
13459 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13460 if (mask & (1 << reg))
13463 if (reg > LAST_LO_REGNUM)
13465 /* Make sure we have a register available for stack decrement. */
13466 mask |= 1 << LAST_LO_REGNUM;
13474 /* Return the number of bytes required to save VFP registers. */
13476 arm_get_vfp_saved_size (void)
13478 unsigned int regno;
13483 /* Space for saved VFP registers. */
13484 if (TARGET_HARD_FLOAT && TARGET_VFP)
13487 for (regno = FIRST_VFP_REGNUM;
13488 regno < LAST_VFP_REGNUM;
13491 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13492 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13496 /* Workaround ARM10 VFPr1 bug. */
13497 if (count == 2 && !arm_arch6)
13499 saved += count * 8;
13508 if (count == 2 && !arm_arch6)
13510 saved += count * 8;
13517 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13518 everything bar the final return instruction. */
13520 output_return_instruction (rtx operand, int really_return, int reverse)
13522 char conditional[10];
13525 unsigned long live_regs_mask;
13526 unsigned long func_type;
13527 arm_stack_offsets *offsets;
13529 func_type = arm_current_func_type ();
13531 if (IS_NAKED (func_type))
13534 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13536 /* If this function was declared non-returning, and we have
13537 found a tail call, then we have to trust that the called
13538 function won't return. */
13543 /* Otherwise, trap an attempted return by aborting. */
13545 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13547 assemble_external_libcall (ops[1]);
13548 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13554 gcc_assert (!cfun->calls_alloca || really_return);
13556 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13558 cfun->machine->return_used_this_function = 1;
13560 offsets = arm_get_frame_offsets ();
13561 live_regs_mask = offsets->saved_regs_mask;
13563 if (live_regs_mask)
13565 const char * return_reg;
13567 /* If we do not have any special requirements for function exit
13568 (e.g. interworking) then we can load the return address
13569 directly into the PC. Otherwise we must load it into LR. */
13571 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13572 return_reg = reg_names[PC_REGNUM];
13574 return_reg = reg_names[LR_REGNUM];
13576 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13578 /* There are three possible reasons for the IP register
13579 being saved. 1) a stack frame was created, in which case
13580 IP contains the old stack pointer, or 2) an ISR routine
13581 corrupted it, or 3) it was saved to align the stack on
13582 iWMMXt. In case 1, restore IP into SP, otherwise just
13584 if (frame_pointer_needed)
13586 live_regs_mask &= ~ (1 << IP_REGNUM);
13587 live_regs_mask |= (1 << SP_REGNUM);
13590 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13593 /* On some ARM architectures it is faster to use LDR rather than
13594 LDM to load a single register. On other architectures, the
13595 cost is the same. In 26 bit mode, or for exception handlers,
13596 we have to use LDM to load the PC so that the CPSR is also
13598 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13599 if (live_regs_mask == (1U << reg))
13602 if (reg <= LAST_ARM_REGNUM
13603 && (reg != LR_REGNUM
13605 || ! IS_INTERRUPT (func_type)))
13607 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13608 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13615 /* Generate the load multiple instruction to restore the
13616 registers. Note we can get here, even if
13617 frame_pointer_needed is true, but only if sp already
13618 points to the base of the saved core registers. */
13619 if (live_regs_mask & (1 << SP_REGNUM))
13621 unsigned HOST_WIDE_INT stack_adjust;
13623 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13624 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13626 if (stack_adjust && arm_arch5 && TARGET_ARM)
13627 if (TARGET_UNIFIED_ASM)
13628 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13630 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13633 /* If we can't use ldmib (SA110 bug),
13634 then try to pop r3 instead. */
13636 live_regs_mask |= 1 << 3;
13638 if (TARGET_UNIFIED_ASM)
13639 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13641 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13645 if (TARGET_UNIFIED_ASM)
13646 sprintf (instr, "pop%s\t{", conditional);
13648 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13650 p = instr + strlen (instr);
13652 for (reg = 0; reg <= SP_REGNUM; reg++)
13653 if (live_regs_mask & (1 << reg))
13655 int l = strlen (reg_names[reg]);
13661 memcpy (p, ", ", 2);
13665 memcpy (p, "%|", 2);
13666 memcpy (p + 2, reg_names[reg], l);
13670 if (live_regs_mask & (1 << LR_REGNUM))
13672 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13673 /* If returning from an interrupt, restore the CPSR. */
13674 if (IS_INTERRUPT (func_type))
13681 output_asm_insn (instr, & operand);
13683 /* See if we need to generate an extra instruction to
13684 perform the actual function return. */
13686 && func_type != ARM_FT_INTERWORKED
13687 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13689 /* The return has already been handled
13690 by loading the LR into the PC. */
13697 switch ((int) ARM_FUNC_TYPE (func_type))
13701 /* ??? This is wrong for unified assembly syntax. */
13702 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13705 case ARM_FT_INTERWORKED:
13706 sprintf (instr, "bx%s\t%%|lr", conditional);
13709 case ARM_FT_EXCEPTION:
13710 /* ??? This is wrong for unified assembly syntax. */
13711 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13715 /* Use bx if it's available. */
13716 if (arm_arch5 || arm_arch4t)
13717 sprintf (instr, "bx%s\t%%|lr", conditional);
13719 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13723 output_asm_insn (instr, & operand);
13729 /* Write the function name into the code section, directly preceding
13730 the function prologue.
13732 Code will be output similar to this:
13734 .ascii "arm_poke_function_name", 0
13737 .word 0xff000000 + (t1 - t0)
13738 arm_poke_function_name
13740 stmfd sp!, {fp, ip, lr, pc}
13743 When performing a stack backtrace, code can inspect the value
13744 of 'pc' stored at 'fp' + 0. If the trace function then looks
13745 at location pc - 12 and the top 8 bits are set, then we know
13746 that there is a function name embedded immediately preceding this
13747 location and has length ((pc[-3]) & 0xff000000).
13749 We assume that pc is declared as a pointer to an unsigned long.
13751 It is of no benefit to output the function name if we are assembling
13752 a leaf function. These function types will not contain a stack
13753 backtrace structure, therefore it is not possible to determine the
13756 arm_poke_function_name (FILE *stream, const char *name)
13758 unsigned long alignlength;
13759 unsigned long length;
13762 length = strlen (name) + 1;
13763 alignlength = ROUND_UP_WORD (length);
13765 ASM_OUTPUT_ASCII (stream, name, length);
13766 ASM_OUTPUT_ALIGN (stream, 2);
13767 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13768 assemble_aligned_integer (UNITS_PER_WORD, x);
13771 /* Place some comments into the assembler stream
13772 describing the current function. */
13774 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13776 unsigned long func_type;
13780 thumb1_output_function_prologue (f, frame_size);
13784 /* Sanity check. */
13785 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13787 func_type = arm_current_func_type ();
13789 switch ((int) ARM_FUNC_TYPE (func_type))
13792 case ARM_FT_NORMAL:
13794 case ARM_FT_INTERWORKED:
13795 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13798 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13801 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13803 case ARM_FT_EXCEPTION:
13804 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13808 if (IS_NAKED (func_type))
13809 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13811 if (IS_VOLATILE (func_type))
13812 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13814 if (IS_NESTED (func_type))
13815 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13816 if (IS_STACKALIGN (func_type))
13817 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13819 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13821 crtl->args.pretend_args_size, frame_size);
13823 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13824 frame_pointer_needed,
13825 cfun->machine->uses_anonymous_args);
13827 if (cfun->machine->lr_save_eliminated)
13828 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13830 if (crtl->calls_eh_return)
13831 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13836 arm_output_epilogue (rtx sibling)
13839 unsigned long saved_regs_mask;
13840 unsigned long func_type;
13841 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13842 frame that is $fp + 4 for a non-variadic function. */
13843 int floats_offset = 0;
13845 FILE * f = asm_out_file;
13846 unsigned int lrm_count = 0;
13847 int really_return = (sibling == NULL);
13849 arm_stack_offsets *offsets;
13851 /* If we have already generated the return instruction
13852 then it is futile to generate anything else. */
13853 if (use_return_insn (FALSE, sibling) &&
13854 (cfun->machine->return_used_this_function != 0))
13857 func_type = arm_current_func_type ();
13859 if (IS_NAKED (func_type))
13860 /* Naked functions don't have epilogues. */
13863 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13867 /* A volatile function should never return. Call abort. */
13868 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13869 assemble_external_libcall (op);
13870 output_asm_insn ("bl\t%a0", &op);
13875 /* If we are throwing an exception, then we really must be doing a
13876 return, so we can't tail-call. */
13877 gcc_assert (!crtl->calls_eh_return || really_return);
13879 offsets = arm_get_frame_offsets ();
13880 saved_regs_mask = offsets->saved_regs_mask;
13883 lrm_count = bit_count (saved_regs_mask);
13885 floats_offset = offsets->saved_args;
13886 /* Compute how far away the floats will be. */
13887 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13888 if (saved_regs_mask & (1 << reg))
13889 floats_offset += 4;
13891 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13893 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13894 int vfp_offset = offsets->frame;
13896 if (TARGET_FPA_EMU2)
13898 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13899 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13901 floats_offset += 12;
13902 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13903 reg, FP_REGNUM, floats_offset - vfp_offset);
13908 start_reg = LAST_FPA_REGNUM;
13910 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13912 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13914 floats_offset += 12;
13916 /* We can't unstack more than four registers at once. */
13917 if (start_reg - reg == 3)
13919 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13920 reg, FP_REGNUM, floats_offset - vfp_offset);
13921 start_reg = reg - 1;
13926 if (reg != start_reg)
13927 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13928 reg + 1, start_reg - reg,
13929 FP_REGNUM, floats_offset - vfp_offset);
13930 start_reg = reg - 1;
13934 /* Just in case the last register checked also needs unstacking. */
13935 if (reg != start_reg)
13936 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13937 reg + 1, start_reg - reg,
13938 FP_REGNUM, floats_offset - vfp_offset);
13941 if (TARGET_HARD_FLOAT && TARGET_VFP)
13945 /* The fldmd insns do not have base+offset addressing
13946 modes, so we use IP to hold the address. */
13947 saved_size = arm_get_vfp_saved_size ();
13949 if (saved_size > 0)
13951 floats_offset += saved_size;
13952 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13953 FP_REGNUM, floats_offset - vfp_offset);
13955 start_reg = FIRST_VFP_REGNUM;
13956 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13958 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13959 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13961 if (start_reg != reg)
13962 vfp_output_fldmd (f, IP_REGNUM,
13963 (start_reg - FIRST_VFP_REGNUM) / 2,
13964 (reg - start_reg) / 2);
13965 start_reg = reg + 2;
13968 if (start_reg != reg)
13969 vfp_output_fldmd (f, IP_REGNUM,
13970 (start_reg - FIRST_VFP_REGNUM) / 2,
13971 (reg - start_reg) / 2);
13976 /* The frame pointer is guaranteed to be non-double-word aligned.
13977 This is because it is set to (old_stack_pointer - 4) and the
13978 old_stack_pointer was double word aligned. Thus the offset to
13979 the iWMMXt registers to be loaded must also be non-double-word
13980 sized, so that the resultant address *is* double-word aligned.
13981 We can ignore floats_offset since that was already included in
13982 the live_regs_mask. */
13983 lrm_count += (lrm_count % 2 ? 2 : 1);
13985 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13986 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13988 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13989 reg, FP_REGNUM, lrm_count * 4);
13994 /* saved_regs_mask should contain the IP, which at the time of stack
13995 frame generation actually contains the old stack pointer. So a
13996 quick way to unwind the stack is just pop the IP register directly
13997 into the stack pointer. */
13998 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13999 saved_regs_mask &= ~ (1 << IP_REGNUM);
14000 saved_regs_mask |= (1 << SP_REGNUM);
14002 /* There are two registers left in saved_regs_mask - LR and PC. We
14003 only need to restore the LR register (the return address), but to
14004 save time we can load it directly into the PC, unless we need a
14005 special function exit sequence, or we are not really returning. */
14007 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14008 && !crtl->calls_eh_return)
14009 /* Delete the LR from the register mask, so that the LR on
14010 the stack is loaded into the PC in the register mask. */
14011 saved_regs_mask &= ~ (1 << LR_REGNUM);
14013 saved_regs_mask &= ~ (1 << PC_REGNUM);
14015 /* We must use SP as the base register, because SP is one of the
14016 registers being restored. If an interrupt or page fault
14017 happens in the ldm instruction, the SP might or might not
14018 have been restored. That would be bad, as then SP will no
14019 longer indicate the safe area of stack, and we can get stack
14020 corruption. Using SP as the base register means that it will
14021 be reset correctly to the original value, should an interrupt
14022 occur. If the stack pointer already points at the right
14023 place, then omit the subtraction. */
14024 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14025 || cfun->calls_alloca)
14026 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14027 4 * bit_count (saved_regs_mask));
14028 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14030 if (IS_INTERRUPT (func_type))
14031 /* Interrupt handlers will have pushed the
14032 IP onto the stack, so restore it now. */
14033 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14037 /* This branch is executed for ARM mode (non-apcs frames) and
14038 Thumb-2 mode. Frame layout is essentially the same for those
14039 cases, except that in ARM mode frame pointer points to the
14040 first saved register, while in Thumb-2 mode the frame pointer points
14041 to the last saved register.
14043 It is possible to make frame pointer point to last saved
14044 register in both cases, and remove some conditionals below.
14045 That means that fp setup in prologue would be just "mov fp, sp"
14046 and sp restore in epilogue would be just "mov sp, fp", whereas
14047 now we have to use add/sub in those cases. However, the value
14048 of that would be marginal, as both mov and add/sub are 32-bit
14049 in ARM mode, and it would require extra conditionals
14050 in arm_expand_prologue to distingish ARM-apcs-frame case
14051 (where frame pointer is required to point at first register)
14052 and ARM-non-apcs-frame. Therefore, such change is postponed
14053 until real need arise. */
14054 unsigned HOST_WIDE_INT amount;
14056 /* Restore stack pointer if necessary. */
14057 if (TARGET_ARM && frame_pointer_needed)
14059 operands[0] = stack_pointer_rtx;
14060 operands[1] = hard_frame_pointer_rtx;
14062 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14063 output_add_immediate (operands);
14067 if (frame_pointer_needed)
14069 /* For Thumb-2 restore sp from the frame pointer.
14070 Operand restrictions mean we have to incrememnt FP, then copy
14072 amount = offsets->locals_base - offsets->saved_regs;
14073 operands[0] = hard_frame_pointer_rtx;
14077 unsigned long count;
14078 operands[0] = stack_pointer_rtx;
14079 amount = offsets->outgoing_args - offsets->saved_regs;
14080 /* pop call clobbered registers if it avoids a
14081 separate stack adjustment. */
14082 count = offsets->saved_regs - offsets->saved_args;
14085 && !crtl->calls_eh_return
14086 && bit_count(saved_regs_mask) * 4 == count
14087 && !IS_INTERRUPT (func_type)
14088 && !crtl->tail_call_emit)
14090 unsigned long mask;
14091 mask = (1 << (arm_size_return_regs() / 4)) - 1;
14093 mask &= ~saved_regs_mask;
14095 while (bit_count (mask) * 4 > amount)
14097 while ((mask & (1 << reg)) == 0)
14099 mask &= ~(1 << reg);
14101 if (bit_count (mask) * 4 == amount) {
14103 saved_regs_mask |= mask;
14110 operands[1] = operands[0];
14111 operands[2] = GEN_INT (amount);
14112 output_add_immediate (operands);
14114 if (frame_pointer_needed)
14115 asm_fprintf (f, "\tmov\t%r, %r\n",
14116 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14119 if (TARGET_FPA_EMU2)
14121 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14122 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14123 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14128 start_reg = FIRST_FPA_REGNUM;
14130 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14132 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14134 if (reg - start_reg == 3)
14136 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14137 start_reg, SP_REGNUM);
14138 start_reg = reg + 1;
14143 if (reg != start_reg)
14144 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14145 start_reg, reg - start_reg,
14148 start_reg = reg + 1;
14152 /* Just in case the last register checked also needs unstacking. */
14153 if (reg != start_reg)
14154 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14155 start_reg, reg - start_reg, SP_REGNUM);
14158 if (TARGET_HARD_FLOAT && TARGET_VFP)
14160 int end_reg = LAST_VFP_REGNUM + 1;
14162 /* Scan the registers in reverse order. We need to match
14163 any groupings made in the prologue and generate matching
14165 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14167 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14168 && (!df_regs_ever_live_p (reg + 1)
14169 || call_used_regs[reg + 1]))
14171 if (end_reg > reg + 2)
14172 vfp_output_fldmd (f, SP_REGNUM,
14173 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14174 (end_reg - (reg + 2)) / 2);
14178 if (end_reg > reg + 2)
14179 vfp_output_fldmd (f, SP_REGNUM, 0,
14180 (end_reg - (reg + 2)) / 2);
14184 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14185 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14186 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14188 /* If we can, restore the LR into the PC. */
14189 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14190 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14191 && !IS_STACKALIGN (func_type)
14193 && crtl->args.pretend_args_size == 0
14194 && saved_regs_mask & (1 << LR_REGNUM)
14195 && !crtl->calls_eh_return)
14197 saved_regs_mask &= ~ (1 << LR_REGNUM);
14198 saved_regs_mask |= (1 << PC_REGNUM);
14199 rfe = IS_INTERRUPT (func_type);
14204 /* Load the registers off the stack. If we only have one register
14205 to load use the LDR instruction - it is faster. For Thumb-2
14206 always use pop and the assembler will pick the best instruction.*/
14207 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14208 && !IS_INTERRUPT(func_type))
14210 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14212 else if (saved_regs_mask)
14214 if (saved_regs_mask & (1 << SP_REGNUM))
14215 /* Note - write back to the stack register is not enabled
14216 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14217 in the list of registers and if we add writeback the
14218 instruction becomes UNPREDICTABLE. */
14219 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14221 else if (TARGET_ARM)
14222 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14225 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14228 if (crtl->args.pretend_args_size)
14230 /* Unwind the pre-pushed regs. */
14231 operands[0] = operands[1] = stack_pointer_rtx;
14232 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14233 output_add_immediate (operands);
14237 /* We may have already restored PC directly from the stack. */
14238 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14241 /* Stack adjustment for exception handler. */
14242 if (crtl->calls_eh_return)
14243 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14244 ARM_EH_STACKADJ_REGNUM);
14246 /* Generate the return instruction. */
14247 switch ((int) ARM_FUNC_TYPE (func_type))
14251 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14254 case ARM_FT_EXCEPTION:
14255 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14258 case ARM_FT_INTERWORKED:
14259 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14263 if (IS_STACKALIGN (func_type))
14265 /* See comment in arm_expand_prologue. */
14266 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14268 if (arm_arch5 || arm_arch4t)
14269 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14271 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14279 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14280 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14282 arm_stack_offsets *offsets;
14288 /* Emit any call-via-reg trampolines that are needed for v4t support
14289 of call_reg and call_value_reg type insns. */
14290 for (regno = 0; regno < LR_REGNUM; regno++)
14292 rtx label = cfun->machine->call_via[regno];
14296 switch_to_section (function_section (current_function_decl));
14297 targetm.asm_out.internal_label (asm_out_file, "L",
14298 CODE_LABEL_NUMBER (label));
14299 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14303 /* ??? Probably not safe to set this here, since it assumes that a
14304 function will be emitted as assembly immediately after we generate
14305 RTL for it. This does not happen for inline functions. */
14306 cfun->machine->return_used_this_function = 0;
14308 else /* TARGET_32BIT */
14310 /* We need to take into account any stack-frame rounding. */
14311 offsets = arm_get_frame_offsets ();
14313 gcc_assert (!use_return_insn (FALSE, NULL)
14314 || (cfun->machine->return_used_this_function != 0)
14315 || offsets->saved_regs == offsets->outgoing_args
14316 || frame_pointer_needed);
14318 /* Reset the ARM-specific per-function variables. */
14319 after_arm_reorg = 0;
14323 /* Generate and emit an insn that we will recognize as a push_multi.
14324 Unfortunately, since this insn does not reflect very well the actual
14325 semantics of the operation, we need to annotate the insn for the benefit
14326 of DWARF2 frame unwind information. */
14328 emit_multi_reg_push (unsigned long mask)
14331 int num_dwarf_regs;
14335 int dwarf_par_index;
14338 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14339 if (mask & (1 << i))
14342 gcc_assert (num_regs && num_regs <= 16);
14344 /* We don't record the PC in the dwarf frame information. */
14345 num_dwarf_regs = num_regs;
14346 if (mask & (1 << PC_REGNUM))
14349 /* For the body of the insn we are going to generate an UNSPEC in
14350 parallel with several USEs. This allows the insn to be recognized
14351 by the push_multi pattern in the arm.md file.
14353 The body of the insn looks something like this:
14356 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14357 (const_int:SI <num>)))
14358 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14364 For the frame note however, we try to be more explicit and actually
14365 show each register being stored into the stack frame, plus a (single)
14366 decrement of the stack pointer. We do it this way in order to be
14367 friendly to the stack unwinding code, which only wants to see a single
14368 stack decrement per instruction. The RTL we generate for the note looks
14369 something like this:
14372 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14373 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14374 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14375 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14379 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14380 instead we'd have a parallel expression detailing all
14381 the stores to the various memory addresses so that debug
14382 information is more up-to-date. Remember however while writing
14383 this to take care of the constraints with the push instruction.
14385 Note also that this has to be taken care of for the VFP registers.
14387 For more see PR43399. */
14389 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14390 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14391 dwarf_par_index = 1;
14393 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14395 if (mask & (1 << i))
14397 reg = gen_rtx_REG (SImode, i);
14399 XVECEXP (par, 0, 0)
14400 = gen_rtx_SET (VOIDmode,
14403 gen_rtx_PRE_MODIFY (Pmode,
14406 (stack_pointer_rtx,
14409 gen_rtx_UNSPEC (BLKmode,
14410 gen_rtvec (1, reg),
14411 UNSPEC_PUSH_MULT));
14413 if (i != PC_REGNUM)
14415 tmp = gen_rtx_SET (VOIDmode,
14416 gen_frame_mem (SImode, stack_pointer_rtx),
14418 RTX_FRAME_RELATED_P (tmp) = 1;
14419 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14427 for (j = 1, i++; j < num_regs; i++)
14429 if (mask & (1 << i))
14431 reg = gen_rtx_REG (SImode, i);
14433 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14435 if (i != PC_REGNUM)
14438 = gen_rtx_SET (VOIDmode,
14441 plus_constant (stack_pointer_rtx,
14444 RTX_FRAME_RELATED_P (tmp) = 1;
14445 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14452 par = emit_insn (par);
14454 tmp = gen_rtx_SET (VOIDmode,
14456 plus_constant (stack_pointer_rtx, -4 * num_regs));
14457 RTX_FRAME_RELATED_P (tmp) = 1;
14458 XVECEXP (dwarf, 0, 0) = tmp;
14460 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14465 /* Calculate the size of the return value that is passed in registers. */
14467 arm_size_return_regs (void)
14469 enum machine_mode mode;
14471 if (crtl->return_rtx != 0)
14472 mode = GET_MODE (crtl->return_rtx);
14474 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14476 return GET_MODE_SIZE (mode);
14480 emit_sfm (int base_reg, int count)
14487 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14488 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14490 reg = gen_rtx_REG (XFmode, base_reg++);
14492 XVECEXP (par, 0, 0)
14493 = gen_rtx_SET (VOIDmode,
14496 gen_rtx_PRE_MODIFY (Pmode,
14499 (stack_pointer_rtx,
14502 gen_rtx_UNSPEC (BLKmode,
14503 gen_rtvec (1, reg),
14504 UNSPEC_PUSH_MULT));
14505 tmp = gen_rtx_SET (VOIDmode,
14506 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14507 RTX_FRAME_RELATED_P (tmp) = 1;
14508 XVECEXP (dwarf, 0, 1) = tmp;
14510 for (i = 1; i < count; i++)
14512 reg = gen_rtx_REG (XFmode, base_reg++);
14513 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14515 tmp = gen_rtx_SET (VOIDmode,
14516 gen_frame_mem (XFmode,
14517 plus_constant (stack_pointer_rtx,
14520 RTX_FRAME_RELATED_P (tmp) = 1;
14521 XVECEXP (dwarf, 0, i + 1) = tmp;
14524 tmp = gen_rtx_SET (VOIDmode,
14526 plus_constant (stack_pointer_rtx, -12 * count));
14528 RTX_FRAME_RELATED_P (tmp) = 1;
14529 XVECEXP (dwarf, 0, 0) = tmp;
14531 par = emit_insn (par);
14532 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14538 /* Return true if the current function needs to save/restore LR. */
14541 thumb_force_lr_save (void)
14543 return !cfun->machine->lr_save_eliminated
14544 && (!leaf_function_p ()
14545 || thumb_far_jump_used_p ()
14546 || df_regs_ever_live_p (LR_REGNUM));
14550 /* Compute the distance from register FROM to register TO.
14551 These can be the arg pointer (26), the soft frame pointer (25),
14552 the stack pointer (13) or the hard frame pointer (11).
14553 In thumb mode r7 is used as the soft frame pointer, if needed.
14554 Typical stack layout looks like this:
14556 old stack pointer -> | |
14559 | | saved arguments for
14560 | | vararg functions
14563 hard FP & arg pointer -> | | \
14571 soft frame pointer -> | | /
14576 locals base pointer -> | | /
14581 current stack pointer -> | | /
14584 For a given function some or all of these stack components
14585 may not be needed, giving rise to the possibility of
14586 eliminating some of the registers.
14588 The values returned by this function must reflect the behavior
14589 of arm_expand_prologue() and arm_compute_save_reg_mask().
14591 The sign of the number returned reflects the direction of stack
14592 growth, so the values are positive for all eliminations except
14593 from the soft frame pointer to the hard frame pointer.
14595 SFP may point just inside the local variables block to ensure correct
14599 /* Calculate stack offsets. These are used to calculate register elimination
14600 offsets and in prologue/epilogue code. Also calculates which registers
14601 should be saved. */
14603 static arm_stack_offsets *
14604 arm_get_frame_offsets (void)
14606 struct arm_stack_offsets *offsets;
14607 unsigned long func_type;
14611 HOST_WIDE_INT frame_size;
14614 offsets = &cfun->machine->stack_offsets;
14616 /* We need to know if we are a leaf function. Unfortunately, it
14617 is possible to be called after start_sequence has been called,
14618 which causes get_insns to return the insns for the sequence,
14619 not the function, which will cause leaf_function_p to return
14620 the incorrect result.
14622 to know about leaf functions once reload has completed, and the
14623 frame size cannot be changed after that time, so we can safely
14624 use the cached value. */
14626 if (reload_completed)
14629 /* Initially this is the size of the local variables. It will translated
14630 into an offset once we have determined the size of preceding data. */
14631 frame_size = ROUND_UP_WORD (get_frame_size ());
14633 leaf = leaf_function_p ();
14635 /* Space for variadic functions. */
14636 offsets->saved_args = crtl->args.pretend_args_size;
14638 /* In Thumb mode this is incorrect, but never used. */
14639 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14640 arm_compute_static_chain_stack_bytes();
14644 unsigned int regno;
14646 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14647 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14648 saved = core_saved;
14650 /* We know that SP will be doubleword aligned on entry, and we must
14651 preserve that condition at any subroutine call. We also require the
14652 soft frame pointer to be doubleword aligned. */
14654 if (TARGET_REALLY_IWMMXT)
14656 /* Check for the call-saved iWMMXt registers. */
14657 for (regno = FIRST_IWMMXT_REGNUM;
14658 regno <= LAST_IWMMXT_REGNUM;
14660 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14664 func_type = arm_current_func_type ();
14665 if (! IS_VOLATILE (func_type))
14667 /* Space for saved FPA registers. */
14668 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14669 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14672 /* Space for saved VFP registers. */
14673 if (TARGET_HARD_FLOAT && TARGET_VFP)
14674 saved += arm_get_vfp_saved_size ();
14677 else /* TARGET_THUMB1 */
14679 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14680 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14681 saved = core_saved;
14682 if (TARGET_BACKTRACE)
14686 /* Saved registers include the stack frame. */
14687 offsets->saved_regs = offsets->saved_args + saved +
14688 arm_compute_static_chain_stack_bytes();
14689 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14690 /* A leaf function does not need any stack alignment if it has nothing
14692 if (leaf && frame_size == 0)
14694 offsets->outgoing_args = offsets->soft_frame;
14695 offsets->locals_base = offsets->soft_frame;
14699 /* Ensure SFP has the correct alignment. */
14700 if (ARM_DOUBLEWORD_ALIGN
14701 && (offsets->soft_frame & 7))
14703 offsets->soft_frame += 4;
14704 /* Try to align stack by pushing an extra reg. Don't bother doing this
14705 when there is a stack frame as the alignment will be rolled into
14706 the normal stack adjustment. */
14707 if (frame_size + crtl->outgoing_args_size == 0)
14711 /* If it is safe to use r3, then do so. This sometimes
14712 generates better code on Thumb-2 by avoiding the need to
14713 use 32-bit push/pop instructions. */
14714 if (!crtl->tail_call_emit
14715 && arm_size_return_regs () <= 12
14716 && (offsets->saved_regs_mask & (1 << 3)) == 0)
14721 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14723 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14732 offsets->saved_regs += 4;
14733 offsets->saved_regs_mask |= (1 << reg);
14738 offsets->locals_base = offsets->soft_frame + frame_size;
14739 offsets->outgoing_args = (offsets->locals_base
14740 + crtl->outgoing_args_size);
14742 if (ARM_DOUBLEWORD_ALIGN)
14744 /* Ensure SP remains doubleword aligned. */
14745 if (offsets->outgoing_args & 7)
14746 offsets->outgoing_args += 4;
14747 gcc_assert (!(offsets->outgoing_args & 7));
14754 /* Calculate the relative offsets for the different stack pointers. Positive
14755 offsets are in the direction of stack growth. */
14758 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14760 arm_stack_offsets *offsets;
14762 offsets = arm_get_frame_offsets ();
14764 /* OK, now we have enough information to compute the distances.
14765 There must be an entry in these switch tables for each pair
14766 of registers in ELIMINABLE_REGS, even if some of the entries
14767 seem to be redundant or useless. */
14770 case ARG_POINTER_REGNUM:
14773 case THUMB_HARD_FRAME_POINTER_REGNUM:
14776 case FRAME_POINTER_REGNUM:
14777 /* This is the reverse of the soft frame pointer
14778 to hard frame pointer elimination below. */
14779 return offsets->soft_frame - offsets->saved_args;
14781 case ARM_HARD_FRAME_POINTER_REGNUM:
14782 /* This is only non-zero in the case where the static chain register
14783 is stored above the frame. */
14784 return offsets->frame - offsets->saved_args - 4;
14786 case STACK_POINTER_REGNUM:
14787 /* If nothing has been pushed on the stack at all
14788 then this will return -4. This *is* correct! */
14789 return offsets->outgoing_args - (offsets->saved_args + 4);
14792 gcc_unreachable ();
14794 gcc_unreachable ();
14796 case FRAME_POINTER_REGNUM:
14799 case THUMB_HARD_FRAME_POINTER_REGNUM:
14802 case ARM_HARD_FRAME_POINTER_REGNUM:
14803 /* The hard frame pointer points to the top entry in the
14804 stack frame. The soft frame pointer to the bottom entry
14805 in the stack frame. If there is no stack frame at all,
14806 then they are identical. */
14808 return offsets->frame - offsets->soft_frame;
14810 case STACK_POINTER_REGNUM:
14811 return offsets->outgoing_args - offsets->soft_frame;
14814 gcc_unreachable ();
14816 gcc_unreachable ();
14819 /* You cannot eliminate from the stack pointer.
14820 In theory you could eliminate from the hard frame
14821 pointer to the stack pointer, but this will never
14822 happen, since if a stack frame is not needed the
14823 hard frame pointer will never be used. */
14824 gcc_unreachable ();
14828 /* Given FROM and TO register numbers, say whether this elimination is
14829 allowed. Frame pointer elimination is automatically handled.
14831 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14832 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14833 pointer, we must eliminate FRAME_POINTER_REGNUM into
14834 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14835 ARG_POINTER_REGNUM. */
14838 arm_can_eliminate (const int from, const int to)
14840 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14841 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14842 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14843 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14847 /* Emit RTL to save coprocessor registers on function entry. Returns the
14848 number of bytes pushed. */
14851 arm_save_coproc_regs(void)
14853 int saved_size = 0;
14855 unsigned start_reg;
14858 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14859 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14861 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14862 insn = gen_rtx_MEM (V2SImode, insn);
14863 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14864 RTX_FRAME_RELATED_P (insn) = 1;
14868 /* Save any floating point call-saved registers used by this
14870 if (TARGET_FPA_EMU2)
14872 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14873 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14875 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14876 insn = gen_rtx_MEM (XFmode, insn);
14877 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14878 RTX_FRAME_RELATED_P (insn) = 1;
14884 start_reg = LAST_FPA_REGNUM;
14886 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14888 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14890 if (start_reg - reg == 3)
14892 insn = emit_sfm (reg, 4);
14893 RTX_FRAME_RELATED_P (insn) = 1;
14895 start_reg = reg - 1;
14900 if (start_reg != reg)
14902 insn = emit_sfm (reg + 1, start_reg - reg);
14903 RTX_FRAME_RELATED_P (insn) = 1;
14904 saved_size += (start_reg - reg) * 12;
14906 start_reg = reg - 1;
14910 if (start_reg != reg)
14912 insn = emit_sfm (reg + 1, start_reg - reg);
14913 saved_size += (start_reg - reg) * 12;
14914 RTX_FRAME_RELATED_P (insn) = 1;
14917 if (TARGET_HARD_FLOAT && TARGET_VFP)
14919 start_reg = FIRST_VFP_REGNUM;
14921 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14923 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14924 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14926 if (start_reg != reg)
14927 saved_size += vfp_emit_fstmd (start_reg,
14928 (reg - start_reg) / 2);
14929 start_reg = reg + 2;
14932 if (start_reg != reg)
14933 saved_size += vfp_emit_fstmd (start_reg,
14934 (reg - start_reg) / 2);
14940 /* Set the Thumb frame pointer from the stack pointer. */
14943 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14945 HOST_WIDE_INT amount;
14948 amount = offsets->outgoing_args - offsets->locals_base;
14950 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14951 stack_pointer_rtx, GEN_INT (amount)));
14954 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14955 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14956 expects the first two operands to be the same. */
14959 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14961 hard_frame_pointer_rtx));
14965 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14966 hard_frame_pointer_rtx,
14967 stack_pointer_rtx));
14969 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14970 plus_constant (stack_pointer_rtx, amount));
14971 RTX_FRAME_RELATED_P (dwarf) = 1;
14972 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14975 RTX_FRAME_RELATED_P (insn) = 1;
14978 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14981 arm_expand_prologue (void)
14986 unsigned long live_regs_mask;
14987 unsigned long func_type;
14989 int saved_pretend_args = 0;
14990 int saved_regs = 0;
14991 unsigned HOST_WIDE_INT args_to_push;
14992 arm_stack_offsets *offsets;
14994 func_type = arm_current_func_type ();
14996 /* Naked functions don't have prologues. */
14997 if (IS_NAKED (func_type))
15000 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15001 args_to_push = crtl->args.pretend_args_size;
15003 /* Compute which register we will have to save onto the stack. */
15004 offsets = arm_get_frame_offsets ();
15005 live_regs_mask = offsets->saved_regs_mask;
15007 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15009 if (IS_STACKALIGN (func_type))
15014 /* Handle a word-aligned stack pointer. We generate the following:
15019 <save and restore r0 in normal prologue/epilogue>
15023 The unwinder doesn't need to know about the stack realignment.
15024 Just tell it we saved SP in r0. */
15025 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15027 r0 = gen_rtx_REG (SImode, 0);
15028 r1 = gen_rtx_REG (SImode, 1);
15029 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15030 compiler won't choke. */
15031 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15032 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15033 insn = gen_movsi (r0, stack_pointer_rtx);
15034 RTX_FRAME_RELATED_P (insn) = 1;
15035 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15037 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15038 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15041 /* For APCS frames, if IP register is clobbered
15042 when creating frame, save that register in a special
15044 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15046 if (IS_INTERRUPT (func_type))
15048 /* Interrupt functions must not corrupt any registers.
15049 Creating a frame pointer however, corrupts the IP
15050 register, so we must push it first. */
15051 insn = emit_multi_reg_push (1 << IP_REGNUM);
15053 /* Do not set RTX_FRAME_RELATED_P on this insn.
15054 The dwarf stack unwinding code only wants to see one
15055 stack decrement per function, and this is not it. If
15056 this instruction is labeled as being part of the frame
15057 creation sequence then dwarf2out_frame_debug_expr will
15058 die when it encounters the assignment of IP to FP
15059 later on, since the use of SP here establishes SP as
15060 the CFA register and not IP.
15062 Anyway this instruction is not really part of the stack
15063 frame creation although it is part of the prologue. */
15065 else if (IS_NESTED (func_type))
15067 /* The Static chain register is the same as the IP register
15068 used as a scratch register during stack frame creation.
15069 To get around this need to find somewhere to store IP
15070 whilst the frame is being created. We try the following
15073 1. The last argument register.
15074 2. A slot on the stack above the frame. (This only
15075 works if the function is not a varargs function).
15076 3. Register r3, after pushing the argument registers
15079 Note - we only need to tell the dwarf2 backend about the SP
15080 adjustment in the second variant; the static chain register
15081 doesn't need to be unwound, as it doesn't contain a value
15082 inherited from the caller. */
15084 if (df_regs_ever_live_p (3) == false)
15085 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15086 else if (args_to_push == 0)
15090 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15093 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15094 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15097 /* Just tell the dwarf backend that we adjusted SP. */
15098 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15099 plus_constant (stack_pointer_rtx,
15101 RTX_FRAME_RELATED_P (insn) = 1;
15102 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15106 /* Store the args on the stack. */
15107 if (cfun->machine->uses_anonymous_args)
15108 insn = emit_multi_reg_push
15109 ((0xf0 >> (args_to_push / 4)) & 0xf);
15112 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15113 GEN_INT (- args_to_push)));
15115 RTX_FRAME_RELATED_P (insn) = 1;
15117 saved_pretend_args = 1;
15118 fp_offset = args_to_push;
15121 /* Now reuse r3 to preserve IP. */
15122 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15126 insn = emit_set_insn (ip_rtx,
15127 plus_constant (stack_pointer_rtx, fp_offset));
15128 RTX_FRAME_RELATED_P (insn) = 1;
15133 /* Push the argument registers, or reserve space for them. */
15134 if (cfun->machine->uses_anonymous_args)
15135 insn = emit_multi_reg_push
15136 ((0xf0 >> (args_to_push / 4)) & 0xf);
15139 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15140 GEN_INT (- args_to_push)));
15141 RTX_FRAME_RELATED_P (insn) = 1;
15144 /* If this is an interrupt service routine, and the link register
15145 is going to be pushed, and we're not generating extra
15146 push of IP (needed when frame is needed and frame layout if apcs),
15147 subtracting four from LR now will mean that the function return
15148 can be done with a single instruction. */
15149 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15150 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15151 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15154 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15156 emit_set_insn (lr, plus_constant (lr, -4));
15159 if (live_regs_mask)
15161 saved_regs += bit_count (live_regs_mask) * 4;
15162 if (optimize_size && !frame_pointer_needed
15163 && saved_regs == offsets->saved_regs - offsets->saved_args)
15165 /* If no coprocessor registers are being pushed and we don't have
15166 to worry about a frame pointer then push extra registers to
15167 create the stack frame. This is done is a way that does not
15168 alter the frame layout, so is independent of the epilogue. */
15172 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15174 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15175 if (frame && n * 4 >= frame)
15178 live_regs_mask |= (1 << n) - 1;
15179 saved_regs += frame;
15182 insn = emit_multi_reg_push (live_regs_mask);
15183 RTX_FRAME_RELATED_P (insn) = 1;
15186 if (! IS_VOLATILE (func_type))
15187 saved_regs += arm_save_coproc_regs ();
15189 if (frame_pointer_needed && TARGET_ARM)
15191 /* Create the new frame pointer. */
15192 if (TARGET_APCS_FRAME)
15194 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15195 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15196 RTX_FRAME_RELATED_P (insn) = 1;
15198 if (IS_NESTED (func_type))
15200 /* Recover the static chain register. */
15201 if (!df_regs_ever_live_p (3)
15202 || saved_pretend_args)
15203 insn = gen_rtx_REG (SImode, 3);
15204 else /* if (crtl->args.pretend_args_size == 0) */
15206 insn = plus_constant (hard_frame_pointer_rtx, 4);
15207 insn = gen_frame_mem (SImode, insn);
15209 emit_set_insn (ip_rtx, insn);
15210 /* Add a USE to stop propagate_one_insn() from barfing. */
15211 emit_insn (gen_prologue_use (ip_rtx));
15216 insn = GEN_INT (saved_regs - 4);
15217 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15218 stack_pointer_rtx, insn));
15219 RTX_FRAME_RELATED_P (insn) = 1;
15223 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15225 /* This add can produce multiple insns for a large constant, so we
15226 need to get tricky. */
15227 rtx last = get_last_insn ();
15229 amount = GEN_INT (offsets->saved_args + saved_regs
15230 - offsets->outgoing_args);
15232 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15236 last = last ? NEXT_INSN (last) : get_insns ();
15237 RTX_FRAME_RELATED_P (last) = 1;
15239 while (last != insn);
15241 /* If the frame pointer is needed, emit a special barrier that
15242 will prevent the scheduler from moving stores to the frame
15243 before the stack adjustment. */
15244 if (frame_pointer_needed)
15245 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15246 hard_frame_pointer_rtx));
15250 if (frame_pointer_needed && TARGET_THUMB2)
15251 thumb_set_frame_pointer (offsets);
15253 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15255 unsigned long mask;
15257 mask = live_regs_mask;
15258 mask &= THUMB2_WORK_REGS;
15259 if (!IS_NESTED (func_type))
15260 mask |= (1 << IP_REGNUM);
15261 arm_load_pic_register (mask);
15264 /* If we are profiling, make sure no instructions are scheduled before
15265 the call to mcount. Similarly if the user has requested no
15266 scheduling in the prolog. Similarly if we want non-call exceptions
15267 using the EABI unwinder, to prevent faulting instructions from being
15268 swapped with a stack adjustment. */
15269 if (crtl->profile || !TARGET_SCHED_PROLOG
15270 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
15271 emit_insn (gen_blockage ());
15273 /* If the link register is being kept alive, with the return address in it,
15274 then make sure that it does not get reused by the ce2 pass. */
15275 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15276 cfun->machine->lr_save_eliminated = 1;
15279 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15281 arm_print_condition (FILE *stream)
15283 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15285 /* Branch conversion is not implemented for Thumb-2. */
15288 output_operand_lossage ("predicated Thumb instruction");
15291 if (current_insn_predicate != NULL)
15293 output_operand_lossage
15294 ("predicated instruction in conditional sequence");
15298 fputs (arm_condition_codes[arm_current_cc], stream);
15300 else if (current_insn_predicate)
15302 enum arm_cond_code code;
15306 output_operand_lossage ("predicated Thumb instruction");
15310 code = get_arm_condition_code (current_insn_predicate);
15311 fputs (arm_condition_codes[code], stream);
15316 /* If CODE is 'd', then the X is a condition operand and the instruction
15317 should only be executed if the condition is true.
15318 if CODE is 'D', then the X is a condition operand and the instruction
15319 should only be executed if the condition is false: however, if the mode
15320 of the comparison is CCFPEmode, then always execute the instruction -- we
15321 do this because in these circumstances !GE does not necessarily imply LT;
15322 in these cases the instruction pattern will take care to make sure that
15323 an instruction containing %d will follow, thereby undoing the effects of
15324 doing this instruction unconditionally.
15325 If CODE is 'N' then X is a floating point operand that must be negated
15327 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15328 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15330 arm_print_operand (FILE *stream, rtx x, int code)
15335 fputs (ASM_COMMENT_START, stream);
15339 fputs (user_label_prefix, stream);
15343 fputs (REGISTER_PREFIX, stream);
15347 arm_print_condition (stream);
15351 /* Nothing in unified syntax, otherwise the current condition code. */
15352 if (!TARGET_UNIFIED_ASM)
15353 arm_print_condition (stream);
15357 /* The current condition code in unified syntax, otherwise nothing. */
15358 if (TARGET_UNIFIED_ASM)
15359 arm_print_condition (stream);
15363 /* The current condition code for a condition code setting instruction.
15364 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15365 if (TARGET_UNIFIED_ASM)
15367 fputc('s', stream);
15368 arm_print_condition (stream);
15372 arm_print_condition (stream);
15373 fputc('s', stream);
15378 /* If the instruction is conditionally executed then print
15379 the current condition code, otherwise print 's'. */
15380 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15381 if (current_insn_predicate)
15382 arm_print_condition (stream);
15384 fputc('s', stream);
15387 /* %# is a "break" sequence. It doesn't output anything, but is used to
15388 separate e.g. operand numbers from following text, if that text consists
15389 of further digits which we don't want to be part of the operand
15397 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15398 r = real_value_negate (&r);
15399 fprintf (stream, "%s", fp_const_from_val (&r));
15403 /* An integer or symbol address without a preceding # sign. */
15405 switch (GET_CODE (x))
15408 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15412 output_addr_const (stream, x);
15416 gcc_unreachable ();
15421 if (GET_CODE (x) == CONST_INT)
15424 val = ARM_SIGN_EXTEND (~INTVAL (x));
15425 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15429 putc ('~', stream);
15430 output_addr_const (stream, x);
15435 /* The low 16 bits of an immediate constant. */
15436 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15440 fprintf (stream, "%s", arithmetic_instr (x, 1));
15443 /* Truncate Cirrus shift counts. */
15445 if (GET_CODE (x) == CONST_INT)
15447 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15450 arm_print_operand (stream, x, 0);
15454 fprintf (stream, "%s", arithmetic_instr (x, 0));
15462 if (!shift_operator (x, SImode))
15464 output_operand_lossage ("invalid shift operand");
15468 shift = shift_op (x, &val);
15472 fprintf (stream, ", %s ", shift);
15474 arm_print_operand (stream, XEXP (x, 1), 0);
15476 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15481 /* An explanation of the 'Q', 'R' and 'H' register operands:
15483 In a pair of registers containing a DI or DF value the 'Q'
15484 operand returns the register number of the register containing
15485 the least significant part of the value. The 'R' operand returns
15486 the register number of the register containing the most
15487 significant part of the value.
15489 The 'H' operand returns the higher of the two register numbers.
15490 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15491 same as the 'Q' operand, since the most significant part of the
15492 value is held in the lower number register. The reverse is true
15493 on systems where WORDS_BIG_ENDIAN is false.
15495 The purpose of these operands is to distinguish between cases
15496 where the endian-ness of the values is important (for example
15497 when they are added together), and cases where the endian-ness
15498 is irrelevant, but the order of register operations is important.
15499 For example when loading a value from memory into a register
15500 pair, the endian-ness does not matter. Provided that the value
15501 from the lower memory address is put into the lower numbered
15502 register, and the value from the higher address is put into the
15503 higher numbered register, the load will work regardless of whether
15504 the value being loaded is big-wordian or little-wordian. The
15505 order of the two register loads can matter however, if the address
15506 of the memory location is actually held in one of the registers
15507 being overwritten by the load.
15509 The 'Q' and 'R' constraints are also available for 64-bit
15512 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15514 rtx part = gen_lowpart (SImode, x);
15515 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15519 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15521 output_operand_lossage ("invalid operand for code '%c'", code);
15525 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15529 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15531 enum machine_mode mode = GET_MODE (x);
15534 if (mode == VOIDmode)
15536 part = gen_highpart_mode (SImode, mode, x);
15537 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15541 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15543 output_operand_lossage ("invalid operand for code '%c'", code);
15547 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15551 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15553 output_operand_lossage ("invalid operand for code '%c'", code);
15557 asm_fprintf (stream, "%r", REGNO (x) + 1);
15561 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15563 output_operand_lossage ("invalid operand for code '%c'", code);
15567 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15571 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15573 output_operand_lossage ("invalid operand for code '%c'", code);
15577 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15581 asm_fprintf (stream, "%r",
15582 GET_CODE (XEXP (x, 0)) == REG
15583 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15587 asm_fprintf (stream, "{%r-%r}",
15589 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15592 /* Like 'M', but writing doubleword vector registers, for use by Neon
15596 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15597 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15599 asm_fprintf (stream, "{d%d}", regno);
15601 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15606 /* CONST_TRUE_RTX means always -- that's the default. */
15607 if (x == const_true_rtx)
15610 if (!COMPARISON_P (x))
15612 output_operand_lossage ("invalid operand for code '%c'", code);
15616 fputs (arm_condition_codes[get_arm_condition_code (x)],
15621 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15622 want to do that. */
15623 if (x == const_true_rtx)
15625 output_operand_lossage ("instruction never executed");
15628 if (!COMPARISON_P (x))
15630 output_operand_lossage ("invalid operand for code '%c'", code);
15634 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15635 (get_arm_condition_code (x))],
15639 /* Cirrus registers can be accessed in a variety of ways:
15640 single floating point (f)
15641 double floating point (d)
15643 64bit integer (dx). */
15644 case 'W': /* Cirrus register in F mode. */
15645 case 'X': /* Cirrus register in D mode. */
15646 case 'Y': /* Cirrus register in FX mode. */
15647 case 'Z': /* Cirrus register in DX mode. */
15648 gcc_assert (GET_CODE (x) == REG
15649 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15651 fprintf (stream, "mv%s%s",
15653 : code == 'X' ? "d"
15654 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15658 /* Print cirrus register in the mode specified by the register's mode. */
15661 int mode = GET_MODE (x);
15663 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15665 output_operand_lossage ("invalid operand for code '%c'", code);
15669 fprintf (stream, "mv%s%s",
15670 mode == DFmode ? "d"
15671 : mode == SImode ? "fx"
15672 : mode == DImode ? "dx"
15673 : "f", reg_names[REGNO (x)] + 2);
15679 if (GET_CODE (x) != REG
15680 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15681 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15682 /* Bad value for wCG register number. */
15684 output_operand_lossage ("invalid operand for code '%c'", code);
15689 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15692 /* Print an iWMMXt control register name. */
15694 if (GET_CODE (x) != CONST_INT
15696 || INTVAL (x) >= 16)
15697 /* Bad value for wC register number. */
15699 output_operand_lossage ("invalid operand for code '%c'", code);
15705 static const char * wc_reg_names [16] =
15707 "wCID", "wCon", "wCSSF", "wCASF",
15708 "wC4", "wC5", "wC6", "wC7",
15709 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15710 "wC12", "wC13", "wC14", "wC15"
15713 fprintf (stream, wc_reg_names [INTVAL (x)]);
15717 /* Print the high single-precision register of a VFP double-precision
15721 int mode = GET_MODE (x);
15724 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15726 output_operand_lossage ("invalid operand for code '%c'", code);
15731 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15733 output_operand_lossage ("invalid operand for code '%c'", code);
15737 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15741 /* Print a VFP/Neon double precision or quad precision register name. */
15745 int mode = GET_MODE (x);
15746 int is_quad = (code == 'q');
15749 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15751 output_operand_lossage ("invalid operand for code '%c'", code);
15755 if (GET_CODE (x) != REG
15756 || !IS_VFP_REGNUM (REGNO (x)))
15758 output_operand_lossage ("invalid operand for code '%c'", code);
15763 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15764 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15766 output_operand_lossage ("invalid operand for code '%c'", code);
15770 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15771 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15775 /* These two codes print the low/high doubleword register of a Neon quad
15776 register, respectively. For pair-structure types, can also print
15777 low/high quadword registers. */
15781 int mode = GET_MODE (x);
15784 if ((GET_MODE_SIZE (mode) != 16
15785 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15787 output_operand_lossage ("invalid operand for code '%c'", code);
15792 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15794 output_operand_lossage ("invalid operand for code '%c'", code);
15798 if (GET_MODE_SIZE (mode) == 16)
15799 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15800 + (code == 'f' ? 1 : 0));
15802 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15803 + (code == 'f' ? 1 : 0));
15807 /* Print a VFPv3 floating-point constant, represented as an integer
15811 int index = vfp3_const_double_index (x);
15812 gcc_assert (index != -1);
15813 fprintf (stream, "%d", index);
15817 /* Print bits representing opcode features for Neon.
15819 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15820 and polynomials as unsigned.
15822 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15824 Bit 2 is 1 for rounding functions, 0 otherwise. */
15826 /* Identify the type as 's', 'u', 'p' or 'f'. */
15829 HOST_WIDE_INT bits = INTVAL (x);
15830 fputc ("uspf"[bits & 3], stream);
15834 /* Likewise, but signed and unsigned integers are both 'i'. */
15837 HOST_WIDE_INT bits = INTVAL (x);
15838 fputc ("iipf"[bits & 3], stream);
15842 /* As for 'T', but emit 'u' instead of 'p'. */
15845 HOST_WIDE_INT bits = INTVAL (x);
15846 fputc ("usuf"[bits & 3], stream);
15850 /* Bit 2: rounding (vs none). */
15853 HOST_WIDE_INT bits = INTVAL (x);
15854 fputs ((bits & 4) != 0 ? "r" : "", stream);
15858 /* Memory operand for vld1/vst1 instruction. */
15862 bool postinc = FALSE;
15863 gcc_assert (GET_CODE (x) == MEM);
15864 addr = XEXP (x, 0);
15865 if (GET_CODE (addr) == POST_INC)
15868 addr = XEXP (addr, 0);
15870 asm_fprintf (stream, "[%r]", REGNO (addr));
15872 fputs("!", stream);
15876 /* Translate an S register number into a D register number and element index. */
15879 int mode = GET_MODE (x);
15882 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15884 output_operand_lossage ("invalid operand for code '%c'", code);
15889 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15891 output_operand_lossage ("invalid operand for code '%c'", code);
15895 regno = regno - FIRST_VFP_REGNUM;
15896 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15900 /* Register specifier for vld1.16/vst1.16. Translate the S register
15901 number into a D register number and element index. */
15904 int mode = GET_MODE (x);
15907 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15909 output_operand_lossage ("invalid operand for code '%c'", code);
15914 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15916 output_operand_lossage ("invalid operand for code '%c'", code);
15920 regno = regno - FIRST_VFP_REGNUM;
15921 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15928 output_operand_lossage ("missing operand");
15932 switch (GET_CODE (x))
15935 asm_fprintf (stream, "%r", REGNO (x));
15939 output_memory_reference_mode = GET_MODE (x);
15940 output_address (XEXP (x, 0));
15947 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15948 sizeof (fpstr), 0, 1);
15949 fprintf (stream, "#%s", fpstr);
15952 fprintf (stream, "#%s", fp_immediate_constant (x));
15956 gcc_assert (GET_CODE (x) != NEG);
15957 fputc ('#', stream);
15958 if (GET_CODE (x) == HIGH)
15960 fputs (":lower16:", stream);
15964 output_addr_const (stream, x);
15970 /* Target hook for printing a memory address. */
15972 arm_print_operand_address (FILE *stream, rtx x)
15976 int is_minus = GET_CODE (x) == MINUS;
15978 if (GET_CODE (x) == REG)
15979 asm_fprintf (stream, "[%r, #0]", REGNO (x));
15980 else if (GET_CODE (x) == PLUS || is_minus)
15982 rtx base = XEXP (x, 0);
15983 rtx index = XEXP (x, 1);
15984 HOST_WIDE_INT offset = 0;
15985 if (GET_CODE (base) != REG
15986 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
15988 /* Ensure that BASE is a register. */
15989 /* (one of them must be). */
15990 /* Also ensure the SP is not used as in index register. */
15995 switch (GET_CODE (index))
15998 offset = INTVAL (index);
16001 asm_fprintf (stream, "[%r, #%wd]",
16002 REGNO (base), offset);
16006 asm_fprintf (stream, "[%r, %s%r]",
16007 REGNO (base), is_minus ? "-" : "",
16017 asm_fprintf (stream, "[%r, %s%r",
16018 REGNO (base), is_minus ? "-" : "",
16019 REGNO (XEXP (index, 0)));
16020 arm_print_operand (stream, index, 'S');
16021 fputs ("]", stream);
16026 gcc_unreachable ();
16029 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16030 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16032 extern enum machine_mode output_memory_reference_mode;
16034 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16036 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16037 asm_fprintf (stream, "[%r, #%s%d]!",
16038 REGNO (XEXP (x, 0)),
16039 GET_CODE (x) == PRE_DEC ? "-" : "",
16040 GET_MODE_SIZE (output_memory_reference_mode));
16042 asm_fprintf (stream, "[%r], #%s%d",
16043 REGNO (XEXP (x, 0)),
16044 GET_CODE (x) == POST_DEC ? "-" : "",
16045 GET_MODE_SIZE (output_memory_reference_mode));
16047 else if (GET_CODE (x) == PRE_MODIFY)
16049 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16050 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16051 asm_fprintf (stream, "#%wd]!",
16052 INTVAL (XEXP (XEXP (x, 1), 1)));
16054 asm_fprintf (stream, "%r]!",
16055 REGNO (XEXP (XEXP (x, 1), 1)));
16057 else if (GET_CODE (x) == POST_MODIFY)
16059 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16060 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16061 asm_fprintf (stream, "#%wd",
16062 INTVAL (XEXP (XEXP (x, 1), 1)));
16064 asm_fprintf (stream, "%r",
16065 REGNO (XEXP (XEXP (x, 1), 1)));
16067 else output_addr_const (stream, x);
16071 if (GET_CODE (x) == REG)
16072 asm_fprintf (stream, "[%r]", REGNO (x));
16073 else if (GET_CODE (x) == POST_INC)
16074 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16075 else if (GET_CODE (x) == PLUS)
16077 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16078 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16079 asm_fprintf (stream, "[%r, #%wd]",
16080 REGNO (XEXP (x, 0)),
16081 INTVAL (XEXP (x, 1)));
16083 asm_fprintf (stream, "[%r, %r]",
16084 REGNO (XEXP (x, 0)),
16085 REGNO (XEXP (x, 1)));
16088 output_addr_const (stream, x);
16092 /* Target hook for indicating whether a punctuation character for
16093 TARGET_PRINT_OPERAND is valid. */
16095 arm_print_operand_punct_valid_p (unsigned char code)
16097 return (code == '@' || code == '|' || code == '.'
16098 || code == '(' || code == ')' || code == '#'
16099 || (TARGET_32BIT && (code == '?'))
16100 || (TARGET_THUMB2 && (code == '!'))
16101 || (TARGET_THUMB && (code == '_')));
16104 /* Target hook for assembling integer objects. The ARM version needs to
16105 handle word-sized values specially. */
16107 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16109 enum machine_mode mode;
16111 if (size == UNITS_PER_WORD && aligned_p)
16113 fputs ("\t.word\t", asm_out_file);
16114 output_addr_const (asm_out_file, x);
16116 /* Mark symbols as position independent. We only do this in the
16117 .text segment, not in the .data segment. */
16118 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16119 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16121 /* See legitimize_pic_address for an explanation of the
16122 TARGET_VXWORKS_RTP check. */
16123 if (TARGET_VXWORKS_RTP
16124 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16125 fputs ("(GOT)", asm_out_file);
16127 fputs ("(GOTOFF)", asm_out_file);
16129 fputc ('\n', asm_out_file);
16133 mode = GET_MODE (x);
16135 if (arm_vector_mode_supported_p (mode))
16139 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16141 units = CONST_VECTOR_NUNITS (x);
16142 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16144 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16145 for (i = 0; i < units; i++)
16147 rtx elt = CONST_VECTOR_ELT (x, i);
16149 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16152 for (i = 0; i < units; i++)
16154 rtx elt = CONST_VECTOR_ELT (x, i);
16155 REAL_VALUE_TYPE rval;
16157 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16160 (rval, GET_MODE_INNER (mode),
16161 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16167 return default_assemble_integer (x, size, aligned_p);
16171 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16175 if (!TARGET_AAPCS_BASED)
16178 default_named_section_asm_out_constructor
16179 : default_named_section_asm_out_destructor) (symbol, priority);
16183 /* Put these in the .init_array section, using a special relocation. */
16184 if (priority != DEFAULT_INIT_PRIORITY)
16187 sprintf (buf, "%s.%.5u",
16188 is_ctor ? ".init_array" : ".fini_array",
16190 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16197 switch_to_section (s);
16198 assemble_align (POINTER_SIZE);
16199 fputs ("\t.word\t", asm_out_file);
16200 output_addr_const (asm_out_file, symbol);
16201 fputs ("(target1)\n", asm_out_file);
16204 /* Add a function to the list of static constructors. */
16207 arm_elf_asm_constructor (rtx symbol, int priority)
16209 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16212 /* Add a function to the list of static destructors. */
16215 arm_elf_asm_destructor (rtx symbol, int priority)
16217 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16220 /* A finite state machine takes care of noticing whether or not instructions
16221 can be conditionally executed, and thus decrease execution time and code
16222 size by deleting branch instructions. The fsm is controlled by
16223 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16225 /* The state of the fsm controlling condition codes are:
16226 0: normal, do nothing special
16227 1: make ASM_OUTPUT_OPCODE not output this instruction
16228 2: make ASM_OUTPUT_OPCODE not output this instruction
16229 3: make instructions conditional
16230 4: make instructions conditional
16232 State transitions (state->state by whom under condition):
16233 0 -> 1 final_prescan_insn if the `target' is a label
16234 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16235 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16236 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16237 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16238 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16239 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16240 (the target insn is arm_target_insn).
16242 If the jump clobbers the conditions then we use states 2 and 4.
16244 A similar thing can be done with conditional return insns.
16246 XXX In case the `target' is an unconditional branch, this conditionalising
16247 of the instructions always reduces code size, but not always execution
16248 time. But then, I want to reduce the code size to somewhere near what
16249 /bin/cc produces. */
16251 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16252 instructions. When a COND_EXEC instruction is seen the subsequent
16253 instructions are scanned so that multiple conditional instructions can be
16254 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16255 specify the length and true/false mask for the IT block. These will be
16256 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
16258 /* Returns the index of the ARM condition code string in
16259 `arm_condition_codes'. COMPARISON should be an rtx like
16260 `(eq (...) (...))'. */
16261 static enum arm_cond_code
16262 get_arm_condition_code (rtx comparison)
16264 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16265 enum arm_cond_code code;
16266 enum rtx_code comp_code = GET_CODE (comparison);
16268 if (GET_MODE_CLASS (mode) != MODE_CC)
16269 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16270 XEXP (comparison, 1));
16274 case CC_DNEmode: code = ARM_NE; goto dominance;
16275 case CC_DEQmode: code = ARM_EQ; goto dominance;
16276 case CC_DGEmode: code = ARM_GE; goto dominance;
16277 case CC_DGTmode: code = ARM_GT; goto dominance;
16278 case CC_DLEmode: code = ARM_LE; goto dominance;
16279 case CC_DLTmode: code = ARM_LT; goto dominance;
16280 case CC_DGEUmode: code = ARM_CS; goto dominance;
16281 case CC_DGTUmode: code = ARM_HI; goto dominance;
16282 case CC_DLEUmode: code = ARM_LS; goto dominance;
16283 case CC_DLTUmode: code = ARM_CC;
16286 gcc_assert (comp_code == EQ || comp_code == NE);
16288 if (comp_code == EQ)
16289 return ARM_INVERSE_CONDITION_CODE (code);
16295 case NE: return ARM_NE;
16296 case EQ: return ARM_EQ;
16297 case GE: return ARM_PL;
16298 case LT: return ARM_MI;
16299 default: gcc_unreachable ();
16305 case NE: return ARM_NE;
16306 case EQ: return ARM_EQ;
16307 default: gcc_unreachable ();
16313 case NE: return ARM_MI;
16314 case EQ: return ARM_PL;
16315 default: gcc_unreachable ();
16320 /* These encodings assume that AC=1 in the FPA system control
16321 byte. This allows us to handle all cases except UNEQ and
16325 case GE: return ARM_GE;
16326 case GT: return ARM_GT;
16327 case LE: return ARM_LS;
16328 case LT: return ARM_MI;
16329 case NE: return ARM_NE;
16330 case EQ: return ARM_EQ;
16331 case ORDERED: return ARM_VC;
16332 case UNORDERED: return ARM_VS;
16333 case UNLT: return ARM_LT;
16334 case UNLE: return ARM_LE;
16335 case UNGT: return ARM_HI;
16336 case UNGE: return ARM_PL;
16337 /* UNEQ and LTGT do not have a representation. */
16338 case UNEQ: /* Fall through. */
16339 case LTGT: /* Fall through. */
16340 default: gcc_unreachable ();
16346 case NE: return ARM_NE;
16347 case EQ: return ARM_EQ;
16348 case GE: return ARM_LE;
16349 case GT: return ARM_LT;
16350 case LE: return ARM_GE;
16351 case LT: return ARM_GT;
16352 case GEU: return ARM_LS;
16353 case GTU: return ARM_CC;
16354 case LEU: return ARM_CS;
16355 case LTU: return ARM_HI;
16356 default: gcc_unreachable ();
16362 case LTU: return ARM_CS;
16363 case GEU: return ARM_CC;
16364 default: gcc_unreachable ();
16370 case NE: return ARM_NE;
16371 case EQ: return ARM_EQ;
16372 case GEU: return ARM_CS;
16373 case GTU: return ARM_HI;
16374 case LEU: return ARM_LS;
16375 case LTU: return ARM_CC;
16376 default: gcc_unreachable ();
16382 case GE: return ARM_GE;
16383 case LT: return ARM_LT;
16384 case GEU: return ARM_CS;
16385 case LTU: return ARM_CC;
16386 default: gcc_unreachable ();
16392 case NE: return ARM_NE;
16393 case EQ: return ARM_EQ;
16394 case GE: return ARM_GE;
16395 case GT: return ARM_GT;
16396 case LE: return ARM_LE;
16397 case LT: return ARM_LT;
16398 case GEU: return ARM_CS;
16399 case GTU: return ARM_HI;
16400 case LEU: return ARM_LS;
16401 case LTU: return ARM_CC;
16402 default: gcc_unreachable ();
16405 default: gcc_unreachable ();
16409 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
16412 thumb2_final_prescan_insn (rtx insn)
16414 rtx first_insn = insn;
16415 rtx body = PATTERN (insn);
16417 enum arm_cond_code code;
16421 /* Remove the previous insn from the count of insns to be output. */
16422 if (arm_condexec_count)
16423 arm_condexec_count--;
16425 /* Nothing to do if we are already inside a conditional block. */
16426 if (arm_condexec_count)
16429 if (GET_CODE (body) != COND_EXEC)
16432 /* Conditional jumps are implemented directly. */
16433 if (GET_CODE (insn) == JUMP_INSN)
16436 predicate = COND_EXEC_TEST (body);
16437 arm_current_cc = get_arm_condition_code (predicate);
16439 n = get_attr_ce_count (insn);
16440 arm_condexec_count = 1;
16441 arm_condexec_mask = (1 << n) - 1;
16442 arm_condexec_masklen = n;
16443 /* See if subsequent instructions can be combined into the same block. */
16446 insn = next_nonnote_insn (insn);
16448 /* Jumping into the middle of an IT block is illegal, so a label or
16449 barrier terminates the block. */
16450 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
16453 body = PATTERN (insn);
16454 /* USE and CLOBBER aren't really insns, so just skip them. */
16455 if (GET_CODE (body) == USE
16456 || GET_CODE (body) == CLOBBER)
16459 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16460 if (GET_CODE (body) != COND_EXEC)
16462 /* Allow up to 4 conditionally executed instructions in a block. */
16463 n = get_attr_ce_count (insn);
16464 if (arm_condexec_masklen + n > 4)
16467 predicate = COND_EXEC_TEST (body);
16468 code = get_arm_condition_code (predicate);
16469 mask = (1 << n) - 1;
16470 if (arm_current_cc == code)
16471 arm_condexec_mask |= (mask << arm_condexec_masklen);
16472 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
16475 arm_condexec_count++;
16476 arm_condexec_masklen += n;
16478 /* A jump must be the last instruction in a conditional block. */
16479 if (GET_CODE(insn) == JUMP_INSN)
16482 /* Restore recog_data (getting the attributes of other insns can
16483 destroy this array, but final.c assumes that it remains intact
16484 across this call). */
16485 extract_constrain_insn_cached (first_insn);
16489 arm_final_prescan_insn (rtx insn)
16491 /* BODY will hold the body of INSN. */
16492 rtx body = PATTERN (insn);
16494 /* This will be 1 if trying to repeat the trick, and things need to be
16495 reversed if it appears to fail. */
16498 /* If we start with a return insn, we only succeed if we find another one. */
16499 int seeking_return = 0;
16501 /* START_INSN will hold the insn from where we start looking. This is the
16502 first insn after the following code_label if REVERSE is true. */
16503 rtx start_insn = insn;
16505 /* If in state 4, check if the target branch is reached, in order to
16506 change back to state 0. */
16507 if (arm_ccfsm_state == 4)
16509 if (insn == arm_target_insn)
16511 arm_target_insn = NULL;
16512 arm_ccfsm_state = 0;
16517 /* If in state 3, it is possible to repeat the trick, if this insn is an
16518 unconditional branch to a label, and immediately following this branch
16519 is the previous target label which is only used once, and the label this
16520 branch jumps to is not too far off. */
16521 if (arm_ccfsm_state == 3)
16523 if (simplejump_p (insn))
16525 start_insn = next_nonnote_insn (start_insn);
16526 if (GET_CODE (start_insn) == BARRIER)
16528 /* XXX Isn't this always a barrier? */
16529 start_insn = next_nonnote_insn (start_insn);
16531 if (GET_CODE (start_insn) == CODE_LABEL
16532 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16533 && LABEL_NUSES (start_insn) == 1)
16538 else if (GET_CODE (body) == RETURN)
16540 start_insn = next_nonnote_insn (start_insn);
16541 if (GET_CODE (start_insn) == BARRIER)
16542 start_insn = next_nonnote_insn (start_insn);
16543 if (GET_CODE (start_insn) == CODE_LABEL
16544 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16545 && LABEL_NUSES (start_insn) == 1)
16548 seeking_return = 1;
16557 gcc_assert (!arm_ccfsm_state || reverse);
16558 if (GET_CODE (insn) != JUMP_INSN)
16561 /* This jump might be paralleled with a clobber of the condition codes
16562 the jump should always come first */
16563 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16564 body = XVECEXP (body, 0, 0);
16567 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16568 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16571 int fail = FALSE, succeed = FALSE;
16572 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16573 int then_not_else = TRUE;
16574 rtx this_insn = start_insn, label = 0;
16576 /* Register the insn jumped to. */
16579 if (!seeking_return)
16580 label = XEXP (SET_SRC (body), 0);
16582 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16583 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16584 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16586 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16587 then_not_else = FALSE;
16589 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16590 seeking_return = 1;
16591 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16593 seeking_return = 1;
16594 then_not_else = FALSE;
16597 gcc_unreachable ();
16599 /* See how many insns this branch skips, and what kind of insns. If all
16600 insns are okay, and the label or unconditional branch to the same
16601 label is not too far away, succeed. */
16602 for (insns_skipped = 0;
16603 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16607 this_insn = next_nonnote_insn (this_insn);
16611 switch (GET_CODE (this_insn))
16614 /* Succeed if it is the target label, otherwise fail since
16615 control falls in from somewhere else. */
16616 if (this_insn == label)
16618 arm_ccfsm_state = 1;
16626 /* Succeed if the following insn is the target label.
16628 If return insns are used then the last insn in a function
16629 will be a barrier. */
16630 this_insn = next_nonnote_insn (this_insn);
16631 if (this_insn && this_insn == label)
16633 arm_ccfsm_state = 1;
16641 /* The AAPCS says that conditional calls should not be
16642 used since they make interworking inefficient (the
16643 linker can't transform BL<cond> into BLX). That's
16644 only a problem if the machine has BLX. */
16651 /* Succeed if the following insn is the target label, or
16652 if the following two insns are a barrier and the
16654 this_insn = next_nonnote_insn (this_insn);
16655 if (this_insn && GET_CODE (this_insn) == BARRIER)
16656 this_insn = next_nonnote_insn (this_insn);
16658 if (this_insn && this_insn == label
16659 && insns_skipped < max_insns_skipped)
16661 arm_ccfsm_state = 1;
16669 /* If this is an unconditional branch to the same label, succeed.
16670 If it is to another label, do nothing. If it is conditional,
16672 /* XXX Probably, the tests for SET and the PC are
16675 scanbody = PATTERN (this_insn);
16676 if (GET_CODE (scanbody) == SET
16677 && GET_CODE (SET_DEST (scanbody)) == PC)
16679 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16680 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16682 arm_ccfsm_state = 2;
16685 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16688 /* Fail if a conditional return is undesirable (e.g. on a
16689 StrongARM), but still allow this if optimizing for size. */
16690 else if (GET_CODE (scanbody) == RETURN
16691 && !use_return_insn (TRUE, NULL)
16694 else if (GET_CODE (scanbody) == RETURN
16697 arm_ccfsm_state = 2;
16700 else if (GET_CODE (scanbody) == PARALLEL)
16702 switch (get_attr_conds (this_insn))
16712 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16717 /* Instructions using or affecting the condition codes make it
16719 scanbody = PATTERN (this_insn);
16720 if (!(GET_CODE (scanbody) == SET
16721 || GET_CODE (scanbody) == PARALLEL)
16722 || get_attr_conds (this_insn) != CONDS_NOCOND)
16725 /* A conditional cirrus instruction must be followed by
16726 a non Cirrus instruction. However, since we
16727 conditionalize instructions in this function and by
16728 the time we get here we can't add instructions
16729 (nops), because shorten_branches() has already been
16730 called, we will disable conditionalizing Cirrus
16731 instructions to be safe. */
16732 if (GET_CODE (scanbody) != USE
16733 && GET_CODE (scanbody) != CLOBBER
16734 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16744 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16745 arm_target_label = CODE_LABEL_NUMBER (label);
16748 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16750 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16752 this_insn = next_nonnote_insn (this_insn);
16753 gcc_assert (!this_insn
16754 || (GET_CODE (this_insn) != BARRIER
16755 && GET_CODE (this_insn) != CODE_LABEL));
16759 /* Oh, dear! we ran off the end.. give up. */
16760 extract_constrain_insn_cached (insn);
16761 arm_ccfsm_state = 0;
16762 arm_target_insn = NULL;
16765 arm_target_insn = this_insn;
16768 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16771 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16773 if (reverse || then_not_else)
16774 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16777 /* Restore recog_data (getting the attributes of other insns can
16778 destroy this array, but final.c assumes that it remains intact
16779 across this call. */
16780 extract_constrain_insn_cached (insn);
16784 /* Output IT instructions. */
16786 thumb2_asm_output_opcode (FILE * stream)
16791 if (arm_condexec_mask)
16793 for (n = 0; n < arm_condexec_masklen; n++)
16794 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16796 asm_fprintf(stream, "i%s\t%s\n\t", buff,
16797 arm_condition_codes[arm_current_cc]);
16798 arm_condexec_mask = 0;
16802 /* Returns true if REGNO is a valid register
16803 for holding a quantity of type MODE. */
16805 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16807 if (GET_MODE_CLASS (mode) == MODE_CC)
16808 return (regno == CC_REGNUM
16809 || (TARGET_HARD_FLOAT && TARGET_VFP
16810 && regno == VFPCC_REGNUM));
16813 /* For the Thumb we only allow values bigger than SImode in
16814 registers 0 - 6, so that there is always a second low
16815 register available to hold the upper part of the value.
16816 We probably we ought to ensure that the register is the
16817 start of an even numbered register pair. */
16818 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16820 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16821 && IS_CIRRUS_REGNUM (regno))
16822 /* We have outlawed SI values in Cirrus registers because they
16823 reside in the lower 32 bits, but SF values reside in the
16824 upper 32 bits. This causes gcc all sorts of grief. We can't
16825 even split the registers into pairs because Cirrus SI values
16826 get sign extended to 64bits-- aldyh. */
16827 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16829 if (TARGET_HARD_FLOAT && TARGET_VFP
16830 && IS_VFP_REGNUM (regno))
16832 if (mode == SFmode || mode == SImode)
16833 return VFP_REGNO_OK_FOR_SINGLE (regno);
16835 if (mode == DFmode)
16836 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16838 /* VFP registers can hold HFmode values, but there is no point in
16839 putting them there unless we have hardware conversion insns. */
16840 if (mode == HFmode)
16841 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16844 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16845 || (VALID_NEON_QREG_MODE (mode)
16846 && NEON_REGNO_OK_FOR_QUAD (regno))
16847 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16848 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16849 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16850 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16851 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16856 if (TARGET_REALLY_IWMMXT)
16858 if (IS_IWMMXT_GR_REGNUM (regno))
16859 return mode == SImode;
16861 if (IS_IWMMXT_REGNUM (regno))
16862 return VALID_IWMMXT_REG_MODE (mode);
16865 /* We allow almost any value to be stored in the general registers.
16866 Restrict doubleword quantities to even register pairs so that we can
16867 use ldrd. Do not allow very large Neon structure opaque modes in
16868 general registers; they would use too many. */
16869 if (regno <= LAST_ARM_REGNUM)
16870 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16871 && ARM_NUM_REGS (mode) <= 4;
16873 if (regno == FRAME_POINTER_REGNUM
16874 || regno == ARG_POINTER_REGNUM)
16875 /* We only allow integers in the fake hard registers. */
16876 return GET_MODE_CLASS (mode) == MODE_INT;
16878 /* The only registers left are the FPA registers
16879 which we only allow to hold FP values. */
16880 return (TARGET_HARD_FLOAT && TARGET_FPA
16881 && GET_MODE_CLASS (mode) == MODE_FLOAT
16882 && regno >= FIRST_FPA_REGNUM
16883 && regno <= LAST_FPA_REGNUM);
16886 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16887 not used in arm mode. */
16890 arm_regno_class (int regno)
16894 if (regno == STACK_POINTER_REGNUM)
16896 if (regno == CC_REGNUM)
16903 if (TARGET_THUMB2 && regno < 8)
16906 if ( regno <= LAST_ARM_REGNUM
16907 || regno == FRAME_POINTER_REGNUM
16908 || regno == ARG_POINTER_REGNUM)
16909 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16911 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16912 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16914 if (IS_CIRRUS_REGNUM (regno))
16915 return CIRRUS_REGS;
16917 if (IS_VFP_REGNUM (regno))
16919 if (regno <= D7_VFP_REGNUM)
16920 return VFP_D0_D7_REGS;
16921 else if (regno <= LAST_LO_VFP_REGNUM)
16922 return VFP_LO_REGS;
16924 return VFP_HI_REGS;
16927 if (IS_IWMMXT_REGNUM (regno))
16928 return IWMMXT_REGS;
16930 if (IS_IWMMXT_GR_REGNUM (regno))
16931 return IWMMXT_GR_REGS;
16936 /* Handle a special case when computing the offset
16937 of an argument from the frame pointer. */
16939 arm_debugger_arg_offset (int value, rtx addr)
16943 /* We are only interested if dbxout_parms() failed to compute the offset. */
16947 /* We can only cope with the case where the address is held in a register. */
16948 if (GET_CODE (addr) != REG)
16951 /* If we are using the frame pointer to point at the argument, then
16952 an offset of 0 is correct. */
16953 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16956 /* If we are using the stack pointer to point at the
16957 argument, then an offset of 0 is correct. */
16958 /* ??? Check this is consistent with thumb2 frame layout. */
16959 if ((TARGET_THUMB || !frame_pointer_needed)
16960 && REGNO (addr) == SP_REGNUM)
16963 /* Oh dear. The argument is pointed to by a register rather
16964 than being held in a register, or being stored at a known
16965 offset from the frame pointer. Since GDB only understands
16966 those two kinds of argument we must translate the address
16967 held in the register into an offset from the frame pointer.
16968 We do this by searching through the insns for the function
16969 looking to see where this register gets its value. If the
16970 register is initialized from the frame pointer plus an offset
16971 then we are in luck and we can continue, otherwise we give up.
16973 This code is exercised by producing debugging information
16974 for a function with arguments like this:
16976 double func (double a, double b, int c, double d) {return d;}
16978 Without this code the stab for parameter 'd' will be set to
16979 an offset of 0 from the frame pointer, rather than 8. */
16981 /* The if() statement says:
16983 If the insn is a normal instruction
16984 and if the insn is setting the value in a register
16985 and if the register being set is the register holding the address of the argument
16986 and if the address is computing by an addition
16987 that involves adding to a register
16988 which is the frame pointer
16993 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16995 if ( GET_CODE (insn) == INSN
16996 && GET_CODE (PATTERN (insn)) == SET
16997 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16998 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16999 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17000 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17001 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17004 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17013 warning (0, "unable to compute real location of stacked parameter");
17014 value = 8; /* XXX magic hack */
17020 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17023 if ((MASK) & insn_flags) \
17024 add_builtin_function ((NAME), (TYPE), (CODE), \
17025 BUILT_IN_MD, NULL, NULL_TREE); \
17029 struct builtin_description
17031 const unsigned int mask;
17032 const enum insn_code icode;
17033 const char * const name;
17034 const enum arm_builtins code;
17035 const enum rtx_code comparison;
17036 const unsigned int flag;
17039 static const struct builtin_description bdesc_2arg[] =
17041 #define IWMMXT_BUILTIN(code, string, builtin) \
17042 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17043 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17045 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17046 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17047 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17048 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17049 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17050 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17051 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17052 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17053 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17054 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17055 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17056 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17057 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17058 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17059 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17060 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17061 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17062 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17063 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17064 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17065 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17066 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17067 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17068 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17069 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17070 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17071 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17072 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17073 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17074 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17075 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17076 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17077 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17078 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17079 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17080 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17081 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17082 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17083 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17084 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17085 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17086 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17087 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17088 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17089 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17090 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17091 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17092 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17093 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17094 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17095 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17096 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17097 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17098 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17099 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17100 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17101 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17102 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17104 #define IWMMXT_BUILTIN2(code, builtin) \
17105 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17107 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17108 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17109 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17110 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17111 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17112 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17113 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17114 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17115 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17116 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17117 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17118 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17119 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17120 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17121 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17122 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17123 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17124 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17125 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17126 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17127 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17128 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17129 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17130 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17131 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17132 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17133 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17134 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17135 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17136 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17137 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17138 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17141 static const struct builtin_description bdesc_1arg[] =
17143 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17144 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17145 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17146 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17147 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17148 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17149 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17150 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17151 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17152 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17153 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17154 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17155 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17156 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17157 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17158 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17159 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17160 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17163 /* Set up all the iWMMXt builtins. This is
17164 not called if TARGET_IWMMXT is zero. */
17167 arm_init_iwmmxt_builtins (void)
17169 const struct builtin_description * d;
17171 tree endlink = void_list_node;
17173 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17174 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17175 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17178 = build_function_type (integer_type_node,
17179 tree_cons (NULL_TREE, integer_type_node, endlink));
17180 tree v8qi_ftype_v8qi_v8qi_int
17181 = build_function_type (V8QI_type_node,
17182 tree_cons (NULL_TREE, V8QI_type_node,
17183 tree_cons (NULL_TREE, V8QI_type_node,
17184 tree_cons (NULL_TREE,
17187 tree v4hi_ftype_v4hi_int
17188 = build_function_type (V4HI_type_node,
17189 tree_cons (NULL_TREE, V4HI_type_node,
17190 tree_cons (NULL_TREE, integer_type_node,
17192 tree v2si_ftype_v2si_int
17193 = build_function_type (V2SI_type_node,
17194 tree_cons (NULL_TREE, V2SI_type_node,
17195 tree_cons (NULL_TREE, integer_type_node,
17197 tree v2si_ftype_di_di
17198 = build_function_type (V2SI_type_node,
17199 tree_cons (NULL_TREE, long_long_integer_type_node,
17200 tree_cons (NULL_TREE, long_long_integer_type_node,
17202 tree di_ftype_di_int
17203 = build_function_type (long_long_integer_type_node,
17204 tree_cons (NULL_TREE, long_long_integer_type_node,
17205 tree_cons (NULL_TREE, integer_type_node,
17207 tree di_ftype_di_int_int
17208 = build_function_type (long_long_integer_type_node,
17209 tree_cons (NULL_TREE, long_long_integer_type_node,
17210 tree_cons (NULL_TREE, integer_type_node,
17211 tree_cons (NULL_TREE,
17214 tree int_ftype_v8qi
17215 = build_function_type (integer_type_node,
17216 tree_cons (NULL_TREE, V8QI_type_node,
17218 tree int_ftype_v4hi
17219 = build_function_type (integer_type_node,
17220 tree_cons (NULL_TREE, V4HI_type_node,
17222 tree int_ftype_v2si
17223 = build_function_type (integer_type_node,
17224 tree_cons (NULL_TREE, V2SI_type_node,
17226 tree int_ftype_v8qi_int
17227 = build_function_type (integer_type_node,
17228 tree_cons (NULL_TREE, V8QI_type_node,
17229 tree_cons (NULL_TREE, integer_type_node,
17231 tree int_ftype_v4hi_int
17232 = build_function_type (integer_type_node,
17233 tree_cons (NULL_TREE, V4HI_type_node,
17234 tree_cons (NULL_TREE, integer_type_node,
17236 tree int_ftype_v2si_int
17237 = build_function_type (integer_type_node,
17238 tree_cons (NULL_TREE, V2SI_type_node,
17239 tree_cons (NULL_TREE, integer_type_node,
17241 tree v8qi_ftype_v8qi_int_int
17242 = build_function_type (V8QI_type_node,
17243 tree_cons (NULL_TREE, V8QI_type_node,
17244 tree_cons (NULL_TREE, integer_type_node,
17245 tree_cons (NULL_TREE,
17248 tree v4hi_ftype_v4hi_int_int
17249 = build_function_type (V4HI_type_node,
17250 tree_cons (NULL_TREE, V4HI_type_node,
17251 tree_cons (NULL_TREE, integer_type_node,
17252 tree_cons (NULL_TREE,
17255 tree v2si_ftype_v2si_int_int
17256 = build_function_type (V2SI_type_node,
17257 tree_cons (NULL_TREE, V2SI_type_node,
17258 tree_cons (NULL_TREE, integer_type_node,
17259 tree_cons (NULL_TREE,
17262 /* Miscellaneous. */
17263 tree v8qi_ftype_v4hi_v4hi
17264 = build_function_type (V8QI_type_node,
17265 tree_cons (NULL_TREE, V4HI_type_node,
17266 tree_cons (NULL_TREE, V4HI_type_node,
17268 tree v4hi_ftype_v2si_v2si
17269 = build_function_type (V4HI_type_node,
17270 tree_cons (NULL_TREE, V2SI_type_node,
17271 tree_cons (NULL_TREE, V2SI_type_node,
17273 tree v2si_ftype_v4hi_v4hi
17274 = build_function_type (V2SI_type_node,
17275 tree_cons (NULL_TREE, V4HI_type_node,
17276 tree_cons (NULL_TREE, V4HI_type_node,
17278 tree v2si_ftype_v8qi_v8qi
17279 = build_function_type (V2SI_type_node,
17280 tree_cons (NULL_TREE, V8QI_type_node,
17281 tree_cons (NULL_TREE, V8QI_type_node,
17283 tree v4hi_ftype_v4hi_di
17284 = build_function_type (V4HI_type_node,
17285 tree_cons (NULL_TREE, V4HI_type_node,
17286 tree_cons (NULL_TREE,
17287 long_long_integer_type_node,
17289 tree v2si_ftype_v2si_di
17290 = build_function_type (V2SI_type_node,
17291 tree_cons (NULL_TREE, V2SI_type_node,
17292 tree_cons (NULL_TREE,
17293 long_long_integer_type_node,
17295 tree void_ftype_int_int
17296 = build_function_type (void_type_node,
17297 tree_cons (NULL_TREE, integer_type_node,
17298 tree_cons (NULL_TREE, integer_type_node,
17301 = build_function_type (long_long_unsigned_type_node, endlink);
17303 = build_function_type (long_long_integer_type_node,
17304 tree_cons (NULL_TREE, V8QI_type_node,
17307 = build_function_type (long_long_integer_type_node,
17308 tree_cons (NULL_TREE, V4HI_type_node,
17311 = build_function_type (long_long_integer_type_node,
17312 tree_cons (NULL_TREE, V2SI_type_node,
17314 tree v2si_ftype_v4hi
17315 = build_function_type (V2SI_type_node,
17316 tree_cons (NULL_TREE, V4HI_type_node,
17318 tree v4hi_ftype_v8qi
17319 = build_function_type (V4HI_type_node,
17320 tree_cons (NULL_TREE, V8QI_type_node,
17323 tree di_ftype_di_v4hi_v4hi
17324 = build_function_type (long_long_unsigned_type_node,
17325 tree_cons (NULL_TREE,
17326 long_long_unsigned_type_node,
17327 tree_cons (NULL_TREE, V4HI_type_node,
17328 tree_cons (NULL_TREE,
17332 tree di_ftype_v4hi_v4hi
17333 = build_function_type (long_long_unsigned_type_node,
17334 tree_cons (NULL_TREE, V4HI_type_node,
17335 tree_cons (NULL_TREE, V4HI_type_node,
17338 /* Normal vector binops. */
17339 tree v8qi_ftype_v8qi_v8qi
17340 = build_function_type (V8QI_type_node,
17341 tree_cons (NULL_TREE, V8QI_type_node,
17342 tree_cons (NULL_TREE, V8QI_type_node,
17344 tree v4hi_ftype_v4hi_v4hi
17345 = build_function_type (V4HI_type_node,
17346 tree_cons (NULL_TREE, V4HI_type_node,
17347 tree_cons (NULL_TREE, V4HI_type_node,
17349 tree v2si_ftype_v2si_v2si
17350 = build_function_type (V2SI_type_node,
17351 tree_cons (NULL_TREE, V2SI_type_node,
17352 tree_cons (NULL_TREE, V2SI_type_node,
17354 tree di_ftype_di_di
17355 = build_function_type (long_long_unsigned_type_node,
17356 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17357 tree_cons (NULL_TREE,
17358 long_long_unsigned_type_node,
17361 /* Add all builtins that are more or less simple operations on two
17363 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17365 /* Use one of the operands; the target can have a different mode for
17366 mask-generating compares. */
17367 enum machine_mode mode;
17373 mode = insn_data[d->icode].operand[1].mode;
17378 type = v8qi_ftype_v8qi_v8qi;
17381 type = v4hi_ftype_v4hi_v4hi;
17384 type = v2si_ftype_v2si_v2si;
17387 type = di_ftype_di_di;
17391 gcc_unreachable ();
17394 def_mbuiltin (d->mask, d->name, type, d->code);
17397 /* Add the remaining MMX insns with somewhat more complicated types. */
17398 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
17399 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
17400 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
17402 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
17403 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
17404 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
17405 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
17406 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
17407 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
17409 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
17410 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
17411 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
17412 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
17413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
17414 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
17416 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
17417 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
17418 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
17419 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
17420 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
17421 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
17423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
17424 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
17425 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
17426 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
17427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
17428 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
17430 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
17432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
17433 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
17434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
17435 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
17437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
17438 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
17439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17469 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17473 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17476 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17477 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17478 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17483 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17484 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17486 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17490 arm_init_tls_builtins (void)
17494 ftype = build_function_type (ptr_type_node, void_list_node);
17495 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17496 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17498 TREE_NOTHROW (decl) = 1;
17499 TREE_READONLY (decl) = 1;
17502 enum neon_builtin_type_bits {
17518 #define v8qi_UP T_V8QI
17519 #define v4hi_UP T_V4HI
17520 #define v2si_UP T_V2SI
17521 #define v2sf_UP T_V2SF
17523 #define v16qi_UP T_V16QI
17524 #define v8hi_UP T_V8HI
17525 #define v4si_UP T_V4SI
17526 #define v4sf_UP T_V4SF
17527 #define v2di_UP T_V2DI
17532 #define UP(X) X##_UP
17567 NEON_LOADSTRUCTLANE,
17569 NEON_STORESTRUCTLANE,
17578 const neon_itype itype;
17580 const enum insn_code codes[T_MAX];
17581 const unsigned int num_vars;
17582 unsigned int base_fcode;
17583 } neon_builtin_datum;
17585 #define CF(N,X) CODE_FOR_neon_##N##X
17587 #define VAR1(T, N, A) \
17588 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17589 #define VAR2(T, N, A, B) \
17590 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17591 #define VAR3(T, N, A, B, C) \
17592 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17593 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17594 #define VAR4(T, N, A, B, C, D) \
17595 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17596 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17597 #define VAR5(T, N, A, B, C, D, E) \
17598 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17599 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17600 #define VAR6(T, N, A, B, C, D, E, F) \
17601 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17602 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17603 #define VAR7(T, N, A, B, C, D, E, F, G) \
17604 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17605 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17607 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17608 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17610 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17611 CF (N, G), CF (N, H) }, 8, 0
17612 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17613 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17614 | UP (H) | UP (I), \
17615 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17616 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17617 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17618 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17619 | UP (H) | UP (I) | UP (J), \
17620 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17621 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
17623 /* The mode entries in the following table correspond to the "key" type of the
17624 instruction variant, i.e. equivalent to that which would be specified after
17625 the assembler mnemonic, which usually refers to the last vector operand.
17626 (Signed/unsigned/polynomial types are not differentiated between though, and
17627 are all mapped onto the same mode for a given element size.) The modes
17628 listed per instruction should be the same as those defined for that
17629 instruction's pattern in neon.md.
17630 WARNING: Variants should be listed in the same increasing order as
17631 neon_builtin_type_bits. */
17633 static neon_builtin_datum neon_builtin_data[] =
17635 { VAR10 (BINOP, vadd,
17636 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17637 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17638 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17639 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17640 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17641 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17642 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17643 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17644 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17645 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17646 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17647 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17648 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17649 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17650 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17651 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17652 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17653 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17654 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17655 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17656 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17657 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17658 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17659 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17660 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17661 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17662 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17663 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17664 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17665 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17666 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17667 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17668 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17669 { VAR10 (BINOP, vsub,
17670 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17671 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17672 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17673 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17674 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17675 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17676 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17677 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17678 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17679 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17680 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17681 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17682 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17683 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17684 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17685 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17686 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17687 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17688 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17689 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17690 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17691 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17692 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17693 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17694 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17695 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17696 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17697 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17698 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17699 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17700 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17701 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17702 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17703 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17704 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17705 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17706 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17707 /* FIXME: vget_lane supports more variants than this! */
17708 { VAR10 (GETLANE, vget_lane,
17709 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17710 { VAR10 (SETLANE, vset_lane,
17711 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17712 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17713 { VAR10 (DUP, vdup_n,
17714 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17715 { VAR10 (DUPLANE, vdup_lane,
17716 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17717 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17718 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17719 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17720 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17721 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17722 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17723 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17724 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17725 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17726 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17727 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17728 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17729 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17730 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17731 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17732 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17733 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17734 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17735 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17736 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17737 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17738 { VAR10 (BINOP, vext,
17739 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17740 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17741 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17742 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17743 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17744 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17745 { VAR10 (SELECT, vbsl,
17746 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17747 { VAR1 (VTBL, vtbl1, v8qi) },
17748 { VAR1 (VTBL, vtbl2, v8qi) },
17749 { VAR1 (VTBL, vtbl3, v8qi) },
17750 { VAR1 (VTBL, vtbl4, v8qi) },
17751 { VAR1 (VTBX, vtbx1, v8qi) },
17752 { VAR1 (VTBX, vtbx2, v8qi) },
17753 { VAR1 (VTBX, vtbx3, v8qi) },
17754 { VAR1 (VTBX, vtbx4, v8qi) },
17755 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17756 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17757 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17758 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17759 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17760 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17761 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17762 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17763 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17764 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17765 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17766 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17767 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17768 { VAR10 (LOAD1, vld1,
17769 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17770 { VAR10 (LOAD1LANE, vld1_lane,
17771 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17772 { VAR10 (LOAD1, vld1_dup,
17773 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17774 { VAR10 (STORE1, vst1,
17775 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17776 { VAR10 (STORE1LANE, vst1_lane,
17777 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17778 { VAR9 (LOADSTRUCT,
17779 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17780 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17781 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17782 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17783 { VAR9 (STORESTRUCT, vst2,
17784 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17785 { VAR7 (STORESTRUCTLANE, vst2_lane,
17786 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17787 { VAR9 (LOADSTRUCT,
17788 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17789 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17790 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17791 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17792 { VAR9 (STORESTRUCT, vst3,
17793 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17794 { VAR7 (STORESTRUCTLANE, vst3_lane,
17795 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17796 { VAR9 (LOADSTRUCT, vld4,
17797 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17798 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17799 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17800 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17801 { VAR9 (STORESTRUCT, vst4,
17802 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17803 { VAR7 (STORESTRUCTLANE, vst4_lane,
17804 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17805 { VAR10 (LOGICBINOP, vand,
17806 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17807 { VAR10 (LOGICBINOP, vorr,
17808 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17809 { VAR10 (BINOP, veor,
17810 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17811 { VAR10 (LOGICBINOP, vbic,
17812 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17813 { VAR10 (LOGICBINOP, vorn,
17814 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17830 arm_init_neon_builtins (void)
17832 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17834 tree neon_intQI_type_node;
17835 tree neon_intHI_type_node;
17836 tree neon_polyQI_type_node;
17837 tree neon_polyHI_type_node;
17838 tree neon_intSI_type_node;
17839 tree neon_intDI_type_node;
17840 tree neon_float_type_node;
17842 tree intQI_pointer_node;
17843 tree intHI_pointer_node;
17844 tree intSI_pointer_node;
17845 tree intDI_pointer_node;
17846 tree float_pointer_node;
17848 tree const_intQI_node;
17849 tree const_intHI_node;
17850 tree const_intSI_node;
17851 tree const_intDI_node;
17852 tree const_float_node;
17854 tree const_intQI_pointer_node;
17855 tree const_intHI_pointer_node;
17856 tree const_intSI_pointer_node;
17857 tree const_intDI_pointer_node;
17858 tree const_float_pointer_node;
17860 tree V8QI_type_node;
17861 tree V4HI_type_node;
17862 tree V2SI_type_node;
17863 tree V2SF_type_node;
17864 tree V16QI_type_node;
17865 tree V8HI_type_node;
17866 tree V4SI_type_node;
17867 tree V4SF_type_node;
17868 tree V2DI_type_node;
17870 tree intUQI_type_node;
17871 tree intUHI_type_node;
17872 tree intUSI_type_node;
17873 tree intUDI_type_node;
17875 tree intEI_type_node;
17876 tree intOI_type_node;
17877 tree intCI_type_node;
17878 tree intXI_type_node;
17880 tree V8QI_pointer_node;
17881 tree V4HI_pointer_node;
17882 tree V2SI_pointer_node;
17883 tree V2SF_pointer_node;
17884 tree V16QI_pointer_node;
17885 tree V8HI_pointer_node;
17886 tree V4SI_pointer_node;
17887 tree V4SF_pointer_node;
17888 tree V2DI_pointer_node;
17890 tree void_ftype_pv8qi_v8qi_v8qi;
17891 tree void_ftype_pv4hi_v4hi_v4hi;
17892 tree void_ftype_pv2si_v2si_v2si;
17893 tree void_ftype_pv2sf_v2sf_v2sf;
17894 tree void_ftype_pdi_di_di;
17895 tree void_ftype_pv16qi_v16qi_v16qi;
17896 tree void_ftype_pv8hi_v8hi_v8hi;
17897 tree void_ftype_pv4si_v4si_v4si;
17898 tree void_ftype_pv4sf_v4sf_v4sf;
17899 tree void_ftype_pv2di_v2di_v2di;
17901 tree reinterp_ftype_dreg[5][5];
17902 tree reinterp_ftype_qreg[5][5];
17903 tree dreg_types[5], qreg_types[5];
17905 /* Create distinguished type nodes for NEON vector element types,
17906 and pointers to values of such types, so we can detect them later. */
17907 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17908 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17909 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17910 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17911 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17912 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17913 neon_float_type_node = make_node (REAL_TYPE);
17914 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17915 layout_type (neon_float_type_node);
17917 /* Define typedefs which exactly correspond to the modes we are basing vector
17918 types on. If you change these names you'll need to change
17919 the table used by arm_mangle_type too. */
17920 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17921 "__builtin_neon_qi");
17922 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17923 "__builtin_neon_hi");
17924 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17925 "__builtin_neon_si");
17926 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17927 "__builtin_neon_sf");
17928 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17929 "__builtin_neon_di");
17930 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17931 "__builtin_neon_poly8");
17932 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17933 "__builtin_neon_poly16");
17935 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17936 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17937 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17938 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17939 float_pointer_node = build_pointer_type (neon_float_type_node);
17941 /* Next create constant-qualified versions of the above types. */
17942 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17944 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17946 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17948 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17950 const_float_node = build_qualified_type (neon_float_type_node,
17953 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17954 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17955 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17956 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17957 const_float_pointer_node = build_pointer_type (const_float_node);
17959 /* Now create vector types based on our NEON element types. */
17960 /* 64-bit vectors. */
17962 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17964 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17966 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17968 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17969 /* 128-bit vectors. */
17971 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17973 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17975 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17977 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17979 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17981 /* Unsigned integer types for various mode sizes. */
17982 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17983 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17984 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17985 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17987 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17988 "__builtin_neon_uqi");
17989 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17990 "__builtin_neon_uhi");
17991 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17992 "__builtin_neon_usi");
17993 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17994 "__builtin_neon_udi");
17996 /* Opaque integer types for structures of vectors. */
17997 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17998 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17999 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18000 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18002 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18003 "__builtin_neon_ti");
18004 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18005 "__builtin_neon_ei");
18006 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18007 "__builtin_neon_oi");
18008 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18009 "__builtin_neon_ci");
18010 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18011 "__builtin_neon_xi");
18013 /* Pointers to vector types. */
18014 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18015 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18016 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18017 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18018 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18019 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18020 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18021 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18022 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18024 /* Operations which return results as pairs. */
18025 void_ftype_pv8qi_v8qi_v8qi =
18026 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18027 V8QI_type_node, NULL);
18028 void_ftype_pv4hi_v4hi_v4hi =
18029 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18030 V4HI_type_node, NULL);
18031 void_ftype_pv2si_v2si_v2si =
18032 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18033 V2SI_type_node, NULL);
18034 void_ftype_pv2sf_v2sf_v2sf =
18035 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18036 V2SF_type_node, NULL);
18037 void_ftype_pdi_di_di =
18038 build_function_type_list (void_type_node, intDI_pointer_node,
18039 neon_intDI_type_node, neon_intDI_type_node, NULL);
18040 void_ftype_pv16qi_v16qi_v16qi =
18041 build_function_type_list (void_type_node, V16QI_pointer_node,
18042 V16QI_type_node, V16QI_type_node, NULL);
18043 void_ftype_pv8hi_v8hi_v8hi =
18044 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18045 V8HI_type_node, NULL);
18046 void_ftype_pv4si_v4si_v4si =
18047 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18048 V4SI_type_node, NULL);
18049 void_ftype_pv4sf_v4sf_v4sf =
18050 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18051 V4SF_type_node, NULL);
18052 void_ftype_pv2di_v2di_v2di =
18053 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18054 V2DI_type_node, NULL);
18056 dreg_types[0] = V8QI_type_node;
18057 dreg_types[1] = V4HI_type_node;
18058 dreg_types[2] = V2SI_type_node;
18059 dreg_types[3] = V2SF_type_node;
18060 dreg_types[4] = neon_intDI_type_node;
18062 qreg_types[0] = V16QI_type_node;
18063 qreg_types[1] = V8HI_type_node;
18064 qreg_types[2] = V4SI_type_node;
18065 qreg_types[3] = V4SF_type_node;
18066 qreg_types[4] = V2DI_type_node;
18068 for (i = 0; i < 5; i++)
18071 for (j = 0; j < 5; j++)
18073 reinterp_ftype_dreg[i][j]
18074 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18075 reinterp_ftype_qreg[i][j]
18076 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18080 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18082 neon_builtin_datum *d = &neon_builtin_data[i];
18083 unsigned int j, codeidx = 0;
18085 d->base_fcode = fcode;
18087 for (j = 0; j < T_MAX; j++)
18089 const char* const modenames[] = {
18090 "v8qi", "v4hi", "v2si", "v2sf", "di",
18091 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18095 enum insn_code icode;
18096 int is_load = 0, is_store = 0;
18098 if ((d->bits & (1 << j)) == 0)
18101 icode = d->codes[codeidx++];
18106 case NEON_LOAD1LANE:
18107 case NEON_LOADSTRUCT:
18108 case NEON_LOADSTRUCTLANE:
18110 /* Fall through. */
18112 case NEON_STORE1LANE:
18113 case NEON_STORESTRUCT:
18114 case NEON_STORESTRUCTLANE:
18117 /* Fall through. */
18120 case NEON_LOGICBINOP:
18121 case NEON_SHIFTINSERT:
18128 case NEON_SHIFTIMM:
18129 case NEON_SHIFTACC:
18135 case NEON_LANEMULL:
18136 case NEON_LANEMULH:
18138 case NEON_SCALARMUL:
18139 case NEON_SCALARMULL:
18140 case NEON_SCALARMULH:
18141 case NEON_SCALARMAC:
18147 tree return_type = void_type_node, args = void_list_node;
18149 /* Build a function type directly from the insn_data for this
18150 builtin. The build_function_type() function takes care of
18151 removing duplicates for us. */
18152 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18156 if (is_load && k == 1)
18158 /* Neon load patterns always have the memory operand
18159 (a SImode pointer) in the operand 1 position. We
18160 want a const pointer to the element type in that
18162 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18168 eltype = const_intQI_pointer_node;
18173 eltype = const_intHI_pointer_node;
18178 eltype = const_intSI_pointer_node;
18183 eltype = const_float_pointer_node;
18188 eltype = const_intDI_pointer_node;
18191 default: gcc_unreachable ();
18194 else if (is_store && k == 0)
18196 /* Similarly, Neon store patterns use operand 0 as
18197 the memory location to store to (a SImode pointer).
18198 Use a pointer to the element type of the store in
18200 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18206 eltype = intQI_pointer_node;
18211 eltype = intHI_pointer_node;
18216 eltype = intSI_pointer_node;
18221 eltype = float_pointer_node;
18226 eltype = intDI_pointer_node;
18229 default: gcc_unreachable ();
18234 switch (insn_data[icode].operand[k].mode)
18236 case VOIDmode: eltype = void_type_node; break;
18238 case QImode: eltype = neon_intQI_type_node; break;
18239 case HImode: eltype = neon_intHI_type_node; break;
18240 case SImode: eltype = neon_intSI_type_node; break;
18241 case SFmode: eltype = neon_float_type_node; break;
18242 case DImode: eltype = neon_intDI_type_node; break;
18243 case TImode: eltype = intTI_type_node; break;
18244 case EImode: eltype = intEI_type_node; break;
18245 case OImode: eltype = intOI_type_node; break;
18246 case CImode: eltype = intCI_type_node; break;
18247 case XImode: eltype = intXI_type_node; break;
18248 /* 64-bit vectors. */
18249 case V8QImode: eltype = V8QI_type_node; break;
18250 case V4HImode: eltype = V4HI_type_node; break;
18251 case V2SImode: eltype = V2SI_type_node; break;
18252 case V2SFmode: eltype = V2SF_type_node; break;
18253 /* 128-bit vectors. */
18254 case V16QImode: eltype = V16QI_type_node; break;
18255 case V8HImode: eltype = V8HI_type_node; break;
18256 case V4SImode: eltype = V4SI_type_node; break;
18257 case V4SFmode: eltype = V4SF_type_node; break;
18258 case V2DImode: eltype = V2DI_type_node; break;
18259 default: gcc_unreachable ();
18263 if (k == 0 && !is_store)
18264 return_type = eltype;
18266 args = tree_cons (NULL_TREE, eltype, args);
18269 ftype = build_function_type (return_type, args);
18273 case NEON_RESULTPAIR:
18275 switch (insn_data[icode].operand[1].mode)
18277 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18278 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18279 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18280 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18281 case DImode: ftype = void_ftype_pdi_di_di; break;
18282 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18283 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18284 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18285 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18286 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18287 default: gcc_unreachable ();
18292 case NEON_REINTERP:
18294 /* We iterate over 5 doubleword types, then 5 quadword
18297 switch (insn_data[icode].operand[0].mode)
18299 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18300 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18301 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18302 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18303 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18304 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18305 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18306 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18307 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18308 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18309 default: gcc_unreachable ();
18315 gcc_unreachable ();
18318 gcc_assert (ftype != NULL);
18320 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
18322 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
18329 arm_init_fp16_builtins (void)
18331 tree fp16_type = make_node (REAL_TYPE);
18332 TYPE_PRECISION (fp16_type) = 16;
18333 layout_type (fp16_type);
18334 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
18338 arm_init_builtins (void)
18340 arm_init_tls_builtins ();
18342 if (TARGET_REALLY_IWMMXT)
18343 arm_init_iwmmxt_builtins ();
18346 arm_init_neon_builtins ();
18348 if (arm_fp16_format)
18349 arm_init_fp16_builtins ();
18352 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18354 static const char *
18355 arm_invalid_parameter_type (const_tree t)
18357 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18358 return N_("function parameters cannot have __fp16 type");
18362 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18364 static const char *
18365 arm_invalid_return_type (const_tree t)
18367 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18368 return N_("functions cannot return __fp16 type");
18372 /* Implement TARGET_PROMOTED_TYPE. */
18375 arm_promoted_type (const_tree t)
18377 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18378 return float_type_node;
18382 /* Implement TARGET_CONVERT_TO_TYPE.
18383 Specifically, this hook implements the peculiarity of the ARM
18384 half-precision floating-point C semantics that requires conversions between
18385 __fp16 to or from double to do an intermediate conversion to float. */
18388 arm_convert_to_type (tree type, tree expr)
18390 tree fromtype = TREE_TYPE (expr);
18391 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
18393 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
18394 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
18395 return convert (type, convert (float_type_node, expr));
18399 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
18400 This simply adds HFmode as a supported mode; even though we don't
18401 implement arithmetic on this type directly, it's supported by
18402 optabs conversions, much the way the double-word arithmetic is
18403 special-cased in the default hook. */
18406 arm_scalar_mode_supported_p (enum machine_mode mode)
18408 if (mode == HFmode)
18409 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
18411 return default_scalar_mode_supported_p (mode);
18414 /* Errors in the source file can cause expand_expr to return const0_rtx
18415 where we expect a vector. To avoid crashing, use one of the vector
18416 clear instructions. */
18419 safe_vector_operand (rtx x, enum machine_mode mode)
18421 if (x != const0_rtx)
18423 x = gen_reg_rtx (mode);
18425 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
18426 : gen_rtx_SUBREG (DImode, x, 0)));
18430 /* Subroutine of arm_expand_builtin to take care of binop insns. */
18433 arm_expand_binop_builtin (enum insn_code icode,
18434 tree exp, rtx target)
18437 tree arg0 = CALL_EXPR_ARG (exp, 0);
18438 tree arg1 = CALL_EXPR_ARG (exp, 1);
18439 rtx op0 = expand_normal (arg0);
18440 rtx op1 = expand_normal (arg1);
18441 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18442 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18443 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18445 if (VECTOR_MODE_P (mode0))
18446 op0 = safe_vector_operand (op0, mode0);
18447 if (VECTOR_MODE_P (mode1))
18448 op1 = safe_vector_operand (op1, mode1);
18451 || GET_MODE (target) != tmode
18452 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18453 target = gen_reg_rtx (tmode);
18455 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18457 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18458 op0 = copy_to_mode_reg (mode0, op0);
18459 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18460 op1 = copy_to_mode_reg (mode1, op1);
18462 pat = GEN_FCN (icode) (target, op0, op1);
18469 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18472 arm_expand_unop_builtin (enum insn_code icode,
18473 tree exp, rtx target, int do_load)
18476 tree arg0 = CALL_EXPR_ARG (exp, 0);
18477 rtx op0 = expand_normal (arg0);
18478 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18479 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18482 || GET_MODE (target) != tmode
18483 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18484 target = gen_reg_rtx (tmode);
18486 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18489 if (VECTOR_MODE_P (mode0))
18490 op0 = safe_vector_operand (op0, mode0);
18492 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18493 op0 = copy_to_mode_reg (mode0, op0);
18496 pat = GEN_FCN (icode) (target, op0);
18504 neon_builtin_compare (const void *a, const void *b)
18506 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18507 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18508 unsigned int soughtcode = key->base_fcode;
18510 if (soughtcode >= memb->base_fcode
18511 && soughtcode < memb->base_fcode + memb->num_vars)
18513 else if (soughtcode < memb->base_fcode)
18519 static enum insn_code
18520 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18522 neon_builtin_datum key, *found;
18525 key.base_fcode = fcode;
18526 found = (neon_builtin_datum *)
18527 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18528 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18529 gcc_assert (found);
18530 idx = fcode - (int) found->base_fcode;
18531 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18534 *itype = found->itype;
18536 return found->codes[idx];
18540 NEON_ARG_COPY_TO_REG,
18545 #define NEON_MAX_BUILTIN_ARGS 5
18547 /* Expand a Neon builtin. */
18549 arm_expand_neon_args (rtx target, int icode, int have_retval,
18554 tree arg[NEON_MAX_BUILTIN_ARGS];
18555 rtx op[NEON_MAX_BUILTIN_ARGS];
18556 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18557 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18562 || GET_MODE (target) != tmode
18563 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18564 target = gen_reg_rtx (tmode);
18566 va_start (ap, exp);
18570 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18572 if (thisarg == NEON_ARG_STOP)
18576 arg[argc] = CALL_EXPR_ARG (exp, argc);
18577 op[argc] = expand_normal (arg[argc]);
18578 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18582 case NEON_ARG_COPY_TO_REG:
18583 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18584 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18585 (op[argc], mode[argc]))
18586 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18589 case NEON_ARG_CONSTANT:
18590 /* FIXME: This error message is somewhat unhelpful. */
18591 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18592 (op[argc], mode[argc]))
18593 error ("argument must be a constant");
18596 case NEON_ARG_STOP:
18597 gcc_unreachable ();
18610 pat = GEN_FCN (icode) (target, op[0]);
18614 pat = GEN_FCN (icode) (target, op[0], op[1]);
18618 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18622 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18626 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18630 gcc_unreachable ();
18636 pat = GEN_FCN (icode) (op[0]);
18640 pat = GEN_FCN (icode) (op[0], op[1]);
18644 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18648 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18652 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18656 gcc_unreachable ();
18667 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18668 constants defined per-instruction or per instruction-variant. Instead, the
18669 required info is looked up in the table neon_builtin_data. */
18671 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18674 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18681 return arm_expand_neon_args (target, icode, 1, exp,
18682 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18686 case NEON_SCALARMUL:
18687 case NEON_SCALARMULL:
18688 case NEON_SCALARMULH:
18689 case NEON_SHIFTINSERT:
18690 case NEON_LOGICBINOP:
18691 return arm_expand_neon_args (target, icode, 1, exp,
18692 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18696 return arm_expand_neon_args (target, icode, 1, exp,
18697 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18698 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18702 case NEON_SHIFTIMM:
18703 return arm_expand_neon_args (target, icode, 1, exp,
18704 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18708 return arm_expand_neon_args (target, icode, 1, exp,
18709 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18713 case NEON_REINTERP:
18714 return arm_expand_neon_args (target, icode, 1, exp,
18715 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18719 return arm_expand_neon_args (target, icode, 1, exp,
18720 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18722 case NEON_RESULTPAIR:
18723 return arm_expand_neon_args (target, icode, 0, exp,
18724 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18728 case NEON_LANEMULL:
18729 case NEON_LANEMULH:
18730 return arm_expand_neon_args (target, icode, 1, exp,
18731 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18732 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18735 return arm_expand_neon_args (target, icode, 1, exp,
18736 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18737 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18739 case NEON_SHIFTACC:
18740 return arm_expand_neon_args (target, icode, 1, exp,
18741 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18742 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18744 case NEON_SCALARMAC:
18745 return arm_expand_neon_args (target, icode, 1, exp,
18746 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18747 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18751 return arm_expand_neon_args (target, icode, 1, exp,
18752 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18756 case NEON_LOADSTRUCT:
18757 return arm_expand_neon_args (target, icode, 1, exp,
18758 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18760 case NEON_LOAD1LANE:
18761 case NEON_LOADSTRUCTLANE:
18762 return arm_expand_neon_args (target, icode, 1, exp,
18763 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18767 case NEON_STORESTRUCT:
18768 return arm_expand_neon_args (target, icode, 0, exp,
18769 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18771 case NEON_STORE1LANE:
18772 case NEON_STORESTRUCTLANE:
18773 return arm_expand_neon_args (target, icode, 0, exp,
18774 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18778 gcc_unreachable ();
18781 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18783 neon_reinterpret (rtx dest, rtx src)
18785 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18788 /* Emit code to place a Neon pair result in memory locations (with equal
18791 neon_emit_pair_result_insn (enum machine_mode mode,
18792 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18795 rtx mem = gen_rtx_MEM (mode, destaddr);
18796 rtx tmp1 = gen_reg_rtx (mode);
18797 rtx tmp2 = gen_reg_rtx (mode);
18799 emit_insn (intfn (tmp1, op1, tmp2, op2));
18801 emit_move_insn (mem, tmp1);
18802 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18803 emit_move_insn (mem, tmp2);
18806 /* Set up operands for a register copy from src to dest, taking care not to
18807 clobber registers in the process.
18808 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18809 be called with a large N, so that should be OK. */
18812 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18814 unsigned int copied = 0, opctr = 0;
18815 unsigned int done = (1 << count) - 1;
18818 while (copied != done)
18820 for (i = 0; i < count; i++)
18824 for (j = 0; good && j < count; j++)
18825 if (i != j && (copied & (1 << j)) == 0
18826 && reg_overlap_mentioned_p (src[j], dest[i]))
18831 operands[opctr++] = dest[i];
18832 operands[opctr++] = src[i];
18838 gcc_assert (opctr == count * 2);
18841 /* Expand an expression EXP that calls a built-in function,
18842 with result going to TARGET if that's convenient
18843 (and in mode MODE if that's convenient).
18844 SUBTARGET may be used as the target for computing one of EXP's operands.
18845 IGNORE is nonzero if the value is to be ignored. */
18848 arm_expand_builtin (tree exp,
18850 rtx subtarget ATTRIBUTE_UNUSED,
18851 enum machine_mode mode ATTRIBUTE_UNUSED,
18852 int ignore ATTRIBUTE_UNUSED)
18854 const struct builtin_description * d;
18855 enum insn_code icode;
18856 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18864 int fcode = DECL_FUNCTION_CODE (fndecl);
18866 enum machine_mode tmode;
18867 enum machine_mode mode0;
18868 enum machine_mode mode1;
18869 enum machine_mode mode2;
18871 if (fcode >= ARM_BUILTIN_NEON_BASE)
18872 return arm_expand_neon_builtin (fcode, exp, target);
18876 case ARM_BUILTIN_TEXTRMSB:
18877 case ARM_BUILTIN_TEXTRMUB:
18878 case ARM_BUILTIN_TEXTRMSH:
18879 case ARM_BUILTIN_TEXTRMUH:
18880 case ARM_BUILTIN_TEXTRMSW:
18881 case ARM_BUILTIN_TEXTRMUW:
18882 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18883 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18884 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18885 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18886 : CODE_FOR_iwmmxt_textrmw);
18888 arg0 = CALL_EXPR_ARG (exp, 0);
18889 arg1 = CALL_EXPR_ARG (exp, 1);
18890 op0 = expand_normal (arg0);
18891 op1 = expand_normal (arg1);
18892 tmode = insn_data[icode].operand[0].mode;
18893 mode0 = insn_data[icode].operand[1].mode;
18894 mode1 = insn_data[icode].operand[2].mode;
18896 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18897 op0 = copy_to_mode_reg (mode0, op0);
18898 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18900 /* @@@ better error message */
18901 error ("selector must be an immediate");
18902 return gen_reg_rtx (tmode);
18905 || GET_MODE (target) != tmode
18906 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18907 target = gen_reg_rtx (tmode);
18908 pat = GEN_FCN (icode) (target, op0, op1);
18914 case ARM_BUILTIN_TINSRB:
18915 case ARM_BUILTIN_TINSRH:
18916 case ARM_BUILTIN_TINSRW:
18917 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18918 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18919 : CODE_FOR_iwmmxt_tinsrw);
18920 arg0 = CALL_EXPR_ARG (exp, 0);
18921 arg1 = CALL_EXPR_ARG (exp, 1);
18922 arg2 = CALL_EXPR_ARG (exp, 2);
18923 op0 = expand_normal (arg0);
18924 op1 = expand_normal (arg1);
18925 op2 = expand_normal (arg2);
18926 tmode = insn_data[icode].operand[0].mode;
18927 mode0 = insn_data[icode].operand[1].mode;
18928 mode1 = insn_data[icode].operand[2].mode;
18929 mode2 = insn_data[icode].operand[3].mode;
18931 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18932 op0 = copy_to_mode_reg (mode0, op0);
18933 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18934 op1 = copy_to_mode_reg (mode1, op1);
18935 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18937 /* @@@ better error message */
18938 error ("selector must be an immediate");
18942 || GET_MODE (target) != tmode
18943 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18944 target = gen_reg_rtx (tmode);
18945 pat = GEN_FCN (icode) (target, op0, op1, op2);
18951 case ARM_BUILTIN_SETWCX:
18952 arg0 = CALL_EXPR_ARG (exp, 0);
18953 arg1 = CALL_EXPR_ARG (exp, 1);
18954 op0 = force_reg (SImode, expand_normal (arg0));
18955 op1 = expand_normal (arg1);
18956 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18959 case ARM_BUILTIN_GETWCX:
18960 arg0 = CALL_EXPR_ARG (exp, 0);
18961 op0 = expand_normal (arg0);
18962 target = gen_reg_rtx (SImode);
18963 emit_insn (gen_iwmmxt_tmrc (target, op0));
18966 case ARM_BUILTIN_WSHUFH:
18967 icode = CODE_FOR_iwmmxt_wshufh;
18968 arg0 = CALL_EXPR_ARG (exp, 0);
18969 arg1 = CALL_EXPR_ARG (exp, 1);
18970 op0 = expand_normal (arg0);
18971 op1 = expand_normal (arg1);
18972 tmode = insn_data[icode].operand[0].mode;
18973 mode1 = insn_data[icode].operand[1].mode;
18974 mode2 = insn_data[icode].operand[2].mode;
18976 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18977 op0 = copy_to_mode_reg (mode1, op0);
18978 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18980 /* @@@ better error message */
18981 error ("mask must be an immediate");
18985 || GET_MODE (target) != tmode
18986 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18987 target = gen_reg_rtx (tmode);
18988 pat = GEN_FCN (icode) (target, op0, op1);
18994 case ARM_BUILTIN_WSADB:
18995 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18996 case ARM_BUILTIN_WSADH:
18997 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18998 case ARM_BUILTIN_WSADBZ:
18999 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19000 case ARM_BUILTIN_WSADHZ:
19001 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19003 /* Several three-argument builtins. */
19004 case ARM_BUILTIN_WMACS:
19005 case ARM_BUILTIN_WMACU:
19006 case ARM_BUILTIN_WALIGN:
19007 case ARM_BUILTIN_TMIA:
19008 case ARM_BUILTIN_TMIAPH:
19009 case ARM_BUILTIN_TMIATT:
19010 case ARM_BUILTIN_TMIATB:
19011 case ARM_BUILTIN_TMIABT:
19012 case ARM_BUILTIN_TMIABB:
19013 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19014 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19015 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19016 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19017 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19018 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19019 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19020 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19021 : CODE_FOR_iwmmxt_walign);
19022 arg0 = CALL_EXPR_ARG (exp, 0);
19023 arg1 = CALL_EXPR_ARG (exp, 1);
19024 arg2 = CALL_EXPR_ARG (exp, 2);
19025 op0 = expand_normal (arg0);
19026 op1 = expand_normal (arg1);
19027 op2 = expand_normal (arg2);
19028 tmode = insn_data[icode].operand[0].mode;
19029 mode0 = insn_data[icode].operand[1].mode;
19030 mode1 = insn_data[icode].operand[2].mode;
19031 mode2 = insn_data[icode].operand[3].mode;
19033 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19034 op0 = copy_to_mode_reg (mode0, op0);
19035 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19036 op1 = copy_to_mode_reg (mode1, op1);
19037 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19038 op2 = copy_to_mode_reg (mode2, op2);
19040 || GET_MODE (target) != tmode
19041 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19042 target = gen_reg_rtx (tmode);
19043 pat = GEN_FCN (icode) (target, op0, op1, op2);
19049 case ARM_BUILTIN_WZERO:
19050 target = gen_reg_rtx (DImode);
19051 emit_insn (gen_iwmmxt_clrdi (target));
19054 case ARM_BUILTIN_THREAD_POINTER:
19055 return arm_load_tp (target);
19061 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19062 if (d->code == (const enum arm_builtins) fcode)
19063 return arm_expand_binop_builtin (d->icode, exp, target);
19065 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19066 if (d->code == (const enum arm_builtins) fcode)
19067 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19069 /* @@@ Should really do something sensible here. */
19073 /* Return the number (counting from 0) of
19074 the least significant set bit in MASK. */
19077 number_of_first_bit_set (unsigned mask)
19082 (mask & (1 << bit)) == 0;
19089 /* Emit code to push or pop registers to or from the stack. F is the
19090 assembly file. MASK is the registers to push or pop. PUSH is
19091 nonzero if we should push, and zero if we should pop. For debugging
19092 output, if pushing, adjust CFA_OFFSET by the amount of space added
19093 to the stack. REAL_REGS should have the same number of bits set as
19094 MASK, and will be used instead (in the same order) to describe which
19095 registers were saved - this is used to mark the save slots when we
19096 push high registers after moving them to low registers. */
19098 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19099 unsigned long real_regs)
19102 int lo_mask = mask & 0xFF;
19103 int pushed_words = 0;
19107 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19109 /* Special case. Do not generate a POP PC statement here, do it in
19111 thumb_exit (f, -1);
19115 if (ARM_EABI_UNWIND_TABLES && push)
19117 fprintf (f, "\t.save\t{");
19118 for (regno = 0; regno < 15; regno++)
19120 if (real_regs & (1 << regno))
19122 if (real_regs & ((1 << regno) -1))
19124 asm_fprintf (f, "%r", regno);
19127 fprintf (f, "}\n");
19130 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19132 /* Look at the low registers first. */
19133 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19137 asm_fprintf (f, "%r", regno);
19139 if ((lo_mask & ~1) != 0)
19146 if (push && (mask & (1 << LR_REGNUM)))
19148 /* Catch pushing the LR. */
19152 asm_fprintf (f, "%r", LR_REGNUM);
19156 else if (!push && (mask & (1 << PC_REGNUM)))
19158 /* Catch popping the PC. */
19159 if (TARGET_INTERWORK || TARGET_BACKTRACE
19160 || crtl->calls_eh_return)
19162 /* The PC is never poped directly, instead
19163 it is popped into r3 and then BX is used. */
19164 fprintf (f, "}\n");
19166 thumb_exit (f, -1);
19175 asm_fprintf (f, "%r", PC_REGNUM);
19179 fprintf (f, "}\n");
19181 if (push && pushed_words && dwarf2out_do_frame ())
19183 char *l = dwarf2out_cfi_label (false);
19184 int pushed_mask = real_regs;
19186 *cfa_offset += pushed_words * 4;
19187 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19190 pushed_mask = real_regs;
19191 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19193 if (pushed_mask & 1)
19194 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19199 /* Generate code to return from a thumb function.
19200 If 'reg_containing_return_addr' is -1, then the return address is
19201 actually on the stack, at the stack pointer. */
19203 thumb_exit (FILE *f, int reg_containing_return_addr)
19205 unsigned regs_available_for_popping;
19206 unsigned regs_to_pop;
19208 unsigned available;
19212 int restore_a4 = FALSE;
19214 /* Compute the registers we need to pop. */
19218 if (reg_containing_return_addr == -1)
19220 regs_to_pop |= 1 << LR_REGNUM;
19224 if (TARGET_BACKTRACE)
19226 /* Restore the (ARM) frame pointer and stack pointer. */
19227 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19231 /* If there is nothing to pop then just emit the BX instruction and
19233 if (pops_needed == 0)
19235 if (crtl->calls_eh_return)
19236 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19238 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19241 /* Otherwise if we are not supporting interworking and we have not created
19242 a backtrace structure and the function was not entered in ARM mode then
19243 just pop the return address straight into the PC. */
19244 else if (!TARGET_INTERWORK
19245 && !TARGET_BACKTRACE
19246 && !is_called_in_ARM_mode (current_function_decl)
19247 && !crtl->calls_eh_return)
19249 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19253 /* Find out how many of the (return) argument registers we can corrupt. */
19254 regs_available_for_popping = 0;
19256 /* If returning via __builtin_eh_return, the bottom three registers
19257 all contain information needed for the return. */
19258 if (crtl->calls_eh_return)
19262 /* If we can deduce the registers used from the function's
19263 return value. This is more reliable that examining
19264 df_regs_ever_live_p () because that will be set if the register is
19265 ever used in the function, not just if the register is used
19266 to hold a return value. */
19268 if (crtl->return_rtx != 0)
19269 mode = GET_MODE (crtl->return_rtx);
19271 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19273 size = GET_MODE_SIZE (mode);
19277 /* In a void function we can use any argument register.
19278 In a function that returns a structure on the stack
19279 we can use the second and third argument registers. */
19280 if (mode == VOIDmode)
19281 regs_available_for_popping =
19282 (1 << ARG_REGISTER (1))
19283 | (1 << ARG_REGISTER (2))
19284 | (1 << ARG_REGISTER (3));
19286 regs_available_for_popping =
19287 (1 << ARG_REGISTER (2))
19288 | (1 << ARG_REGISTER (3));
19290 else if (size <= 4)
19291 regs_available_for_popping =
19292 (1 << ARG_REGISTER (2))
19293 | (1 << ARG_REGISTER (3));
19294 else if (size <= 8)
19295 regs_available_for_popping =
19296 (1 << ARG_REGISTER (3));
19299 /* Match registers to be popped with registers into which we pop them. */
19300 for (available = regs_available_for_popping,
19301 required = regs_to_pop;
19302 required != 0 && available != 0;
19303 available &= ~(available & - available),
19304 required &= ~(required & - required))
19307 /* If we have any popping registers left over, remove them. */
19309 regs_available_for_popping &= ~available;
19311 /* Otherwise if we need another popping register we can use
19312 the fourth argument register. */
19313 else if (pops_needed)
19315 /* If we have not found any free argument registers and
19316 reg a4 contains the return address, we must move it. */
19317 if (regs_available_for_popping == 0
19318 && reg_containing_return_addr == LAST_ARG_REGNUM)
19320 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19321 reg_containing_return_addr = LR_REGNUM;
19323 else if (size > 12)
19325 /* Register a4 is being used to hold part of the return value,
19326 but we have dire need of a free, low register. */
19329 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
19332 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19334 /* The fourth argument register is available. */
19335 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19341 /* Pop as many registers as we can. */
19342 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19343 regs_available_for_popping);
19345 /* Process the registers we popped. */
19346 if (reg_containing_return_addr == -1)
19348 /* The return address was popped into the lowest numbered register. */
19349 regs_to_pop &= ~(1 << LR_REGNUM);
19351 reg_containing_return_addr =
19352 number_of_first_bit_set (regs_available_for_popping);
19354 /* Remove this register for the mask of available registers, so that
19355 the return address will not be corrupted by further pops. */
19356 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19359 /* If we popped other registers then handle them here. */
19360 if (regs_available_for_popping)
19364 /* Work out which register currently contains the frame pointer. */
19365 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
19367 /* Move it into the correct place. */
19368 asm_fprintf (f, "\tmov\t%r, %r\n",
19369 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
19371 /* (Temporarily) remove it from the mask of popped registers. */
19372 regs_available_for_popping &= ~(1 << frame_pointer);
19373 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
19375 if (regs_available_for_popping)
19379 /* We popped the stack pointer as well,
19380 find the register that contains it. */
19381 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
19383 /* Move it into the stack register. */
19384 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
19386 /* At this point we have popped all necessary registers, so
19387 do not worry about restoring regs_available_for_popping
19388 to its correct value:
19390 assert (pops_needed == 0)
19391 assert (regs_available_for_popping == (1 << frame_pointer))
19392 assert (regs_to_pop == (1 << STACK_POINTER)) */
19396 /* Since we have just move the popped value into the frame
19397 pointer, the popping register is available for reuse, and
19398 we know that we still have the stack pointer left to pop. */
19399 regs_available_for_popping |= (1 << frame_pointer);
19403 /* If we still have registers left on the stack, but we no longer have
19404 any registers into which we can pop them, then we must move the return
19405 address into the link register and make available the register that
19407 if (regs_available_for_popping == 0 && pops_needed > 0)
19409 regs_available_for_popping |= 1 << reg_containing_return_addr;
19411 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
19412 reg_containing_return_addr);
19414 reg_containing_return_addr = LR_REGNUM;
19417 /* If we have registers left on the stack then pop some more.
19418 We know that at most we will want to pop FP and SP. */
19419 if (pops_needed > 0)
19424 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19425 regs_available_for_popping);
19427 /* We have popped either FP or SP.
19428 Move whichever one it is into the correct register. */
19429 popped_into = number_of_first_bit_set (regs_available_for_popping);
19430 move_to = number_of_first_bit_set (regs_to_pop);
19432 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
19434 regs_to_pop &= ~(1 << move_to);
19439 /* If we still have not popped everything then we must have only
19440 had one register available to us and we are now popping the SP. */
19441 if (pops_needed > 0)
19445 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19446 regs_available_for_popping);
19448 popped_into = number_of_first_bit_set (regs_available_for_popping);
19450 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19452 assert (regs_to_pop == (1 << STACK_POINTER))
19453 assert (pops_needed == 1)
19457 /* If necessary restore the a4 register. */
19460 if (reg_containing_return_addr != LR_REGNUM)
19462 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19463 reg_containing_return_addr = LR_REGNUM;
19466 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19469 if (crtl->calls_eh_return)
19470 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19472 /* Return to caller. */
19473 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19478 thumb1_final_prescan_insn (rtx insn)
19480 if (flag_print_asm_name)
19481 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19482 INSN_ADDRESSES (INSN_UID (insn)));
19486 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19488 unsigned HOST_WIDE_INT mask = 0xff;
19491 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19492 if (val == 0) /* XXX */
19495 for (i = 0; i < 25; i++)
19496 if ((val & (mask << i)) == val)
19502 /* Returns nonzero if the current function contains,
19503 or might contain a far jump. */
19505 thumb_far_jump_used_p (void)
19509 /* This test is only important for leaf functions. */
19510 /* assert (!leaf_function_p ()); */
19512 /* If we have already decided that far jumps may be used,
19513 do not bother checking again, and always return true even if
19514 it turns out that they are not being used. Once we have made
19515 the decision that far jumps are present (and that hence the link
19516 register will be pushed onto the stack) we cannot go back on it. */
19517 if (cfun->machine->far_jump_used)
19520 /* If this function is not being called from the prologue/epilogue
19521 generation code then it must be being called from the
19522 INITIAL_ELIMINATION_OFFSET macro. */
19523 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19525 /* In this case we know that we are being asked about the elimination
19526 of the arg pointer register. If that register is not being used,
19527 then there are no arguments on the stack, and we do not have to
19528 worry that a far jump might force the prologue to push the link
19529 register, changing the stack offsets. In this case we can just
19530 return false, since the presence of far jumps in the function will
19531 not affect stack offsets.
19533 If the arg pointer is live (or if it was live, but has now been
19534 eliminated and so set to dead) then we do have to test to see if
19535 the function might contain a far jump. This test can lead to some
19536 false negatives, since before reload is completed, then length of
19537 branch instructions is not known, so gcc defaults to returning their
19538 longest length, which in turn sets the far jump attribute to true.
19540 A false negative will not result in bad code being generated, but it
19541 will result in a needless push and pop of the link register. We
19542 hope that this does not occur too often.
19544 If we need doubleword stack alignment this could affect the other
19545 elimination offsets so we can't risk getting it wrong. */
19546 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19547 cfun->machine->arg_pointer_live = 1;
19548 else if (!cfun->machine->arg_pointer_live)
19552 /* Check to see if the function contains a branch
19553 insn with the far jump attribute set. */
19554 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19556 if (GET_CODE (insn) == JUMP_INSN
19557 /* Ignore tablejump patterns. */
19558 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19559 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19560 && get_attr_far_jump (insn) == FAR_JUMP_YES
19563 /* Record the fact that we have decided that
19564 the function does use far jumps. */
19565 cfun->machine->far_jump_used = 1;
19573 /* Return nonzero if FUNC must be entered in ARM mode. */
19575 is_called_in_ARM_mode (tree func)
19577 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19579 /* Ignore the problem about functions whose address is taken. */
19580 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
19584 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19590 /* Given the stack offsets and register mask in OFFSETS, decide how
19591 many additional registers to push instead of subtracting a constant
19592 from SP. For epilogues the principle is the same except we use pop.
19593 FOR_PROLOGUE indicates which we're generating. */
19595 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
19597 HOST_WIDE_INT amount;
19598 unsigned long live_regs_mask = offsets->saved_regs_mask;
19599 /* Extract a mask of the ones we can give to the Thumb's push/pop
19601 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
19602 /* Then count how many other high registers will need to be pushed. */
19603 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19604 int n_free, reg_base;
19606 if (!for_prologue && frame_pointer_needed)
19607 amount = offsets->locals_base - offsets->saved_regs;
19609 amount = offsets->outgoing_args - offsets->saved_regs;
19611 /* If the stack frame size is 512 exactly, we can save one load
19612 instruction, which should make this a win even when optimizing
19614 if (!optimize_size && amount != 512)
19617 /* Can't do this if there are high registers to push. */
19618 if (high_regs_pushed != 0)
19621 /* Shouldn't do it in the prologue if no registers would normally
19622 be pushed at all. In the epilogue, also allow it if we'll have
19623 a pop insn for the PC. */
19626 || TARGET_BACKTRACE
19627 || (live_regs_mask & 1 << LR_REGNUM) == 0
19628 || TARGET_INTERWORK
19629 || crtl->args.pretend_args_size != 0))
19632 /* Don't do this if thumb_expand_prologue wants to emit instructions
19633 between the push and the stack frame allocation. */
19635 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
19636 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
19643 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
19644 live_regs_mask >>= reg_base;
19647 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
19648 && (for_prologue || call_used_regs[reg_base + n_free]))
19650 live_regs_mask >>= 1;
19656 gcc_assert (amount / 4 * 4 == amount);
19658 if (amount >= 512 && (amount - n_free * 4) < 512)
19659 return (amount - 508) / 4;
19660 if (amount <= n_free * 4)
19665 /* The bits which aren't usefully expanded as rtl. */
19667 thumb_unexpanded_epilogue (void)
19669 arm_stack_offsets *offsets;
19671 unsigned long live_regs_mask = 0;
19672 int high_regs_pushed = 0;
19674 int had_to_push_lr;
19677 if (cfun->machine->return_used_this_function != 0)
19680 if (IS_NAKED (arm_current_func_type ()))
19683 offsets = arm_get_frame_offsets ();
19684 live_regs_mask = offsets->saved_regs_mask;
19685 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19687 /* If we can deduce the registers used from the function's return value.
19688 This is more reliable that examining df_regs_ever_live_p () because that
19689 will be set if the register is ever used in the function, not just if
19690 the register is used to hold a return value. */
19691 size = arm_size_return_regs ();
19693 extra_pop = thumb1_extra_regs_pushed (offsets, false);
19696 unsigned long extra_mask = (1 << extra_pop) - 1;
19697 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
19700 /* The prolog may have pushed some high registers to use as
19701 work registers. e.g. the testsuite file:
19702 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19703 compiles to produce:
19704 push {r4, r5, r6, r7, lr}
19708 as part of the prolog. We have to undo that pushing here. */
19710 if (high_regs_pushed)
19712 unsigned long mask = live_regs_mask & 0xff;
19715 /* The available low registers depend on the size of the value we are
19723 /* Oh dear! We have no low registers into which we can pop
19726 ("no low registers available for popping high registers");
19728 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19729 if (live_regs_mask & (1 << next_hi_reg))
19732 while (high_regs_pushed)
19734 /* Find lo register(s) into which the high register(s) can
19736 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19738 if (mask & (1 << regno))
19739 high_regs_pushed--;
19740 if (high_regs_pushed == 0)
19744 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19746 /* Pop the values into the low register(s). */
19747 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19749 /* Move the value(s) into the high registers. */
19750 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19752 if (mask & (1 << regno))
19754 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19757 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19758 if (live_regs_mask & (1 << next_hi_reg))
19763 live_regs_mask &= ~0x0f00;
19766 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19767 live_regs_mask &= 0xff;
19769 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19771 /* Pop the return address into the PC. */
19772 if (had_to_push_lr)
19773 live_regs_mask |= 1 << PC_REGNUM;
19775 /* Either no argument registers were pushed or a backtrace
19776 structure was created which includes an adjusted stack
19777 pointer, so just pop everything. */
19778 if (live_regs_mask)
19779 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19782 /* We have either just popped the return address into the
19783 PC or it is was kept in LR for the entire function.
19784 Note that thumb_pushpop has already called thumb_exit if the
19785 PC was in the list. */
19786 if (!had_to_push_lr)
19787 thumb_exit (asm_out_file, LR_REGNUM);
19791 /* Pop everything but the return address. */
19792 if (live_regs_mask)
19793 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19796 if (had_to_push_lr)
19800 /* We have no free low regs, so save one. */
19801 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19805 /* Get the return address into a temporary register. */
19806 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19807 1 << LAST_ARG_REGNUM);
19811 /* Move the return address to lr. */
19812 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19814 /* Restore the low register. */
19815 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19820 regno = LAST_ARG_REGNUM;
19825 /* Remove the argument registers that were pushed onto the stack. */
19826 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19827 SP_REGNUM, SP_REGNUM,
19828 crtl->args.pretend_args_size);
19830 thumb_exit (asm_out_file, regno);
19836 /* Functions to save and restore machine-specific function data. */
19837 static struct machine_function *
19838 arm_init_machine_status (void)
19840 struct machine_function *machine;
19841 machine = ggc_alloc_cleared_machine_function ();
19843 #if ARM_FT_UNKNOWN != 0
19844 machine->func_type = ARM_FT_UNKNOWN;
19849 /* Return an RTX indicating where the return address to the
19850 calling function can be found. */
19852 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19857 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19860 /* Do anything needed before RTL is emitted for each function. */
19862 arm_init_expanders (void)
19864 /* Arrange to initialize and mark the machine per-function status. */
19865 init_machine_status = arm_init_machine_status;
19867 /* This is to stop the combine pass optimizing away the alignment
19868 adjustment of va_arg. */
19869 /* ??? It is claimed that this should not be necessary. */
19871 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19875 /* Like arm_compute_initial_elimination offset. Simpler because there
19876 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19877 to point at the base of the local variables after static stack
19878 space for a function has been allocated. */
19881 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19883 arm_stack_offsets *offsets;
19885 offsets = arm_get_frame_offsets ();
19889 case ARG_POINTER_REGNUM:
19892 case STACK_POINTER_REGNUM:
19893 return offsets->outgoing_args - offsets->saved_args;
19895 case FRAME_POINTER_REGNUM:
19896 return offsets->soft_frame - offsets->saved_args;
19898 case ARM_HARD_FRAME_POINTER_REGNUM:
19899 return offsets->saved_regs - offsets->saved_args;
19901 case THUMB_HARD_FRAME_POINTER_REGNUM:
19902 return offsets->locals_base - offsets->saved_args;
19905 gcc_unreachable ();
19909 case FRAME_POINTER_REGNUM:
19912 case STACK_POINTER_REGNUM:
19913 return offsets->outgoing_args - offsets->soft_frame;
19915 case ARM_HARD_FRAME_POINTER_REGNUM:
19916 return offsets->saved_regs - offsets->soft_frame;
19918 case THUMB_HARD_FRAME_POINTER_REGNUM:
19919 return offsets->locals_base - offsets->soft_frame;
19922 gcc_unreachable ();
19927 gcc_unreachable ();
19931 /* Generate the rest of a function's prologue. */
19933 thumb1_expand_prologue (void)
19937 HOST_WIDE_INT amount;
19938 arm_stack_offsets *offsets;
19939 unsigned long func_type;
19941 unsigned long live_regs_mask;
19943 func_type = arm_current_func_type ();
19945 /* Naked functions don't have prologues. */
19946 if (IS_NAKED (func_type))
19949 if (IS_INTERRUPT (func_type))
19951 error ("interrupt Service Routines cannot be coded in Thumb mode");
19955 offsets = arm_get_frame_offsets ();
19956 live_regs_mask = offsets->saved_regs_mask;
19957 /* Load the pic register before setting the frame pointer,
19958 so we can use r7 as a temporary work register. */
19959 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19960 arm_load_pic_register (live_regs_mask);
19962 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19963 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19964 stack_pointer_rtx);
19966 amount = offsets->outgoing_args - offsets->saved_regs;
19967 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
19972 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19973 GEN_INT (- amount)));
19974 RTX_FRAME_RELATED_P (insn) = 1;
19980 /* The stack decrement is too big for an immediate value in a single
19981 insn. In theory we could issue multiple subtracts, but after
19982 three of them it becomes more space efficient to place the full
19983 value in the constant pool and load into a register. (Also the
19984 ARM debugger really likes to see only one stack decrement per
19985 function). So instead we look for a scratch register into which
19986 we can load the decrement, and then we subtract this from the
19987 stack pointer. Unfortunately on the thumb the only available
19988 scratch registers are the argument registers, and we cannot use
19989 these as they may hold arguments to the function. Instead we
19990 attempt to locate a call preserved register which is used by this
19991 function. If we can find one, then we know that it will have
19992 been pushed at the start of the prologue and so we can corrupt
19994 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19995 if (live_regs_mask & (1 << regno))
19998 gcc_assert(regno <= LAST_LO_REGNUM);
20000 reg = gen_rtx_REG (SImode, regno);
20002 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20004 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20005 stack_pointer_rtx, reg));
20006 RTX_FRAME_RELATED_P (insn) = 1;
20007 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20008 plus_constant (stack_pointer_rtx,
20010 RTX_FRAME_RELATED_P (dwarf) = 1;
20011 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20015 if (frame_pointer_needed)
20016 thumb_set_frame_pointer (offsets);
20018 /* If we are profiling, make sure no instructions are scheduled before
20019 the call to mcount. Similarly if the user has requested no
20020 scheduling in the prolog. Similarly if we want non-call exceptions
20021 using the EABI unwinder, to prevent faulting instructions from being
20022 swapped with a stack adjustment. */
20023 if (crtl->profile || !TARGET_SCHED_PROLOG
20024 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
20025 emit_insn (gen_blockage ());
20027 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20028 if (live_regs_mask & 0xff)
20029 cfun->machine->lr_save_eliminated = 0;
20034 thumb1_expand_epilogue (void)
20036 HOST_WIDE_INT amount;
20037 arm_stack_offsets *offsets;
20040 /* Naked functions don't have prologues. */
20041 if (IS_NAKED (arm_current_func_type ()))
20044 offsets = arm_get_frame_offsets ();
20045 amount = offsets->outgoing_args - offsets->saved_regs;
20047 if (frame_pointer_needed)
20049 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20050 amount = offsets->locals_base - offsets->saved_regs;
20052 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20054 gcc_assert (amount >= 0);
20058 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20059 GEN_INT (amount)));
20062 /* r3 is always free in the epilogue. */
20063 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20065 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20066 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20070 /* Emit a USE (stack_pointer_rtx), so that
20071 the stack adjustment will not be deleted. */
20072 emit_insn (gen_prologue_use (stack_pointer_rtx));
20074 if (crtl->profile || !TARGET_SCHED_PROLOG)
20075 emit_insn (gen_blockage ());
20077 /* Emit a clobber for each insn that will be restored in the epilogue,
20078 so that flow2 will get register lifetimes correct. */
20079 for (regno = 0; regno < 13; regno++)
20080 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20081 emit_clobber (gen_rtx_REG (SImode, regno));
20083 if (! df_regs_ever_live_p (LR_REGNUM))
20084 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20088 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20090 arm_stack_offsets *offsets;
20091 unsigned long live_regs_mask = 0;
20092 unsigned long l_mask;
20093 unsigned high_regs_pushed = 0;
20094 int cfa_offset = 0;
20097 if (IS_NAKED (arm_current_func_type ()))
20100 if (is_called_in_ARM_mode (current_function_decl))
20104 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20105 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20107 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20109 /* Generate code sequence to switch us into Thumb mode. */
20110 /* The .code 32 directive has already been emitted by
20111 ASM_DECLARE_FUNCTION_NAME. */
20112 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20113 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20115 /* Generate a label, so that the debugger will notice the
20116 change in instruction sets. This label is also used by
20117 the assembler to bypass the ARM code when this function
20118 is called from a Thumb encoded function elsewhere in the
20119 same file. Hence the definition of STUB_NAME here must
20120 agree with the definition in gas/config/tc-arm.c. */
20122 #define STUB_NAME ".real_start_of"
20124 fprintf (f, "\t.code\t16\n");
20126 if (arm_dllexport_name_p (name))
20127 name = arm_strip_name_encoding (name);
20129 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20130 fprintf (f, "\t.thumb_func\n");
20131 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20134 if (crtl->args.pretend_args_size)
20136 /* Output unwind directive for the stack adjustment. */
20137 if (ARM_EABI_UNWIND_TABLES)
20138 fprintf (f, "\t.pad #%d\n",
20139 crtl->args.pretend_args_size);
20141 if (cfun->machine->uses_anonymous_args)
20145 fprintf (f, "\tpush\t{");
20147 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20149 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20150 regno <= LAST_ARG_REGNUM;
20152 asm_fprintf (f, "%r%s", regno,
20153 regno == LAST_ARG_REGNUM ? "" : ", ");
20155 fprintf (f, "}\n");
20158 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20159 SP_REGNUM, SP_REGNUM,
20160 crtl->args.pretend_args_size);
20162 /* We don't need to record the stores for unwinding (would it
20163 help the debugger any if we did?), but record the change in
20164 the stack pointer. */
20165 if (dwarf2out_do_frame ())
20167 char *l = dwarf2out_cfi_label (false);
20169 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20170 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20174 /* Get the registers we are going to push. */
20175 offsets = arm_get_frame_offsets ();
20176 live_regs_mask = offsets->saved_regs_mask;
20177 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20178 l_mask = live_regs_mask & 0x40ff;
20179 /* Then count how many other high registers will need to be pushed. */
20180 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20182 if (TARGET_BACKTRACE)
20185 unsigned work_register;
20187 /* We have been asked to create a stack backtrace structure.
20188 The code looks like this:
20192 0 sub SP, #16 Reserve space for 4 registers.
20193 2 push {R7} Push low registers.
20194 4 add R7, SP, #20 Get the stack pointer before the push.
20195 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20196 8 mov R7, PC Get hold of the start of this code plus 12.
20197 10 str R7, [SP, #16] Store it.
20198 12 mov R7, FP Get hold of the current frame pointer.
20199 14 str R7, [SP, #4] Store it.
20200 16 mov R7, LR Get hold of the current return address.
20201 18 str R7, [SP, #12] Store it.
20202 20 add R7, SP, #16 Point at the start of the backtrace structure.
20203 22 mov FP, R7 Put this value into the frame pointer. */
20205 work_register = thumb_find_work_register (live_regs_mask);
20207 if (ARM_EABI_UNWIND_TABLES)
20208 asm_fprintf (f, "\t.pad #16\n");
20211 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20212 SP_REGNUM, SP_REGNUM);
20214 if (dwarf2out_do_frame ())
20216 char *l = dwarf2out_cfi_label (false);
20218 cfa_offset = cfa_offset + 16;
20219 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20224 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20225 offset = bit_count (l_mask) * UNITS_PER_WORD;
20230 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20231 offset + 16 + crtl->args.pretend_args_size);
20233 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20236 /* Make sure that the instruction fetching the PC is in the right place
20237 to calculate "start of backtrace creation code + 12". */
20240 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20241 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20243 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20244 ARM_HARD_FRAME_POINTER_REGNUM);
20245 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20250 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20251 ARM_HARD_FRAME_POINTER_REGNUM);
20252 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20254 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20255 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20259 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20260 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20262 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20264 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20265 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20267 /* Optimization: If we are not pushing any low registers but we are going
20268 to push some high registers then delay our first push. This will just
20269 be a push of LR and we can combine it with the push of the first high
20271 else if ((l_mask & 0xff) != 0
20272 || (high_regs_pushed == 0 && l_mask))
20274 unsigned long mask = l_mask;
20275 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20276 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20279 if (high_regs_pushed)
20281 unsigned pushable_regs;
20282 unsigned next_hi_reg;
20284 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20285 if (live_regs_mask & (1 << next_hi_reg))
20288 pushable_regs = l_mask & 0xff;
20290 if (pushable_regs == 0)
20291 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20293 while (high_regs_pushed > 0)
20295 unsigned long real_regs_mask = 0;
20297 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20299 if (pushable_regs & (1 << regno))
20301 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20303 high_regs_pushed --;
20304 real_regs_mask |= (1 << next_hi_reg);
20306 if (high_regs_pushed)
20308 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20310 if (live_regs_mask & (1 << next_hi_reg))
20315 pushable_regs &= ~((1 << regno) - 1);
20321 /* If we had to find a work register and we have not yet
20322 saved the LR then add it to the list of regs to push. */
20323 if (l_mask == (1 << LR_REGNUM))
20325 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20327 real_regs_mask | (1 << LR_REGNUM));
20331 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
20336 /* Handle the case of a double word load into a low register from
20337 a computed memory address. The computed address may involve a
20338 register which is overwritten by the load. */
20340 thumb_load_double_from_address (rtx *operands)
20348 gcc_assert (GET_CODE (operands[0]) == REG);
20349 gcc_assert (GET_CODE (operands[1]) == MEM);
20351 /* Get the memory address. */
20352 addr = XEXP (operands[1], 0);
20354 /* Work out how the memory address is computed. */
20355 switch (GET_CODE (addr))
20358 operands[2] = adjust_address (operands[1], SImode, 4);
20360 if (REGNO (operands[0]) == REGNO (addr))
20362 output_asm_insn ("ldr\t%H0, %2", operands);
20363 output_asm_insn ("ldr\t%0, %1", operands);
20367 output_asm_insn ("ldr\t%0, %1", operands);
20368 output_asm_insn ("ldr\t%H0, %2", operands);
20373 /* Compute <address> + 4 for the high order load. */
20374 operands[2] = adjust_address (operands[1], SImode, 4);
20376 output_asm_insn ("ldr\t%0, %1", operands);
20377 output_asm_insn ("ldr\t%H0, %2", operands);
20381 arg1 = XEXP (addr, 0);
20382 arg2 = XEXP (addr, 1);
20384 if (CONSTANT_P (arg1))
20385 base = arg2, offset = arg1;
20387 base = arg1, offset = arg2;
20389 gcc_assert (GET_CODE (base) == REG);
20391 /* Catch the case of <address> = <reg> + <reg> */
20392 if (GET_CODE (offset) == REG)
20394 int reg_offset = REGNO (offset);
20395 int reg_base = REGNO (base);
20396 int reg_dest = REGNO (operands[0]);
20398 /* Add the base and offset registers together into the
20399 higher destination register. */
20400 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20401 reg_dest + 1, reg_base, reg_offset);
20403 /* Load the lower destination register from the address in
20404 the higher destination register. */
20405 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20406 reg_dest, reg_dest + 1);
20408 /* Load the higher destination register from its own address
20410 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
20411 reg_dest + 1, reg_dest + 1);
20415 /* Compute <address> + 4 for the high order load. */
20416 operands[2] = adjust_address (operands[1], SImode, 4);
20418 /* If the computed address is held in the low order register
20419 then load the high order register first, otherwise always
20420 load the low order register first. */
20421 if (REGNO (operands[0]) == REGNO (base))
20423 output_asm_insn ("ldr\t%H0, %2", operands);
20424 output_asm_insn ("ldr\t%0, %1", operands);
20428 output_asm_insn ("ldr\t%0, %1", operands);
20429 output_asm_insn ("ldr\t%H0, %2", operands);
20435 /* With no registers to worry about we can just load the value
20437 operands[2] = adjust_address (operands[1], SImode, 4);
20439 output_asm_insn ("ldr\t%H0, %2", operands);
20440 output_asm_insn ("ldr\t%0, %1", operands);
20444 gcc_unreachable ();
20451 thumb_output_move_mem_multiple (int n, rtx *operands)
20458 if (REGNO (operands[4]) > REGNO (operands[5]))
20461 operands[4] = operands[5];
20464 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
20465 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
20469 if (REGNO (operands[4]) > REGNO (operands[5]))
20472 operands[4] = operands[5];
20475 if (REGNO (operands[5]) > REGNO (operands[6]))
20478 operands[5] = operands[6];
20481 if (REGNO (operands[4]) > REGNO (operands[5]))
20484 operands[4] = operands[5];
20488 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
20489 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
20493 gcc_unreachable ();
20499 /* Output a call-via instruction for thumb state. */
20501 thumb_call_via_reg (rtx reg)
20503 int regno = REGNO (reg);
20506 gcc_assert (regno < LR_REGNUM);
20508 /* If we are in the normal text section we can use a single instance
20509 per compilation unit. If we are doing function sections, then we need
20510 an entry per section, since we can't rely on reachability. */
20511 if (in_section == text_section)
20513 thumb_call_reg_needed = 1;
20515 if (thumb_call_via_label[regno] == NULL)
20516 thumb_call_via_label[regno] = gen_label_rtx ();
20517 labelp = thumb_call_via_label + regno;
20521 if (cfun->machine->call_via[regno] == NULL)
20522 cfun->machine->call_via[regno] = gen_label_rtx ();
20523 labelp = cfun->machine->call_via + regno;
20526 output_asm_insn ("bl\t%a0", labelp);
20530 /* Routines for generating rtl. */
20532 thumb_expand_movmemqi (rtx *operands)
20534 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20535 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20536 HOST_WIDE_INT len = INTVAL (operands[2]);
20537 HOST_WIDE_INT offset = 0;
20541 emit_insn (gen_movmem12b (out, in, out, in));
20547 emit_insn (gen_movmem8b (out, in, out, in));
20553 rtx reg = gen_reg_rtx (SImode);
20554 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20555 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20562 rtx reg = gen_reg_rtx (HImode);
20563 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20564 plus_constant (in, offset))));
20565 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
20573 rtx reg = gen_reg_rtx (QImode);
20574 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20575 plus_constant (in, offset))));
20576 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
20582 thumb_reload_out_hi (rtx *operands)
20584 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20587 /* Handle reading a half-word from memory during reload. */
20589 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20591 gcc_unreachable ();
20594 /* Return the length of a function name prefix
20595 that starts with the character 'c'. */
20597 arm_get_strip_length (int c)
20601 ARM_NAME_ENCODING_LENGTHS
20606 /* Return a pointer to a function's name with any
20607 and all prefix encodings stripped from it. */
20609 arm_strip_name_encoding (const char *name)
20613 while ((skip = arm_get_strip_length (* name)))
20619 /* If there is a '*' anywhere in the name's prefix, then
20620 emit the stripped name verbatim, otherwise prepend an
20621 underscore if leading underscores are being used. */
20623 arm_asm_output_labelref (FILE *stream, const char *name)
20628 while ((skip = arm_get_strip_length (* name)))
20630 verbatim |= (*name == '*');
20635 fputs (name, stream);
20637 asm_fprintf (stream, "%U%s", name);
20641 arm_file_start (void)
20645 if (TARGET_UNIFIED_ASM)
20646 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20650 const char *fpu_name;
20651 if (arm_selected_arch)
20652 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
20654 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
20656 if (TARGET_SOFT_FLOAT)
20659 fpu_name = "softvfp";
20661 fpu_name = "softfpa";
20665 fpu_name = arm_fpu_desc->name;
20666 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
20668 if (TARGET_HARD_FLOAT)
20669 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20670 if (TARGET_HARD_FLOAT_ABI)
20671 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20674 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20676 /* Some of these attributes only apply when the corresponding features
20677 are used. However we don't have any easy way of figuring this out.
20678 Conservatively record the setting that would have been used. */
20680 /* Tag_ABI_FP_rounding. */
20681 if (flag_rounding_math)
20682 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20683 if (!flag_unsafe_math_optimizations)
20685 /* Tag_ABI_FP_denomal. */
20686 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20687 /* Tag_ABI_FP_exceptions. */
20688 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20690 /* Tag_ABI_FP_user_exceptions. */
20691 if (flag_signaling_nans)
20692 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20693 /* Tag_ABI_FP_number_model. */
20694 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20695 flag_finite_math_only ? 1 : 3);
20697 /* Tag_ABI_align8_needed. */
20698 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20699 /* Tag_ABI_align8_preserved. */
20700 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20701 /* Tag_ABI_enum_size. */
20702 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20703 flag_short_enums ? 1 : 2);
20705 /* Tag_ABI_optimization_goals. */
20708 else if (optimize >= 2)
20714 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20716 /* Tag_ABI_FP_16bit_format. */
20717 if (arm_fp16_format)
20718 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20719 (int)arm_fp16_format);
20721 if (arm_lang_output_object_attributes_hook)
20722 arm_lang_output_object_attributes_hook();
20724 default_file_start();
20728 arm_file_end (void)
20732 if (NEED_INDICATE_EXEC_STACK)
20733 /* Add .note.GNU-stack. */
20734 file_end_indicate_exec_stack ();
20736 if (! thumb_call_reg_needed)
20739 switch_to_section (text_section);
20740 asm_fprintf (asm_out_file, "\t.code 16\n");
20741 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20743 for (regno = 0; regno < LR_REGNUM; regno++)
20745 rtx label = thumb_call_via_label[regno];
20749 targetm.asm_out.internal_label (asm_out_file, "L",
20750 CODE_LABEL_NUMBER (label));
20751 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20757 /* Symbols in the text segment can be accessed without indirecting via the
20758 constant pool; it may take an extra binary operation, but this is still
20759 faster than indirecting via memory. Don't do this when not optimizing,
20760 since we won't be calculating al of the offsets necessary to do this
20764 arm_encode_section_info (tree decl, rtx rtl, int first)
20766 if (optimize > 0 && TREE_CONSTANT (decl))
20767 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20769 default_encode_section_info (decl, rtl, first);
20771 #endif /* !ARM_PE */
20774 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20776 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20777 && !strcmp (prefix, "L"))
20779 arm_ccfsm_state = 0;
20780 arm_target_insn = NULL;
20782 default_internal_label (stream, prefix, labelno);
20785 /* Output code to add DELTA to the first argument, and then jump
20786 to FUNCTION. Used for C++ multiple inheritance. */
20788 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20789 HOST_WIDE_INT delta,
20790 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20793 static int thunk_label = 0;
20796 int mi_delta = delta;
20797 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20799 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20802 mi_delta = - mi_delta;
20806 int labelno = thunk_label++;
20807 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20808 /* Thunks are entered in arm mode when avaiable. */
20809 if (TARGET_THUMB1_ONLY)
20811 /* push r3 so we can use it as a temporary. */
20812 /* TODO: Omit this save if r3 is not used. */
20813 fputs ("\tpush {r3}\n", file);
20814 fputs ("\tldr\tr3, ", file);
20818 fputs ("\tldr\tr12, ", file);
20820 assemble_name (file, label);
20821 fputc ('\n', file);
20824 /* If we are generating PIC, the ldr instruction below loads
20825 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20826 the address of the add + 8, so we have:
20828 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20831 Note that we have "+ 1" because some versions of GNU ld
20832 don't set the low bit of the result for R_ARM_REL32
20833 relocations against thumb function symbols.
20834 On ARMv6M this is +4, not +8. */
20835 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20836 assemble_name (file, labelpc);
20837 fputs (":\n", file);
20838 if (TARGET_THUMB1_ONLY)
20840 /* This is 2 insns after the start of the thunk, so we know it
20841 is 4-byte aligned. */
20842 fputs ("\tadd\tr3, pc, r3\n", file);
20843 fputs ("\tmov r12, r3\n", file);
20846 fputs ("\tadd\tr12, pc, r12\n", file);
20848 else if (TARGET_THUMB1_ONLY)
20849 fputs ("\tmov r12, r3\n", file);
20851 if (TARGET_THUMB1_ONLY)
20853 if (mi_delta > 255)
20855 fputs ("\tldr\tr3, ", file);
20856 assemble_name (file, label);
20857 fputs ("+4\n", file);
20858 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20859 mi_op, this_regno, this_regno);
20861 else if (mi_delta != 0)
20863 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20864 mi_op, this_regno, this_regno,
20870 /* TODO: Use movw/movt for large constants when available. */
20871 while (mi_delta != 0)
20873 if ((mi_delta & (3 << shift)) == 0)
20877 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20878 mi_op, this_regno, this_regno,
20879 mi_delta & (0xff << shift));
20880 mi_delta &= ~(0xff << shift);
20887 if (TARGET_THUMB1_ONLY)
20888 fputs ("\tpop\t{r3}\n", file);
20890 fprintf (file, "\tbx\tr12\n");
20891 ASM_OUTPUT_ALIGN (file, 2);
20892 assemble_name (file, label);
20893 fputs (":\n", file);
20896 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20897 rtx tem = XEXP (DECL_RTL (function), 0);
20898 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20899 tem = gen_rtx_MINUS (GET_MODE (tem),
20901 gen_rtx_SYMBOL_REF (Pmode,
20902 ggc_strdup (labelpc)));
20903 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20906 /* Output ".word .LTHUNKn". */
20907 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20909 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20910 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20914 fputs ("\tb\t", file);
20915 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20916 if (NEED_PLT_RELOC)
20917 fputs ("(PLT)", file);
20918 fputc ('\n', file);
20923 arm_emit_vector_const (FILE *file, rtx x)
20926 const char * pattern;
20928 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20930 switch (GET_MODE (x))
20932 case V2SImode: pattern = "%08x"; break;
20933 case V4HImode: pattern = "%04x"; break;
20934 case V8QImode: pattern = "%02x"; break;
20935 default: gcc_unreachable ();
20938 fprintf (file, "0x");
20939 for (i = CONST_VECTOR_NUNITS (x); i--;)
20943 element = CONST_VECTOR_ELT (x, i);
20944 fprintf (file, pattern, INTVAL (element));
20950 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20951 HFmode constant pool entries are actually loaded with ldr. */
20953 arm_emit_fp16_const (rtx c)
20958 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20959 bits = real_to_target (NULL, &r, HFmode);
20960 if (WORDS_BIG_ENDIAN)
20961 assemble_zeros (2);
20962 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20963 if (!WORDS_BIG_ENDIAN)
20964 assemble_zeros (2);
20968 arm_output_load_gr (rtx *operands)
20975 if (GET_CODE (operands [1]) != MEM
20976 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20977 || GET_CODE (reg = XEXP (sum, 0)) != REG
20978 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20979 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20980 return "wldrw%?\t%0, %1";
20982 /* Fix up an out-of-range load of a GR register. */
20983 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20984 wcgr = operands[0];
20986 output_asm_insn ("ldr%?\t%0, %1", operands);
20988 operands[0] = wcgr;
20990 output_asm_insn ("tmcr%?\t%0, %1", operands);
20991 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20996 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20998 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20999 named arg and all anonymous args onto the stack.
21000 XXX I know the prologue shouldn't be pushing registers, but it is faster
21004 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21005 enum machine_mode mode,
21008 int second_time ATTRIBUTE_UNUSED)
21012 cfun->machine->uses_anonymous_args = 1;
21013 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21015 nregs = pcum->aapcs_ncrn;
21016 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21020 nregs = pcum->nregs;
21022 if (nregs < NUM_ARG_REGS)
21023 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21026 /* Return nonzero if the CONSUMER instruction (a store) does not need
21027 PRODUCER's value to calculate the address. */
21030 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21032 rtx value = PATTERN (producer);
21033 rtx addr = PATTERN (consumer);
21035 if (GET_CODE (value) == COND_EXEC)
21036 value = COND_EXEC_CODE (value);
21037 if (GET_CODE (value) == PARALLEL)
21038 value = XVECEXP (value, 0, 0);
21039 value = XEXP (value, 0);
21040 if (GET_CODE (addr) == COND_EXEC)
21041 addr = COND_EXEC_CODE (addr);
21042 if (GET_CODE (addr) == PARALLEL)
21043 addr = XVECEXP (addr, 0, 0);
21044 addr = XEXP (addr, 0);
21046 return !reg_overlap_mentioned_p (value, addr);
21049 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21050 have an early register shift value or amount dependency on the
21051 result of PRODUCER. */
21054 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21056 rtx value = PATTERN (producer);
21057 rtx op = PATTERN (consumer);
21060 if (GET_CODE (value) == COND_EXEC)
21061 value = COND_EXEC_CODE (value);
21062 if (GET_CODE (value) == PARALLEL)
21063 value = XVECEXP (value, 0, 0);
21064 value = XEXP (value, 0);
21065 if (GET_CODE (op) == COND_EXEC)
21066 op = COND_EXEC_CODE (op);
21067 if (GET_CODE (op) == PARALLEL)
21068 op = XVECEXP (op, 0, 0);
21071 early_op = XEXP (op, 0);
21072 /* This is either an actual independent shift, or a shift applied to
21073 the first operand of another operation. We want the whole shift
21075 if (GET_CODE (early_op) == REG)
21078 return !reg_overlap_mentioned_p (value, early_op);
21081 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21082 have an early register shift value dependency on the result of
21086 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21088 rtx value = PATTERN (producer);
21089 rtx op = PATTERN (consumer);
21092 if (GET_CODE (value) == COND_EXEC)
21093 value = COND_EXEC_CODE (value);
21094 if (GET_CODE (value) == PARALLEL)
21095 value = XVECEXP (value, 0, 0);
21096 value = XEXP (value, 0);
21097 if (GET_CODE (op) == COND_EXEC)
21098 op = COND_EXEC_CODE (op);
21099 if (GET_CODE (op) == PARALLEL)
21100 op = XVECEXP (op, 0, 0);
21103 early_op = XEXP (op, 0);
21105 /* This is either an actual independent shift, or a shift applied to
21106 the first operand of another operation. We want the value being
21107 shifted, in either case. */
21108 if (GET_CODE (early_op) != REG)
21109 early_op = XEXP (early_op, 0);
21111 return !reg_overlap_mentioned_p (value, early_op);
21114 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21115 have an early register mult dependency on the result of
21119 arm_no_early_mul_dep (rtx producer, rtx consumer)
21121 rtx value = PATTERN (producer);
21122 rtx op = PATTERN (consumer);
21124 if (GET_CODE (value) == COND_EXEC)
21125 value = COND_EXEC_CODE (value);
21126 if (GET_CODE (value) == PARALLEL)
21127 value = XVECEXP (value, 0, 0);
21128 value = XEXP (value, 0);
21129 if (GET_CODE (op) == COND_EXEC)
21130 op = COND_EXEC_CODE (op);
21131 if (GET_CODE (op) == PARALLEL)
21132 op = XVECEXP (op, 0, 0);
21135 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21137 if (GET_CODE (XEXP (op, 0)) == MULT)
21138 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21140 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21146 /* We can't rely on the caller doing the proper promotion when
21147 using APCS or ATPCS. */
21150 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21152 return !TARGET_AAPCS_BASED;
21155 static enum machine_mode
21156 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21157 enum machine_mode mode,
21158 int *punsignedp ATTRIBUTE_UNUSED,
21159 const_tree fntype ATTRIBUTE_UNUSED,
21160 int for_return ATTRIBUTE_UNUSED)
21162 if (GET_MODE_CLASS (mode) == MODE_INT
21163 && GET_MODE_SIZE (mode) < 4)
21169 /* AAPCS based ABIs use short enums by default. */
21172 arm_default_short_enums (void)
21174 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21178 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21181 arm_align_anon_bitfield (void)
21183 return TARGET_AAPCS_BASED;
21187 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21190 arm_cxx_guard_type (void)
21192 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21195 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21196 has an accumulator dependency on the result of the producer (a
21197 multiplication instruction) and no other dependency on that result. */
21199 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21201 rtx mul = PATTERN (producer);
21202 rtx mac = PATTERN (consumer);
21204 rtx mac_op0, mac_op1, mac_acc;
21206 if (GET_CODE (mul) == COND_EXEC)
21207 mul = COND_EXEC_CODE (mul);
21208 if (GET_CODE (mac) == COND_EXEC)
21209 mac = COND_EXEC_CODE (mac);
21211 /* Check that mul is of the form (set (...) (mult ...))
21212 and mla is of the form (set (...) (plus (mult ...) (...))). */
21213 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21214 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21215 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21218 mul_result = XEXP (mul, 0);
21219 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21220 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21221 mac_acc = XEXP (XEXP (mac, 1), 1);
21223 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21224 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21225 && !reg_overlap_mentioned_p (mul_result, mac_op1));
21229 /* The EABI says test the least significant bit of a guard variable. */
21232 arm_cxx_guard_mask_bit (void)
21234 return TARGET_AAPCS_BASED;
21238 /* The EABI specifies that all array cookies are 8 bytes long. */
21241 arm_get_cookie_size (tree type)
21245 if (!TARGET_AAPCS_BASED)
21246 return default_cxx_get_cookie_size (type);
21248 size = build_int_cst (sizetype, 8);
21253 /* The EABI says that array cookies should also contain the element size. */
21256 arm_cookie_has_size (void)
21258 return TARGET_AAPCS_BASED;
21262 /* The EABI says constructors and destructors should return a pointer to
21263 the object constructed/destroyed. */
21266 arm_cxx_cdtor_returns_this (void)
21268 return TARGET_AAPCS_BASED;
21271 /* The EABI says that an inline function may never be the key
21275 arm_cxx_key_method_may_be_inline (void)
21277 return !TARGET_AAPCS_BASED;
21281 arm_cxx_determine_class_data_visibility (tree decl)
21283 if (!TARGET_AAPCS_BASED
21284 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21287 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21288 is exported. However, on systems without dynamic vague linkage,
21289 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21290 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21291 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21293 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21294 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21298 arm_cxx_class_data_always_comdat (void)
21300 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21301 vague linkage if the class has no key function. */
21302 return !TARGET_AAPCS_BASED;
21306 /* The EABI says __aeabi_atexit should be used to register static
21310 arm_cxx_use_aeabi_atexit (void)
21312 return TARGET_AAPCS_BASED;
21317 arm_set_return_address (rtx source, rtx scratch)
21319 arm_stack_offsets *offsets;
21320 HOST_WIDE_INT delta;
21322 unsigned long saved_regs;
21324 offsets = arm_get_frame_offsets ();
21325 saved_regs = offsets->saved_regs_mask;
21327 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21328 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21331 if (frame_pointer_needed)
21332 addr = plus_constant(hard_frame_pointer_rtx, -4);
21335 /* LR will be the first saved register. */
21336 delta = offsets->outgoing_args - (offsets->frame + 4);
21341 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
21342 GEN_INT (delta & ~4095)));
21347 addr = stack_pointer_rtx;
21349 addr = plus_constant (addr, delta);
21351 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21357 thumb_set_return_address (rtx source, rtx scratch)
21359 arm_stack_offsets *offsets;
21360 HOST_WIDE_INT delta;
21361 HOST_WIDE_INT limit;
21364 unsigned long mask;
21368 offsets = arm_get_frame_offsets ();
21369 mask = offsets->saved_regs_mask;
21370 if (mask & (1 << LR_REGNUM))
21373 /* Find the saved regs. */
21374 if (frame_pointer_needed)
21376 delta = offsets->soft_frame - offsets->saved_args;
21377 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
21383 delta = offsets->outgoing_args - offsets->saved_args;
21386 /* Allow for the stack frame. */
21387 if (TARGET_THUMB1 && TARGET_BACKTRACE)
21389 /* The link register is always the first saved register. */
21392 /* Construct the address. */
21393 addr = gen_rtx_REG (SImode, reg);
21396 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
21397 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
21401 addr = plus_constant (addr, delta);
21403 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21406 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21409 /* Implements target hook vector_mode_supported_p. */
21411 arm_vector_mode_supported_p (enum machine_mode mode)
21413 /* Neon also supports V2SImode, etc. listed in the clause below. */
21414 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
21415 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
21418 if ((TARGET_NEON || TARGET_IWMMXT)
21419 && ((mode == V2SImode)
21420 || (mode == V4HImode)
21421 || (mode == V8QImode)))
21427 /* Implements target hook small_register_classes_for_mode_p. */
21429 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
21431 return TARGET_THUMB1;
21434 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
21435 ARM insns and therefore guarantee that the shift count is modulo 256.
21436 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
21437 guarantee no particular behavior for out-of-range counts. */
21439 static unsigned HOST_WIDE_INT
21440 arm_shift_truncation_mask (enum machine_mode mode)
21442 return mode == SImode ? 255 : 0;
21446 /* Map internal gcc register numbers to DWARF2 register numbers. */
21449 arm_dbx_register_number (unsigned int regno)
21454 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
21455 compatibility. The EABI defines them as registers 96-103. */
21456 if (IS_FPA_REGNUM (regno))
21457 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
21459 if (IS_VFP_REGNUM (regno))
21461 /* See comment in arm_dwarf_register_span. */
21462 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21463 return 64 + regno - FIRST_VFP_REGNUM;
21465 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
21468 if (IS_IWMMXT_GR_REGNUM (regno))
21469 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
21471 if (IS_IWMMXT_REGNUM (regno))
21472 return 112 + regno - FIRST_IWMMXT_REGNUM;
21474 gcc_unreachable ();
21477 /* Dwarf models VFPv3 registers as 32 64-bit registers.
21478 GCC models tham as 64 32-bit registers, so we need to describe this to
21479 the DWARF generation code. Other registers can use the default. */
21481 arm_dwarf_register_span (rtx rtl)
21488 regno = REGNO (rtl);
21489 if (!IS_VFP_REGNUM (regno))
21492 /* XXX FIXME: The EABI defines two VFP register ranges:
21493 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
21495 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
21496 corresponding D register. Until GDB supports this, we shall use the
21497 legacy encodings. We also use these encodings for D0-D15 for
21498 compatibility with older debuggers. */
21499 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21502 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
21503 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
21504 regno = (regno - FIRST_VFP_REGNUM) / 2;
21505 for (i = 0; i < nregs; i++)
21506 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
21511 #ifdef TARGET_UNWIND_INFO
21512 /* Emit unwind directives for a store-multiple instruction or stack pointer
21513 push during alignment.
21514 These should only ever be generated by the function prologue code, so
21515 expect them to have a particular form. */
21518 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21521 HOST_WIDE_INT offset;
21522 HOST_WIDE_INT nregs;
21528 e = XVECEXP (p, 0, 0);
21529 if (GET_CODE (e) != SET)
21532 /* First insn will adjust the stack pointer. */
21533 if (GET_CODE (e) != SET
21534 || GET_CODE (XEXP (e, 0)) != REG
21535 || REGNO (XEXP (e, 0)) != SP_REGNUM
21536 || GET_CODE (XEXP (e, 1)) != PLUS)
21539 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21540 nregs = XVECLEN (p, 0) - 1;
21542 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
21545 /* The function prologue may also push pc, but not annotate it as it is
21546 never restored. We turn this into a stack pointer adjustment. */
21547 if (nregs * 4 == offset - 4)
21549 fprintf (asm_out_file, "\t.pad #4\n");
21553 fprintf (asm_out_file, "\t.save {");
21555 else if (IS_VFP_REGNUM (reg))
21558 fprintf (asm_out_file, "\t.vsave {");
21560 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21562 /* FPA registers are done differently. */
21563 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
21567 /* Unknown register type. */
21570 /* If the stack increment doesn't match the size of the saved registers,
21571 something has gone horribly wrong. */
21572 if (offset != nregs * reg_size)
21577 /* The remaining insns will describe the stores. */
21578 for (i = 1; i <= nregs; i++)
21580 /* Expect (set (mem <addr>) (reg)).
21581 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21582 e = XVECEXP (p, 0, i);
21583 if (GET_CODE (e) != SET
21584 || GET_CODE (XEXP (e, 0)) != MEM
21585 || GET_CODE (XEXP (e, 1)) != REG)
21588 reg = REGNO (XEXP (e, 1));
21593 fprintf (asm_out_file, ", ");
21594 /* We can't use %r for vfp because we need to use the
21595 double precision register names. */
21596 if (IS_VFP_REGNUM (reg))
21597 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
21599 asm_fprintf (asm_out_file, "%r", reg);
21601 #ifdef ENABLE_CHECKING
21602 /* Check that the addresses are consecutive. */
21603 e = XEXP (XEXP (e, 0), 0);
21604 if (GET_CODE (e) == PLUS)
21606 offset += reg_size;
21607 if (GET_CODE (XEXP (e, 0)) != REG
21608 || REGNO (XEXP (e, 0)) != SP_REGNUM
21609 || GET_CODE (XEXP (e, 1)) != CONST_INT
21610 || offset != INTVAL (XEXP (e, 1)))
21614 || GET_CODE (e) != REG
21615 || REGNO (e) != SP_REGNUM)
21619 fprintf (asm_out_file, "}\n");
21622 /* Emit unwind directives for a SET. */
21625 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
21633 switch (GET_CODE (e0))
21636 /* Pushing a single register. */
21637 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21638 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21639 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
21642 asm_fprintf (asm_out_file, "\t.save ");
21643 if (IS_VFP_REGNUM (REGNO (e1)))
21644 asm_fprintf(asm_out_file, "{d%d}\n",
21645 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21647 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21651 if (REGNO (e0) == SP_REGNUM)
21653 /* A stack increment. */
21654 if (GET_CODE (e1) != PLUS
21655 || GET_CODE (XEXP (e1, 0)) != REG
21656 || REGNO (XEXP (e1, 0)) != SP_REGNUM
21657 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21660 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21661 -INTVAL (XEXP (e1, 1)));
21663 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21665 HOST_WIDE_INT offset;
21667 if (GET_CODE (e1) == PLUS)
21669 if (GET_CODE (XEXP (e1, 0)) != REG
21670 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21672 reg = REGNO (XEXP (e1, 0));
21673 offset = INTVAL (XEXP (e1, 1));
21674 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21675 HARD_FRAME_POINTER_REGNUM, reg,
21678 else if (GET_CODE (e1) == REG)
21681 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21682 HARD_FRAME_POINTER_REGNUM, reg);
21687 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21689 /* Move from sp to reg. */
21690 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21692 else if (GET_CODE (e1) == PLUS
21693 && GET_CODE (XEXP (e1, 0)) == REG
21694 && REGNO (XEXP (e1, 0)) == SP_REGNUM
21695 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21697 /* Set reg to offset from sp. */
21698 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21699 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21701 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21703 /* Stack pointer save before alignment. */
21705 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21718 /* Emit unwind directives for the given insn. */
21721 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21725 if (!ARM_EABI_UNWIND_TABLES)
21728 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21729 && (TREE_NOTHROW (current_function_decl)
21730 || crtl->all_throwers_are_sibcalls))
21733 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21736 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21738 pat = XEXP (pat, 0);
21740 pat = PATTERN (insn);
21742 switch (GET_CODE (pat))
21745 arm_unwind_emit_set (asm_out_file, pat);
21749 /* Store multiple. */
21750 arm_unwind_emit_sequence (asm_out_file, pat);
21759 /* Output a reference from a function exception table to the type_info
21760 object X. The EABI specifies that the symbol should be relocated by
21761 an R_ARM_TARGET2 relocation. */
21764 arm_output_ttype (rtx x)
21766 fputs ("\t.word\t", asm_out_file);
21767 output_addr_const (asm_out_file, x);
21768 /* Use special relocations for symbol references. */
21769 if (GET_CODE (x) != CONST_INT)
21770 fputs ("(TARGET2)", asm_out_file);
21771 fputc ('\n', asm_out_file);
21775 #endif /* TARGET_UNWIND_INFO */
21778 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21779 stack alignment. */
21782 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21784 rtx unspec = SET_SRC (pattern);
21785 gcc_assert (GET_CODE (unspec) == UNSPEC);
21789 case UNSPEC_STACK_ALIGN:
21790 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21791 put anything on the stack, so hopefully it won't matter.
21792 CFA = SP will be correct after alignment. */
21793 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21794 SET_DEST (pattern));
21797 gcc_unreachable ();
21802 /* Output unwind directives for the start/end of a function. */
21805 arm_output_fn_unwind (FILE * f, bool prologue)
21807 if (!ARM_EABI_UNWIND_TABLES)
21811 fputs ("\t.fnstart\n", f);
21814 /* If this function will never be unwound, then mark it as such.
21815 The came condition is used in arm_unwind_emit to suppress
21816 the frame annotations. */
21817 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21818 && (TREE_NOTHROW (current_function_decl)
21819 || crtl->all_throwers_are_sibcalls))
21820 fputs("\t.cantunwind\n", f);
21822 fputs ("\t.fnend\n", f);
21827 arm_emit_tls_decoration (FILE *fp, rtx x)
21829 enum tls_reloc reloc;
21832 val = XVECEXP (x, 0, 0);
21833 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21835 output_addr_const (fp, val);
21840 fputs ("(tlsgd)", fp);
21843 fputs ("(tlsldm)", fp);
21846 fputs ("(tlsldo)", fp);
21849 fputs ("(gottpoff)", fp);
21852 fputs ("(tpoff)", fp);
21855 gcc_unreachable ();
21863 fputs (" + (. - ", fp);
21864 output_addr_const (fp, XVECEXP (x, 0, 2));
21866 output_addr_const (fp, XVECEXP (x, 0, 3));
21876 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
21879 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21881 gcc_assert (size == 4);
21882 fputs ("\t.word\t", file);
21883 output_addr_const (file, x);
21884 fputs ("(tlsldo)", file);
21888 arm_output_addr_const_extra (FILE *fp, rtx x)
21890 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21891 return arm_emit_tls_decoration (fp, x);
21892 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21895 int labelno = INTVAL (XVECEXP (x, 0, 0));
21897 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21898 assemble_name_raw (fp, label);
21902 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21904 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21908 output_addr_const (fp, XVECEXP (x, 0, 0));
21912 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
21914 output_addr_const (fp, XVECEXP (x, 0, 0));
21918 output_addr_const (fp, XVECEXP (x, 0, 1));
21922 else if (GET_CODE (x) == CONST_VECTOR)
21923 return arm_emit_vector_const (fp, x);
21928 /* Output assembly for a shift instruction.
21929 SET_FLAGS determines how the instruction modifies the condition codes.
21930 0 - Do not set condition codes.
21931 1 - Set condition codes.
21932 2 - Use smallest instruction. */
21934 arm_output_shift(rtx * operands, int set_flags)
21937 static const char flag_chars[3] = {'?', '.', '!'};
21942 c = flag_chars[set_flags];
21943 if (TARGET_UNIFIED_ASM)
21945 shift = shift_op(operands[3], &val);
21949 operands[2] = GEN_INT(val);
21950 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21953 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21956 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21957 output_asm_insn (pattern, operands);
21961 /* Output a Thumb-1 casesi dispatch sequence. */
21963 thumb1_output_casesi (rtx *operands)
21965 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21967 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21969 switch (GET_MODE(diff_vec))
21972 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21973 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21975 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21976 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21978 return "bl\t%___gnu_thumb1_case_si";
21980 gcc_unreachable ();
21984 /* Output a Thumb-2 casesi instruction. */
21986 thumb2_output_casesi (rtx *operands)
21988 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21990 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21992 output_asm_insn ("cmp\t%0, %1", operands);
21993 output_asm_insn ("bhi\t%l3", operands);
21994 switch (GET_MODE(diff_vec))
21997 return "tbb\t[%|pc, %0]";
21999 return "tbh\t[%|pc, %0, lsl #1]";
22003 output_asm_insn ("adr\t%4, %l2", operands);
22004 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22005 output_asm_insn ("add\t%4, %4, %5", operands);
22010 output_asm_insn ("adr\t%4, %l2", operands);
22011 return "ldr\t%|pc, [%4, %0, lsl #2]";
22014 gcc_unreachable ();
22018 /* Most ARM cores are single issue, but some newer ones can dual issue.
22019 The scheduler descriptions rely on this being correct. */
22021 arm_issue_rate (void)
22036 /* A table and a function to perform ARM-specific name mangling for
22037 NEON vector types in order to conform to the AAPCS (see "Procedure
22038 Call Standard for the ARM Architecture", Appendix A). To qualify
22039 for emission with the mangled names defined in that document, a
22040 vector type must not only be of the correct mode but also be
22041 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22044 enum machine_mode mode;
22045 const char *element_type_name;
22046 const char *aapcs_name;
22047 } arm_mangle_map_entry;
22049 static arm_mangle_map_entry arm_mangle_map[] = {
22050 /* 64-bit containerized types. */
22051 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22052 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22053 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22054 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22055 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22056 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22057 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22058 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22059 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22060 /* 128-bit containerized types. */
22061 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22062 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22063 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22064 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22065 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22066 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22067 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22068 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22069 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22070 { VOIDmode, NULL, NULL }
22074 arm_mangle_type (const_tree type)
22076 arm_mangle_map_entry *pos = arm_mangle_map;
22078 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22079 has to be managled as if it is in the "std" namespace. */
22080 if (TARGET_AAPCS_BASED
22081 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22083 static bool warned;
22084 if (!warned && warn_psabi && !in_system_header)
22087 inform (input_location,
22088 "the mangling of %<va_list%> has changed in GCC 4.4");
22090 return "St9__va_list";
22093 /* Half-precision float. */
22094 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22097 if (TREE_CODE (type) != VECTOR_TYPE)
22100 /* Check the mode of the vector type, and the name of the vector
22101 element type, against the table. */
22102 while (pos->mode != VOIDmode)
22104 tree elt_type = TREE_TYPE (type);
22106 if (pos->mode == TYPE_MODE (type)
22107 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22108 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22109 pos->element_type_name))
22110 return pos->aapcs_name;
22115 /* Use the default mangling for unrecognized (possibly user-defined)
22120 /* Order of allocation of core registers for Thumb: this allocation is
22121 written over the corresponding initial entries of the array
22122 initialized with REG_ALLOC_ORDER. We allocate all low registers
22123 first. Saving and restoring a low register is usually cheaper than
22124 using a call-clobbered high register. */
22126 static const int thumb_core_reg_alloc_order[] =
22128 3, 2, 1, 0, 4, 5, 6, 7,
22129 14, 12, 8, 9, 10, 11, 13, 15
22132 /* Adjust register allocation order when compiling for Thumb. */
22135 arm_order_regs_for_local_alloc (void)
22137 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
22138 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
22140 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
22141 sizeof (thumb_core_reg_alloc_order));
22144 /* Set default optimization options. */
22146 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
22148 /* Enable section anchors by default at -O1 or higher.
22149 Use 2 to distinguish from an explicit -fsection-anchors
22150 given on the command line. */
22152 flag_section_anchors = 2;
22155 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22158 arm_frame_pointer_required (void)
22160 return (cfun->has_nonlocal_label
22161 || SUBTARGET_FRAME_POINTER_REQUIRED
22162 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
22165 /* Only thumb1 can't support conditional execution, so return true if
22166 the target is not thumb1. */
22168 arm_have_conditional_execution (void)
22170 return !TARGET_THUMB1;
22173 #include "gt-arm.h"