1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
59 /* Forward definitions of types. */
60 typedef struct minipool_node Mnode;
61 typedef struct minipool_fixup Mfix;
63 void (*arm_lang_output_object_attributes_hook)(void);
65 /* Forward function declarations. */
66 static int arm_compute_static_chain_stack_bytes (void);
67 static arm_stack_offsets *arm_get_frame_offsets (void);
68 static void arm_add_gc_roots (void);
69 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
70 HOST_WIDE_INT, rtx, rtx, int, int);
71 static unsigned bit_count (unsigned long);
72 static int arm_address_register_rtx_p (rtx, int);
73 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
74 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
75 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
76 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
77 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
78 inline static int thumb1_index_register_rtx_p (rtx, int);
79 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
80 static int thumb_far_jump_used_p (void);
81 static bool thumb_force_lr_save (void);
82 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
83 static rtx emit_sfm (int, int);
84 static unsigned arm_size_return_regs (void);
85 static bool arm_assemble_integer (rtx, unsigned int, int);
86 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
87 static arm_cc get_arm_condition_code (rtx);
88 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
89 static rtx is_jump_table (rtx);
90 static const char *output_multi_immediate (rtx *, const char *, const char *,
92 static const char *shift_op (rtx, HOST_WIDE_INT *);
93 static struct machine_function *arm_init_machine_status (void);
94 static void thumb_exit (FILE *, int);
95 static rtx is_jump_table (rtx);
96 static HOST_WIDE_INT get_jump_table_size (rtx);
97 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
98 static Mnode *add_minipool_forward_ref (Mfix *);
99 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
100 static Mnode *add_minipool_backward_ref (Mfix *);
101 static void assign_minipool_offsets (Mfix *);
102 static void arm_print_value (FILE *, rtx);
103 static void dump_minipool (rtx);
104 static int arm_barrier_cost (rtx);
105 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
106 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
107 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
109 static void arm_reorg (void);
110 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
111 static unsigned long arm_compute_save_reg0_reg12_mask (void);
112 static unsigned long arm_compute_save_reg_mask (void);
113 static unsigned long arm_isr_value (tree);
114 static unsigned long arm_compute_func_type (void);
115 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
116 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
118 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
119 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
121 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
122 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
123 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static int arm_comp_type_attributes (const_tree, const_tree);
125 static void arm_set_default_type_attributes (tree);
126 static int arm_adjust_cost (rtx, rtx, rtx, int);
127 static int count_insns_for_constant (HOST_WIDE_INT, int);
128 static int arm_get_strip_length (int);
129 static bool arm_function_ok_for_sibcall (tree, tree);
130 static enum machine_mode arm_promote_function_mode (const_tree,
131 enum machine_mode, int *,
133 static bool arm_return_in_memory (const_tree, const_tree);
134 static rtx arm_function_value (const_tree, const_tree, bool);
135 static rtx arm_libcall_value (enum machine_mode, const_rtx);
137 static void arm_internal_label (FILE *, const char *, unsigned long);
138 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
140 static bool arm_have_conditional_execution (void);
141 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
142 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
143 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
144 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
145 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
146 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
147 static bool arm_rtx_costs (rtx, int, int, int *, bool);
148 static int arm_address_cost (rtx, bool);
149 static bool arm_memory_load_p (rtx);
150 static bool arm_cirrus_insn_p (rtx);
151 static void cirrus_reorg (rtx);
152 static void arm_init_builtins (void);
153 static void arm_init_iwmmxt_builtins (void);
154 static rtx safe_vector_operand (rtx, enum machine_mode);
155 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
156 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
157 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
158 static void emit_constant_insn (rtx cond, rtx pattern);
159 static rtx emit_set_insn (rtx, rtx);
160 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
162 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
164 static int aapcs_select_return_coproc (const_tree, const_tree);
166 #ifdef OBJECT_FORMAT_ELF
167 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
168 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
171 static void arm_encode_section_info (tree, rtx, int);
174 static void arm_file_end (void);
175 static void arm_file_start (void);
177 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
179 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
180 enum machine_mode, const_tree, bool);
181 static bool arm_promote_prototypes (const_tree);
182 static bool arm_default_short_enums (void);
183 static bool arm_align_anon_bitfield (void);
184 static bool arm_return_in_msb (const_tree);
185 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
186 static bool arm_return_in_memory (const_tree, const_tree);
187 #ifdef TARGET_UNWIND_INFO
188 static void arm_unwind_emit (FILE *, rtx);
189 static bool arm_output_ttype (rtx);
191 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
192 static rtx arm_dwarf_register_span (rtx);
194 static tree arm_cxx_guard_type (void);
195 static bool arm_cxx_guard_mask_bit (void);
196 static tree arm_get_cookie_size (tree);
197 static bool arm_cookie_has_size (void);
198 static bool arm_cxx_cdtor_returns_this (void);
199 static bool arm_cxx_key_method_may_be_inline (void);
200 static void arm_cxx_determine_class_data_visibility (tree);
201 static bool arm_cxx_class_data_always_comdat (void);
202 static bool arm_cxx_use_aeabi_atexit (void);
203 static void arm_init_libfuncs (void);
204 static tree arm_build_builtin_va_list (void);
205 static void arm_expand_builtin_va_start (tree, rtx);
206 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
207 static bool arm_handle_option (size_t, const char *, int);
208 static void arm_target_help (void);
209 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
210 static bool arm_cannot_copy_insn_p (rtx);
211 static bool arm_tls_symbol_p (rtx x);
212 static int arm_issue_rate (void);
213 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
214 static bool arm_allocate_stack_slots_for_args (void);
215 static const char *arm_invalid_parameter_type (const_tree t);
216 static const char *arm_invalid_return_type (const_tree t);
217 static tree arm_promoted_type (const_tree t);
218 static tree arm_convert_to_type (tree type, tree expr);
219 static bool arm_scalar_mode_supported_p (enum machine_mode);
220 static bool arm_frame_pointer_required (void);
221 static bool arm_can_eliminate (const int, const int);
222 static void arm_asm_trampoline_template (FILE *);
223 static void arm_trampoline_init (rtx, tree, rtx);
224 static rtx arm_trampoline_adjust_address (rtx);
225 static rtx arm_pic_static_addr (rtx orig, rtx reg);
228 /* Table of machine attributes. */
229 static const struct attribute_spec arm_attribute_table[] =
231 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
232 /* Function calls made to this symbol must be done indirectly, because
233 it may lie outside of the 26 bit addressing range of a normal function
235 { "long_call", 0, 0, false, true, true, NULL },
236 /* Whereas these functions are always known to reside within the 26 bit
238 { "short_call", 0, 0, false, true, true, NULL },
239 /* Specify the procedure call conventions for a function. */
240 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
241 /* Interrupt Service Routines have special prologue and epilogue requirements. */
242 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
243 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
244 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
246 /* ARM/PE has three new attributes:
248 dllexport - for exporting a function/variable that will live in a dll
249 dllimport - for importing a function/variable from a dll
251 Microsoft allows multiple declspecs in one __declspec, separating
252 them with spaces. We do NOT support this. Instead, use __declspec
255 { "dllimport", 0, 0, true, false, false, NULL },
256 { "dllexport", 0, 0, true, false, false, NULL },
257 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
258 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
259 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
260 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
261 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
263 { NULL, 0, 0, false, false, false, NULL }
266 /* Initialize the GCC target structure. */
267 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
268 #undef TARGET_MERGE_DECL_ATTRIBUTES
269 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
272 #undef TARGET_LEGITIMIZE_ADDRESS
273 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
275 #undef TARGET_ATTRIBUTE_TABLE
276 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
278 #undef TARGET_ASM_FILE_START
279 #define TARGET_ASM_FILE_START arm_file_start
280 #undef TARGET_ASM_FILE_END
281 #define TARGET_ASM_FILE_END arm_file_end
283 #undef TARGET_ASM_ALIGNED_SI_OP
284 #define TARGET_ASM_ALIGNED_SI_OP NULL
285 #undef TARGET_ASM_INTEGER
286 #define TARGET_ASM_INTEGER arm_assemble_integer
288 #undef TARGET_ASM_FUNCTION_PROLOGUE
289 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
291 #undef TARGET_ASM_FUNCTION_EPILOGUE
292 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
294 #undef TARGET_DEFAULT_TARGET_FLAGS
295 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
296 #undef TARGET_HANDLE_OPTION
297 #define TARGET_HANDLE_OPTION arm_handle_option
299 #define TARGET_HELP arm_target_help
301 #undef TARGET_COMP_TYPE_ATTRIBUTES
302 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
304 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
305 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
307 #undef TARGET_SCHED_ADJUST_COST
308 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
310 #undef TARGET_ENCODE_SECTION_INFO
312 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
314 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
317 #undef TARGET_STRIP_NAME_ENCODING
318 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
320 #undef TARGET_ASM_INTERNAL_LABEL
321 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
323 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
324 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
326 #undef TARGET_FUNCTION_VALUE
327 #define TARGET_FUNCTION_VALUE arm_function_value
329 #undef TARGET_LIBCALL_VALUE
330 #define TARGET_LIBCALL_VALUE arm_libcall_value
332 #undef TARGET_ASM_OUTPUT_MI_THUNK
333 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
334 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
335 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
337 #undef TARGET_RTX_COSTS
338 #define TARGET_RTX_COSTS arm_rtx_costs
339 #undef TARGET_ADDRESS_COST
340 #define TARGET_ADDRESS_COST arm_address_cost
342 #undef TARGET_SHIFT_TRUNCATION_MASK
343 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
344 #undef TARGET_VECTOR_MODE_SUPPORTED_P
345 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
347 #undef TARGET_MACHINE_DEPENDENT_REORG
348 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
350 #undef TARGET_INIT_BUILTINS
351 #define TARGET_INIT_BUILTINS arm_init_builtins
352 #undef TARGET_EXPAND_BUILTIN
353 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
355 #undef TARGET_INIT_LIBFUNCS
356 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
358 #undef TARGET_PROMOTE_FUNCTION_MODE
359 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
360 #undef TARGET_PROMOTE_PROTOTYPES
361 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
362 #undef TARGET_PASS_BY_REFERENCE
363 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
364 #undef TARGET_ARG_PARTIAL_BYTES
365 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
367 #undef TARGET_SETUP_INCOMING_VARARGS
368 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
370 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
371 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
373 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
374 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
375 #undef TARGET_TRAMPOLINE_INIT
376 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
377 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
378 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
380 #undef TARGET_DEFAULT_SHORT_ENUMS
381 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
383 #undef TARGET_ALIGN_ANON_BITFIELD
384 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
386 #undef TARGET_NARROW_VOLATILE_BITFIELD
387 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
389 #undef TARGET_CXX_GUARD_TYPE
390 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
392 #undef TARGET_CXX_GUARD_MASK_BIT
393 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
395 #undef TARGET_CXX_GET_COOKIE_SIZE
396 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
398 #undef TARGET_CXX_COOKIE_HAS_SIZE
399 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
401 #undef TARGET_CXX_CDTOR_RETURNS_THIS
402 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
404 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
405 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
407 #undef TARGET_CXX_USE_AEABI_ATEXIT
408 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
410 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
411 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
412 arm_cxx_determine_class_data_visibility
414 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
415 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
417 #undef TARGET_RETURN_IN_MSB
418 #define TARGET_RETURN_IN_MSB arm_return_in_msb
420 #undef TARGET_RETURN_IN_MEMORY
421 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
423 #undef TARGET_MUST_PASS_IN_STACK
424 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
426 #ifdef TARGET_UNWIND_INFO
427 #undef TARGET_UNWIND_EMIT
428 #define TARGET_UNWIND_EMIT arm_unwind_emit
430 /* EABI unwinding tables use a different format for the typeinfo tables. */
431 #undef TARGET_ASM_TTYPE
432 #define TARGET_ASM_TTYPE arm_output_ttype
434 #undef TARGET_ARM_EABI_UNWINDER
435 #define TARGET_ARM_EABI_UNWINDER true
436 #endif /* TARGET_UNWIND_INFO */
438 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
439 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
441 #undef TARGET_DWARF_REGISTER_SPAN
442 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
444 #undef TARGET_CANNOT_COPY_INSN_P
445 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
448 #undef TARGET_HAVE_TLS
449 #define TARGET_HAVE_TLS true
452 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
453 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
455 #undef TARGET_CANNOT_FORCE_CONST_MEM
456 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
458 #undef TARGET_MAX_ANCHOR_OFFSET
459 #define TARGET_MAX_ANCHOR_OFFSET 4095
461 /* The minimum is set such that the total size of the block
462 for a particular anchor is -4088 + 1 + 4095 bytes, which is
463 divisible by eight, ensuring natural spacing of anchors. */
464 #undef TARGET_MIN_ANCHOR_OFFSET
465 #define TARGET_MIN_ANCHOR_OFFSET -4088
467 #undef TARGET_SCHED_ISSUE_RATE
468 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
470 #undef TARGET_MANGLE_TYPE
471 #define TARGET_MANGLE_TYPE arm_mangle_type
473 #undef TARGET_BUILD_BUILTIN_VA_LIST
474 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
475 #undef TARGET_EXPAND_BUILTIN_VA_START
476 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
477 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
478 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
481 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
482 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
485 #undef TARGET_LEGITIMATE_ADDRESS_P
486 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
488 #undef TARGET_INVALID_PARAMETER_TYPE
489 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
491 #undef TARGET_INVALID_RETURN_TYPE
492 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
494 #undef TARGET_PROMOTED_TYPE
495 #define TARGET_PROMOTED_TYPE arm_promoted_type
497 #undef TARGET_CONVERT_TO_TYPE
498 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
500 #undef TARGET_SCALAR_MODE_SUPPORTED_P
501 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
503 #undef TARGET_FRAME_POINTER_REQUIRED
504 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
506 #undef TARGET_CAN_ELIMINATE
507 #define TARGET_CAN_ELIMINATE arm_can_eliminate
509 struct gcc_target targetm = TARGET_INITIALIZER;
511 /* Obstack for minipool constant handling. */
512 static struct obstack minipool_obstack;
513 static char * minipool_startobj;
515 /* The maximum number of insns skipped which
516 will be conditionalised if possible. */
517 static int max_insns_skipped = 5;
519 extern FILE * asm_out_file;
521 /* True if we are currently building a constant table. */
522 int making_const_table;
524 /* The processor for which instructions should be scheduled. */
525 enum processor_type arm_tune = arm_none;
527 /* The current tuning set. */
528 const struct tune_params *current_tune;
530 /* The default processor used if not overridden by commandline. */
531 static enum processor_type arm_default_cpu = arm_none;
533 /* Which floating point hardware to schedule for. */
536 /* Which floating popint hardware to use. */
537 const struct arm_fpu_desc *arm_fpu_desc;
539 /* Whether to use floating point hardware. */
540 enum float_abi_type arm_float_abi;
542 /* Which __fp16 format to use. */
543 enum arm_fp16_format_type arm_fp16_format;
545 /* Which ABI to use. */
546 enum arm_abi_type arm_abi;
548 /* Which thread pointer model to use. */
549 enum arm_tp_type target_thread_pointer = TP_AUTO;
551 /* Used to parse -mstructure_size_boundary command line option. */
552 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
554 /* Used for Thumb call_via trampolines. */
555 rtx thumb_call_via_label[14];
556 static int thumb_call_reg_needed;
558 /* Bit values used to identify processor capabilities. */
559 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
560 #define FL_ARCH3M (1 << 1) /* Extended multiply */
561 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
562 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
563 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
564 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
565 #define FL_THUMB (1 << 6) /* Thumb aware */
566 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
567 #define FL_STRONG (1 << 8) /* StrongARM */
568 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
569 #define FL_XSCALE (1 << 10) /* XScale */
570 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
571 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
572 media instructions. */
573 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
574 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
575 Note: ARM6 & 7 derivatives only. */
576 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
577 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
578 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
580 #define FL_DIV (1 << 18) /* Hardware divide. */
581 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
582 #define FL_NEON (1 << 20) /* Neon instructions. */
583 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
586 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
588 #define FL_FOR_ARCH2 FL_NOTM
589 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
590 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
591 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
592 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
593 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
594 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
595 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
596 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
597 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
598 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
599 #define FL_FOR_ARCH6J FL_FOR_ARCH6
600 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
601 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
602 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
603 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
604 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
605 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
606 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
607 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
608 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
609 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
611 /* The bits in this mask specify which
612 instructions we are allowed to generate. */
613 static unsigned long insn_flags = 0;
615 /* The bits in this mask specify which instruction scheduling options should
617 static unsigned long tune_flags = 0;
619 /* The following are used in the arm.md file as equivalents to bits
620 in the above two flag variables. */
622 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
625 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
628 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
631 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
634 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
637 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
640 /* Nonzero if this chip supports the ARM 6K extensions. */
643 /* Nonzero if instructions not present in the 'M' profile can be used. */
644 int arm_arch_notm = 0;
646 /* Nonzero if instructions present in ARMv7E-M can be used. */
649 /* Nonzero if this chip can benefit from load scheduling. */
650 int arm_ld_sched = 0;
652 /* Nonzero if this chip is a StrongARM. */
653 int arm_tune_strongarm = 0;
655 /* Nonzero if this chip is a Cirrus variant. */
656 int arm_arch_cirrus = 0;
658 /* Nonzero if this chip supports Intel Wireless MMX technology. */
659 int arm_arch_iwmmxt = 0;
661 /* Nonzero if this chip is an XScale. */
662 int arm_arch_xscale = 0;
664 /* Nonzero if tuning for XScale */
665 int arm_tune_xscale = 0;
667 /* Nonzero if we want to tune for stores that access the write-buffer.
668 This typically means an ARM6 or ARM7 with MMU or MPU. */
669 int arm_tune_wbuf = 0;
671 /* Nonzero if tuning for Cortex-A9. */
672 int arm_tune_cortex_a9 = 0;
674 /* Nonzero if generating Thumb instructions. */
677 /* Nonzero if we should define __THUMB_INTERWORK__ in the
679 XXX This is a bit of a hack, it's intended to help work around
680 problems in GLD which doesn't understand that armv5t code is
681 interworking clean. */
682 int arm_cpp_interwork = 0;
684 /* Nonzero if chip supports Thumb 2. */
687 /* Nonzero if chip supports integer division instruction. */
690 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
691 must report the mode of the memory reference from PRINT_OPERAND to
692 PRINT_OPERAND_ADDRESS. */
693 enum machine_mode output_memory_reference_mode;
695 /* The register number to be used for the PIC offset register. */
696 unsigned arm_pic_register = INVALID_REGNUM;
698 /* Set to 1 after arm_reorg has started. Reset to start at the start of
699 the next function. */
700 static int after_arm_reorg = 0;
702 static enum arm_pcs arm_pcs_default;
704 /* For an explanation of these variables, see final_prescan_insn below. */
706 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
707 enum arm_cond_code arm_current_cc;
709 int arm_target_label;
710 /* The number of conditionally executed insns, including the current insn. */
711 int arm_condexec_count = 0;
712 /* A bitmask specifying the patterns for the IT block.
713 Zero means do not output an IT block before this insn. */
714 int arm_condexec_mask = 0;
715 /* The number of bits used in arm_condexec_mask. */
716 int arm_condexec_masklen = 0;
718 /* The condition codes of the ARM, and the inverse function. */
719 static const char * const arm_condition_codes[] =
721 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
722 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
725 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
726 #define streq(string1, string2) (strcmp (string1, string2) == 0)
728 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
729 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
730 | (1 << PIC_OFFSET_TABLE_REGNUM)))
732 /* Initialization code. */
736 const char *const name;
737 enum processor_type core;
739 const unsigned long flags;
740 const struct tune_params *const tune;
743 const struct tune_params arm_slowmul_tune =
745 arm_slowmul_rtx_costs,
749 const struct tune_params arm_fastmul_tune =
751 arm_fastmul_rtx_costs,
755 const struct tune_params arm_xscale_tune =
757 arm_xscale_rtx_costs,
761 const struct tune_params arm_9e_tune =
767 /* Not all of these give usefully different compilation alternatives,
768 but there is no simple way of generalizing them. */
769 static const struct processors all_cores[] =
772 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
773 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
774 #include "arm-cores.def"
776 {NULL, arm_none, NULL, 0, NULL}
779 static const struct processors all_architectures[] =
781 /* ARM Architectures */
782 /* We don't specify tuning costs here as it will be figured out
785 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
786 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
787 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
788 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
789 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
790 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
791 implementations that support it, so we will leave it out for now. */
792 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
793 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
794 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
795 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
796 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
797 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
798 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
799 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
800 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
801 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
802 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
803 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
804 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
805 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
806 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
807 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
808 {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
809 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
810 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
811 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
812 {NULL, arm_none, NULL, 0 , NULL}
815 struct arm_cpu_select
819 const struct processors * processors;
822 /* This is a magic structure. The 'string' field is magically filled in
823 with a pointer to the value specified by the user on the command line
824 assuming that the user has specified such a value. */
826 static struct arm_cpu_select arm_select[] =
828 /* string name processors */
829 { NULL, "-mcpu=", all_cores },
830 { NULL, "-march=", all_architectures },
831 { NULL, "-mtune=", all_cores }
834 /* Defines representing the indexes into the above table. */
835 #define ARM_OPT_SET_CPU 0
836 #define ARM_OPT_SET_ARCH 1
837 #define ARM_OPT_SET_TUNE 2
839 /* The name of the preprocessor macro to define for this architecture. */
841 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
843 /* Available values for -mfpu=. */
845 static const struct arm_fpu_desc all_fpus[] =
847 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
848 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
849 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
850 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
851 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
852 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
853 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
854 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
855 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
856 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
857 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
858 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
859 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
860 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
861 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
862 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
863 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
864 /* Compatibility aliases. */
865 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
872 enum float_abi_type abi_type;
876 /* Available values for -mfloat-abi=. */
878 static const struct float_abi all_float_abis[] =
880 {"soft", ARM_FLOAT_ABI_SOFT},
881 {"softfp", ARM_FLOAT_ABI_SOFTFP},
882 {"hard", ARM_FLOAT_ABI_HARD}
889 enum arm_fp16_format_type fp16_format_type;
893 /* Available values for -mfp16-format=. */
895 static const struct fp16_format all_fp16_formats[] =
897 {"none", ARM_FP16_FORMAT_NONE},
898 {"ieee", ARM_FP16_FORMAT_IEEE},
899 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
906 enum arm_abi_type abi_type;
910 /* Available values for -mabi=. */
912 static const struct abi_name arm_all_abis[] =
914 {"apcs-gnu", ARM_ABI_APCS},
915 {"atpcs", ARM_ABI_ATPCS},
916 {"aapcs", ARM_ABI_AAPCS},
917 {"iwmmxt", ARM_ABI_IWMMXT},
918 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
921 /* Supported TLS relocations. */
931 /* The maximum number of insns to be used when loading a constant. */
933 arm_constant_limit (bool size_p)
935 return size_p ? 1 : current_tune->constant_limit;
938 /* Emit an insn that's a simple single-set. Both the operands must be known
941 emit_set_insn (rtx x, rtx y)
943 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
946 /* Return the number of bits set in VALUE. */
948 bit_count (unsigned long value)
950 unsigned long count = 0;
955 value &= value - 1; /* Clear the least-significant set bit. */
961 /* Set up library functions unique to ARM. */
964 arm_init_libfuncs (void)
966 /* There are no special library functions unless we are using the
971 /* The functions below are described in Section 4 of the "Run-Time
972 ABI for the ARM architecture", Version 1.0. */
974 /* Double-precision floating-point arithmetic. Table 2. */
975 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
976 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
977 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
978 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
979 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
981 /* Double-precision comparisons. Table 3. */
982 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
983 set_optab_libfunc (ne_optab, DFmode, NULL);
984 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
985 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
986 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
987 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
988 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
990 /* Single-precision floating-point arithmetic. Table 4. */
991 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
992 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
993 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
994 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
995 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
997 /* Single-precision comparisons. Table 5. */
998 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
999 set_optab_libfunc (ne_optab, SFmode, NULL);
1000 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1001 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1002 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1003 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1004 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1006 /* Floating-point to integer conversions. Table 6. */
1007 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1008 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1009 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1010 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1011 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1012 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1013 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1014 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1016 /* Conversions between floating types. Table 7. */
1017 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1018 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1020 /* Integer to floating-point conversions. Table 8. */
1021 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1022 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1023 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1024 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1025 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1026 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1027 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1028 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1030 /* Long long. Table 9. */
1031 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1032 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1033 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1034 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1035 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1036 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1037 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1038 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1040 /* Integer (32/32->32) division. \S 4.3.1. */
1041 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1042 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1044 /* The divmod functions are designed so that they can be used for
1045 plain division, even though they return both the quotient and the
1046 remainder. The quotient is returned in the usual location (i.e.,
1047 r0 for SImode, {r0, r1} for DImode), just as would be expected
1048 for an ordinary division routine. Because the AAPCS calling
1049 conventions specify that all of { r0, r1, r2, r3 } are
1050 callee-saved registers, there is no need to tell the compiler
1051 explicitly that those registers are clobbered by these
1053 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1054 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1056 /* For SImode division the ABI provides div-without-mod routines,
1057 which are faster. */
1058 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1059 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1061 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1062 divmod libcalls instead. */
1063 set_optab_libfunc (smod_optab, DImode, NULL);
1064 set_optab_libfunc (umod_optab, DImode, NULL);
1065 set_optab_libfunc (smod_optab, SImode, NULL);
1066 set_optab_libfunc (umod_optab, SImode, NULL);
1068 /* Half-precision float operations. The compiler handles all operations
1069 with NULL libfuncs by converting the SFmode. */
1070 switch (arm_fp16_format)
1072 case ARM_FP16_FORMAT_IEEE:
1073 case ARM_FP16_FORMAT_ALTERNATIVE:
1076 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1077 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1079 : "__gnu_f2h_alternative"));
1080 set_conv_libfunc (sext_optab, SFmode, HFmode,
1081 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1083 : "__gnu_h2f_alternative"));
1086 set_optab_libfunc (add_optab, HFmode, NULL);
1087 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1088 set_optab_libfunc (smul_optab, HFmode, NULL);
1089 set_optab_libfunc (neg_optab, HFmode, NULL);
1090 set_optab_libfunc (sub_optab, HFmode, NULL);
1093 set_optab_libfunc (eq_optab, HFmode, NULL);
1094 set_optab_libfunc (ne_optab, HFmode, NULL);
1095 set_optab_libfunc (lt_optab, HFmode, NULL);
1096 set_optab_libfunc (le_optab, HFmode, NULL);
1097 set_optab_libfunc (ge_optab, HFmode, NULL);
1098 set_optab_libfunc (gt_optab, HFmode, NULL);
1099 set_optab_libfunc (unord_optab, HFmode, NULL);
1106 if (TARGET_AAPCS_BASED)
1107 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1110 /* On AAPCS systems, this is the "struct __va_list". */
1111 static GTY(()) tree va_list_type;
1113 /* Return the type to use as __builtin_va_list. */
1115 arm_build_builtin_va_list (void)
1120 if (!TARGET_AAPCS_BASED)
1121 return std_build_builtin_va_list ();
1123 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1131 The C Library ABI further reinforces this definition in \S
1134 We must follow this definition exactly. The structure tag
1135 name is visible in C++ mangled names, and thus forms a part
1136 of the ABI. The field name may be used by people who
1137 #include <stdarg.h>. */
1138 /* Create the type. */
1139 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1140 /* Give it the required name. */
1141 va_list_name = build_decl (BUILTINS_LOCATION,
1143 get_identifier ("__va_list"),
1145 DECL_ARTIFICIAL (va_list_name) = 1;
1146 TYPE_NAME (va_list_type) = va_list_name;
1147 /* Create the __ap field. */
1148 ap_field = build_decl (BUILTINS_LOCATION,
1150 get_identifier ("__ap"),
1152 DECL_ARTIFICIAL (ap_field) = 1;
1153 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1154 TYPE_FIELDS (va_list_type) = ap_field;
1155 /* Compute its layout. */
1156 layout_type (va_list_type);
1158 return va_list_type;
1161 /* Return an expression of type "void *" pointing to the next
1162 available argument in a variable-argument list. VALIST is the
1163 user-level va_list object, of type __builtin_va_list. */
1165 arm_extract_valist_ptr (tree valist)
1167 if (TREE_TYPE (valist) == error_mark_node)
1168 return error_mark_node;
1170 /* On an AAPCS target, the pointer is stored within "struct
1172 if (TARGET_AAPCS_BASED)
1174 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1175 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1176 valist, ap_field, NULL_TREE);
1182 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1184 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1186 valist = arm_extract_valist_ptr (valist);
1187 std_expand_builtin_va_start (valist, nextarg);
1190 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1192 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1195 valist = arm_extract_valist_ptr (valist);
1196 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1199 /* Implement TARGET_HANDLE_OPTION. */
1202 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1207 arm_select[1].string = arg;
1211 arm_select[0].string = arg;
1214 case OPT_mhard_float:
1215 target_float_abi_name = "hard";
1218 case OPT_msoft_float:
1219 target_float_abi_name = "soft";
1223 arm_select[2].string = arg;
1232 arm_target_help (void)
1235 static int columns = 0;
1238 /* If we have not done so already, obtain the desired maximum width of
1239 the output. Note - this is a duplication of the code at the start of
1240 gcc/opts.c:print_specific_help() - the two copies should probably be
1241 replaced by a single function. */
1246 GET_ENVIRONMENT (p, "COLUMNS");
1249 int value = atoi (p);
1256 /* Use a reasonable default. */
1260 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1262 /* The - 2 is because we know that the last entry in the array is NULL. */
1263 i = ARRAY_SIZE (all_cores) - 2;
1265 printf (" %s", all_cores[i].name);
1266 remaining = columns - (strlen (all_cores[i].name) + 4);
1267 gcc_assert (remaining >= 0);
1271 int len = strlen (all_cores[i].name);
1273 if (remaining > len + 2)
1275 printf (", %s", all_cores[i].name);
1276 remaining -= len + 2;
1282 printf ("\n %s", all_cores[i].name);
1283 remaining = columns - (len + 4);
1287 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1289 i = ARRAY_SIZE (all_architectures) - 2;
1292 printf (" %s", all_architectures[i].name);
1293 remaining = columns - (strlen (all_architectures[i].name) + 4);
1294 gcc_assert (remaining >= 0);
1298 int len = strlen (all_architectures[i].name);
1300 if (remaining > len + 2)
1302 printf (", %s", all_architectures[i].name);
1303 remaining -= len + 2;
1309 printf ("\n %s", all_architectures[i].name);
1310 remaining = columns - (len + 4);
1317 /* Fix up any incompatible options that the user has specified.
1318 This has now turned into a maze. */
1320 arm_override_options (void)
1323 enum processor_type target_arch_cpu = arm_none;
1324 enum processor_type selected_cpu = arm_none;
1326 /* Set up the flags based on the cpu/architecture selected by the user. */
1327 for (i = ARRAY_SIZE (arm_select); i--;)
1329 struct arm_cpu_select * ptr = arm_select + i;
1331 if (ptr->string != NULL && ptr->string[0] != '\0')
1333 const struct processors * sel;
1335 for (sel = ptr->processors; sel->name != NULL; sel++)
1336 if (streq (ptr->string, sel->name))
1338 /* Set the architecture define. */
1339 if (i != ARM_OPT_SET_TUNE)
1340 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1342 /* Determine the processor core for which we should
1343 tune code-generation. */
1344 if (/* -mcpu= is a sensible default. */
1345 i == ARM_OPT_SET_CPU
1346 /* -mtune= overrides -mcpu= and -march=. */
1347 || i == ARM_OPT_SET_TUNE)
1348 arm_tune = (enum processor_type) (sel - ptr->processors);
1350 /* Remember the CPU associated with this architecture.
1351 If no other option is used to set the CPU type,
1352 we'll use this to guess the most suitable tuning
1354 if (i == ARM_OPT_SET_ARCH)
1355 target_arch_cpu = sel->core;
1357 if (i == ARM_OPT_SET_CPU)
1358 selected_cpu = (enum processor_type) (sel - ptr->processors);
1360 if (i != ARM_OPT_SET_TUNE)
1362 /* If we have been given an architecture and a processor
1363 make sure that they are compatible. We only generate
1364 a warning though, and we prefer the CPU over the
1366 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1367 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1370 insn_flags = sel->flags;
1376 if (sel->name == NULL)
1377 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1381 /* Guess the tuning options from the architecture if necessary. */
1382 if (arm_tune == arm_none)
1383 arm_tune = target_arch_cpu;
1385 /* If the user did not specify a processor, choose one for them. */
1386 if (insn_flags == 0)
1388 const struct processors * sel;
1389 unsigned int sought;
1391 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1392 if (selected_cpu == arm_none)
1394 #ifdef SUBTARGET_CPU_DEFAULT
1395 /* Use the subtarget default CPU if none was specified by
1397 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1399 /* Default to ARM6. */
1400 if (selected_cpu == arm_none)
1401 selected_cpu = arm6;
1403 sel = &all_cores[selected_cpu];
1405 insn_flags = sel->flags;
1407 /* Now check to see if the user has specified some command line
1408 switch that require certain abilities from the cpu. */
1411 if (TARGET_INTERWORK || TARGET_THUMB)
1413 sought |= (FL_THUMB | FL_MODE32);
1415 /* There are no ARM processors that support both APCS-26 and
1416 interworking. Therefore we force FL_MODE26 to be removed
1417 from insn_flags here (if it was set), so that the search
1418 below will always be able to find a compatible processor. */
1419 insn_flags &= ~FL_MODE26;
1422 if (sought != 0 && ((sought & insn_flags) != sought))
1424 /* Try to locate a CPU type that supports all of the abilities
1425 of the default CPU, plus the extra abilities requested by
1427 for (sel = all_cores; sel->name != NULL; sel++)
1428 if ((sel->flags & sought) == (sought | insn_flags))
1431 if (sel->name == NULL)
1433 unsigned current_bit_count = 0;
1434 const struct processors * best_fit = NULL;
1436 /* Ideally we would like to issue an error message here
1437 saying that it was not possible to find a CPU compatible
1438 with the default CPU, but which also supports the command
1439 line options specified by the programmer, and so they
1440 ought to use the -mcpu=<name> command line option to
1441 override the default CPU type.
1443 If we cannot find a cpu that has both the
1444 characteristics of the default cpu and the given
1445 command line options we scan the array again looking
1446 for a best match. */
1447 for (sel = all_cores; sel->name != NULL; sel++)
1448 if ((sel->flags & sought) == sought)
1452 count = bit_count (sel->flags & insn_flags);
1454 if (count >= current_bit_count)
1457 current_bit_count = count;
1461 gcc_assert (best_fit);
1465 insn_flags = sel->flags;
1467 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1468 arm_default_cpu = (enum processor_type) (sel - all_cores);
1469 if (arm_tune == arm_none)
1470 arm_tune = arm_default_cpu;
1473 /* The processor for which we should tune should now have been
1475 gcc_assert (arm_tune != arm_none);
1477 tune_flags = all_cores[(int)arm_tune].flags;
1478 current_tune = all_cores[(int)arm_tune].tune;
1480 if (target_fp16_format_name)
1482 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1484 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1486 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1490 if (i == ARRAY_SIZE (all_fp16_formats))
1491 error ("invalid __fp16 format option: -mfp16-format=%s",
1492 target_fp16_format_name);
1495 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1497 if (target_abi_name)
1499 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1501 if (streq (arm_all_abis[i].name, target_abi_name))
1503 arm_abi = arm_all_abis[i].abi_type;
1507 if (i == ARRAY_SIZE (arm_all_abis))
1508 error ("invalid ABI option: -mabi=%s", target_abi_name);
1511 arm_abi = ARM_DEFAULT_ABI;
1513 /* Make sure that the processor choice does not conflict with any of the
1514 other command line choices. */
1515 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1516 error ("target CPU does not support ARM mode");
1518 /* BPABI targets use linker tricks to allow interworking on cores
1519 without thumb support. */
1520 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1522 warning (0, "target CPU does not support interworking" );
1523 target_flags &= ~MASK_INTERWORK;
1526 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1528 warning (0, "target CPU does not support THUMB instructions");
1529 target_flags &= ~MASK_THUMB;
1532 if (TARGET_APCS_FRAME && TARGET_THUMB)
1534 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1535 target_flags &= ~MASK_APCS_FRAME;
1538 /* Callee super interworking implies thumb interworking. Adding
1539 this to the flags here simplifies the logic elsewhere. */
1540 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1541 target_flags |= MASK_INTERWORK;
1543 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1544 from here where no function is being compiled currently. */
1545 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1546 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1548 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1549 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1551 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1552 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1554 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1556 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1557 target_flags |= MASK_APCS_FRAME;
1560 if (TARGET_POKE_FUNCTION_NAME)
1561 target_flags |= MASK_APCS_FRAME;
1563 if (TARGET_APCS_REENT && flag_pic)
1564 error ("-fpic and -mapcs-reent are incompatible");
1566 if (TARGET_APCS_REENT)
1567 warning (0, "APCS reentrant code not supported. Ignored");
1569 /* If this target is normally configured to use APCS frames, warn if they
1570 are turned off and debugging is turned on. */
1572 && write_symbols != NO_DEBUG
1573 && !TARGET_APCS_FRAME
1574 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1575 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1577 if (TARGET_APCS_FLOAT)
1578 warning (0, "passing floating point arguments in fp regs not yet supported");
1580 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1581 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1582 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1583 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1584 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1585 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1586 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1587 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1588 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1589 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1590 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1591 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1592 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1594 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1595 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1596 thumb_code = (TARGET_ARM == 0);
1597 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1598 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1599 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1600 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1601 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1603 /* If we are not using the default (ARM mode) section anchor offset
1604 ranges, then set the correct ranges now. */
1607 /* Thumb-1 LDR instructions cannot have negative offsets.
1608 Permissible positive offset ranges are 5-bit (for byte loads),
1609 6-bit (for halfword loads), or 7-bit (for word loads).
1610 Empirical results suggest a 7-bit anchor range gives the best
1611 overall code size. */
1612 targetm.min_anchor_offset = 0;
1613 targetm.max_anchor_offset = 127;
1615 else if (TARGET_THUMB2)
1617 /* The minimum is set such that the total size of the block
1618 for a particular anchor is 248 + 1 + 4095 bytes, which is
1619 divisible by eight, ensuring natural spacing of anchors. */
1620 targetm.min_anchor_offset = -248;
1621 targetm.max_anchor_offset = 4095;
1624 /* V5 code we generate is completely interworking capable, so we turn off
1625 TARGET_INTERWORK here to avoid many tests later on. */
1627 /* XXX However, we must pass the right pre-processor defines to CPP
1628 or GLD can get confused. This is a hack. */
1629 if (TARGET_INTERWORK)
1630 arm_cpp_interwork = 1;
1633 target_flags &= ~MASK_INTERWORK;
1635 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1636 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1638 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1639 error ("iwmmxt abi requires an iwmmxt capable cpu");
1641 if (target_fpu_name == NULL && target_fpe_name != NULL)
1643 if (streq (target_fpe_name, "2"))
1644 target_fpu_name = "fpe2";
1645 else if (streq (target_fpe_name, "3"))
1646 target_fpu_name = "fpe3";
1648 error ("invalid floating point emulation option: -mfpe=%s",
1652 if (target_fpu_name == NULL)
1654 #ifdef FPUTYPE_DEFAULT
1655 target_fpu_name = FPUTYPE_DEFAULT;
1657 if (arm_arch_cirrus)
1658 target_fpu_name = "maverick";
1660 target_fpu_name = "fpe2";
1664 arm_fpu_desc = NULL;
1665 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1667 if (streq (all_fpus[i].name, target_fpu_name))
1669 arm_fpu_desc = &all_fpus[i];
1676 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1680 switch (arm_fpu_desc->model)
1682 case ARM_FP_MODEL_FPA:
1683 if (arm_fpu_desc->rev == 2)
1684 arm_fpu_attr = FPU_FPE2;
1685 else if (arm_fpu_desc->rev == 3)
1686 arm_fpu_attr = FPU_FPE3;
1688 arm_fpu_attr = FPU_FPA;
1691 case ARM_FP_MODEL_MAVERICK:
1692 arm_fpu_attr = FPU_MAVERICK;
1695 case ARM_FP_MODEL_VFP:
1696 arm_fpu_attr = FPU_VFP;
1703 if (target_float_abi_name != NULL)
1705 /* The user specified a FP ABI. */
1706 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1708 if (streq (all_float_abis[i].name, target_float_abi_name))
1710 arm_float_abi = all_float_abis[i].abi_type;
1714 if (i == ARRAY_SIZE (all_float_abis))
1715 error ("invalid floating point abi: -mfloat-abi=%s",
1716 target_float_abi_name);
1719 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1721 if (TARGET_AAPCS_BASED
1722 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1723 error ("FPA is unsupported in the AAPCS");
1725 if (TARGET_AAPCS_BASED)
1727 if (TARGET_CALLER_INTERWORKING)
1728 error ("AAPCS does not support -mcaller-super-interworking");
1730 if (TARGET_CALLEE_INTERWORKING)
1731 error ("AAPCS does not support -mcallee-super-interworking");
1734 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1735 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1736 will ever exist. GCC makes no attempt to support this combination. */
1737 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1738 sorry ("iWMMXt and hardware floating point");
1740 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1741 if (TARGET_THUMB2 && TARGET_IWMMXT)
1742 sorry ("Thumb-2 iWMMXt");
1744 /* __fp16 support currently assumes the core has ldrh. */
1745 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1746 sorry ("__fp16 and no ldrh");
1748 /* If soft-float is specified then don't use FPU. */
1749 if (TARGET_SOFT_FLOAT)
1750 arm_fpu_attr = FPU_NONE;
1752 if (TARGET_AAPCS_BASED)
1754 if (arm_abi == ARM_ABI_IWMMXT)
1755 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1756 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1757 && TARGET_HARD_FLOAT
1759 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1761 arm_pcs_default = ARM_PCS_AAPCS;
1765 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1766 sorry ("-mfloat-abi=hard and VFP");
1768 if (arm_abi == ARM_ABI_APCS)
1769 arm_pcs_default = ARM_PCS_APCS;
1771 arm_pcs_default = ARM_PCS_ATPCS;
1774 /* For arm2/3 there is no need to do any scheduling if there is only
1775 a floating point emulator, or we are doing software floating-point. */
1776 if ((TARGET_SOFT_FLOAT
1777 || (TARGET_FPA && arm_fpu_desc->rev))
1778 && (tune_flags & FL_MODE32) == 0)
1779 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1781 if (target_thread_switch)
1783 if (strcmp (target_thread_switch, "soft") == 0)
1784 target_thread_pointer = TP_SOFT;
1785 else if (strcmp (target_thread_switch, "auto") == 0)
1786 target_thread_pointer = TP_AUTO;
1787 else if (strcmp (target_thread_switch, "cp15") == 0)
1788 target_thread_pointer = TP_CP15;
1790 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1793 /* Use the cp15 method if it is available. */
1794 if (target_thread_pointer == TP_AUTO)
1796 if (arm_arch6k && !TARGET_THUMB1)
1797 target_thread_pointer = TP_CP15;
1799 target_thread_pointer = TP_SOFT;
1802 if (TARGET_HARD_TP && TARGET_THUMB1)
1803 error ("can not use -mtp=cp15 with 16-bit Thumb");
1805 /* Override the default structure alignment for AAPCS ABI. */
1806 if (TARGET_AAPCS_BASED)
1807 arm_structure_size_boundary = 8;
1809 if (structure_size_string != NULL)
1811 int size = strtol (structure_size_string, NULL, 0);
1813 if (size == 8 || size == 32
1814 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1815 arm_structure_size_boundary = size;
1817 warning (0, "structure size boundary can only be set to %s",
1818 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1821 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1823 error ("RTP PIC is incompatible with Thumb");
1827 /* If stack checking is disabled, we can use r10 as the PIC register,
1828 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1829 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1831 if (TARGET_VXWORKS_RTP)
1832 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1833 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1836 if (flag_pic && TARGET_VXWORKS_RTP)
1837 arm_pic_register = 9;
1839 if (arm_pic_register_string != NULL)
1841 int pic_register = decode_reg_name (arm_pic_register_string);
1844 warning (0, "-mpic-register= is useless without -fpic");
1846 /* Prevent the user from choosing an obviously stupid PIC register. */
1847 else if (pic_register < 0 || call_used_regs[pic_register]
1848 || pic_register == HARD_FRAME_POINTER_REGNUM
1849 || pic_register == STACK_POINTER_REGNUM
1850 || pic_register >= PC_REGNUM
1851 || (TARGET_VXWORKS_RTP
1852 && (unsigned int) pic_register != arm_pic_register))
1853 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1855 arm_pic_register = pic_register;
1858 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1859 if (fix_cm3_ldrd == 2)
1861 if (selected_cpu == cortexm3)
1867 if (TARGET_THUMB1 && flag_schedule_insns)
1869 /* Don't warn since it's on by default in -O2. */
1870 flag_schedule_insns = 0;
1875 /* If optimizing for size, bump the number of instructions that we
1876 are prepared to conditionally execute (even on a StrongARM). */
1877 max_insns_skipped = 6;
1881 /* StrongARM has early execution of branches, so a sequence
1882 that is worth skipping is shorter. */
1883 if (arm_tune_strongarm)
1884 max_insns_skipped = 3;
1887 /* Hot/Cold partitioning is not currently supported, since we can't
1888 handle literal pool placement in that case. */
1889 if (flag_reorder_blocks_and_partition)
1891 inform (input_location,
1892 "-freorder-blocks-and-partition not supported on this architecture");
1893 flag_reorder_blocks_and_partition = 0;
1894 flag_reorder_blocks = 1;
1897 /* Register global variables with the garbage collector. */
1898 arm_add_gc_roots ();
1902 arm_add_gc_roots (void)
1904 gcc_obstack_init(&minipool_obstack);
1905 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1908 /* A table of known ARM exception types.
1909 For use with the interrupt function attribute. */
1913 const char *const arg;
1914 const unsigned long return_value;
1918 static const isr_attribute_arg isr_attribute_args [] =
1920 { "IRQ", ARM_FT_ISR },
1921 { "irq", ARM_FT_ISR },
1922 { "FIQ", ARM_FT_FIQ },
1923 { "fiq", ARM_FT_FIQ },
1924 { "ABORT", ARM_FT_ISR },
1925 { "abort", ARM_FT_ISR },
1926 { "ABORT", ARM_FT_ISR },
1927 { "abort", ARM_FT_ISR },
1928 { "UNDEF", ARM_FT_EXCEPTION },
1929 { "undef", ARM_FT_EXCEPTION },
1930 { "SWI", ARM_FT_EXCEPTION },
1931 { "swi", ARM_FT_EXCEPTION },
1932 { NULL, ARM_FT_NORMAL }
1935 /* Returns the (interrupt) function type of the current
1936 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1938 static unsigned long
1939 arm_isr_value (tree argument)
1941 const isr_attribute_arg * ptr;
1945 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1947 /* No argument - default to IRQ. */
1948 if (argument == NULL_TREE)
1951 /* Get the value of the argument. */
1952 if (TREE_VALUE (argument) == NULL_TREE
1953 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1954 return ARM_FT_UNKNOWN;
1956 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1958 /* Check it against the list of known arguments. */
1959 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1960 if (streq (arg, ptr->arg))
1961 return ptr->return_value;
1963 /* An unrecognized interrupt type. */
1964 return ARM_FT_UNKNOWN;
1967 /* Computes the type of the current function. */
1969 static unsigned long
1970 arm_compute_func_type (void)
1972 unsigned long type = ARM_FT_UNKNOWN;
1976 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1978 /* Decide if the current function is volatile. Such functions
1979 never return, and many memory cycles can be saved by not storing
1980 register values that will never be needed again. This optimization
1981 was added to speed up context switching in a kernel application. */
1983 && (TREE_NOTHROW (current_function_decl)
1984 || !(flag_unwind_tables
1985 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1986 && TREE_THIS_VOLATILE (current_function_decl))
1987 type |= ARM_FT_VOLATILE;
1989 if (cfun->static_chain_decl != NULL)
1990 type |= ARM_FT_NESTED;
1992 attr = DECL_ATTRIBUTES (current_function_decl);
1994 a = lookup_attribute ("naked", attr);
1996 type |= ARM_FT_NAKED;
1998 a = lookup_attribute ("isr", attr);
2000 a = lookup_attribute ("interrupt", attr);
2003 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2005 type |= arm_isr_value (TREE_VALUE (a));
2010 /* Returns the type of the current function. */
2013 arm_current_func_type (void)
2015 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2016 cfun->machine->func_type = arm_compute_func_type ();
2018 return cfun->machine->func_type;
2022 arm_allocate_stack_slots_for_args (void)
2024 /* Naked functions should not allocate stack slots for arguments. */
2025 return !IS_NAKED (arm_current_func_type ());
2029 /* Output assembler code for a block containing the constant parts
2030 of a trampoline, leaving space for the variable parts.
2032 On the ARM, (if r8 is the static chain regnum, and remembering that
2033 referencing pc adds an offset of 8) the trampoline looks like:
2036 .word static chain value
2037 .word function's address
2038 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2041 arm_asm_trampoline_template (FILE *f)
2045 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2046 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2048 else if (TARGET_THUMB2)
2050 /* The Thumb-2 trampoline is similar to the arm implementation.
2051 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2052 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2053 STATIC_CHAIN_REGNUM, PC_REGNUM);
2054 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2058 ASM_OUTPUT_ALIGN (f, 2);
2059 fprintf (f, "\t.code\t16\n");
2060 fprintf (f, ".Ltrampoline_start:\n");
2061 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2062 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2063 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2064 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2065 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2066 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2068 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2069 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2072 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2075 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2077 rtx fnaddr, mem, a_tramp;
2079 emit_block_move (m_tramp, assemble_trampoline_template (),
2080 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2082 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2083 emit_move_insn (mem, chain_value);
2085 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2086 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2087 emit_move_insn (mem, fnaddr);
2089 a_tramp = XEXP (m_tramp, 0);
2090 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2091 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2092 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2095 /* Thumb trampolines should be entered in thumb mode, so set
2096 the bottom bit of the address. */
2099 arm_trampoline_adjust_address (rtx addr)
2102 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2103 NULL, 0, OPTAB_LIB_WIDEN);
2107 /* Return 1 if it is possible to return using a single instruction.
2108 If SIBLING is non-null, this is a test for a return before a sibling
2109 call. SIBLING is the call insn, so we can examine its register usage. */
2112 use_return_insn (int iscond, rtx sibling)
2115 unsigned int func_type;
2116 unsigned long saved_int_regs;
2117 unsigned HOST_WIDE_INT stack_adjust;
2118 arm_stack_offsets *offsets;
2120 /* Never use a return instruction before reload has run. */
2121 if (!reload_completed)
2124 func_type = arm_current_func_type ();
2126 /* Naked, volatile and stack alignment functions need special
2128 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2131 /* So do interrupt functions that use the frame pointer and Thumb
2132 interrupt functions. */
2133 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2136 offsets = arm_get_frame_offsets ();
2137 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2139 /* As do variadic functions. */
2140 if (crtl->args.pretend_args_size
2141 || cfun->machine->uses_anonymous_args
2142 /* Or if the function calls __builtin_eh_return () */
2143 || crtl->calls_eh_return
2144 /* Or if the function calls alloca */
2145 || cfun->calls_alloca
2146 /* Or if there is a stack adjustment. However, if the stack pointer
2147 is saved on the stack, we can use a pre-incrementing stack load. */
2148 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2149 && stack_adjust == 4)))
2152 saved_int_regs = offsets->saved_regs_mask;
2154 /* Unfortunately, the insn
2156 ldmib sp, {..., sp, ...}
2158 triggers a bug on most SA-110 based devices, such that the stack
2159 pointer won't be correctly restored if the instruction takes a
2160 page fault. We work around this problem by popping r3 along with
2161 the other registers, since that is never slower than executing
2162 another instruction.
2164 We test for !arm_arch5 here, because code for any architecture
2165 less than this could potentially be run on one of the buggy
2167 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2169 /* Validate that r3 is a call-clobbered register (always true in
2170 the default abi) ... */
2171 if (!call_used_regs[3])
2174 /* ... that it isn't being used for a return value ... */
2175 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2178 /* ... or for a tail-call argument ... */
2181 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2183 if (find_regno_fusage (sibling, USE, 3))
2187 /* ... and that there are no call-saved registers in r0-r2
2188 (always true in the default ABI). */
2189 if (saved_int_regs & 0x7)
2193 /* Can't be done if interworking with Thumb, and any registers have been
2195 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2198 /* On StrongARM, conditional returns are expensive if they aren't
2199 taken and multiple registers have been stacked. */
2200 if (iscond && arm_tune_strongarm)
2202 /* Conditional return when just the LR is stored is a simple
2203 conditional-load instruction, that's not expensive. */
2204 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2208 && arm_pic_register != INVALID_REGNUM
2209 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2213 /* If there are saved registers but the LR isn't saved, then we need
2214 two instructions for the return. */
2215 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2218 /* Can't be done if any of the FPA regs are pushed,
2219 since this also requires an insn. */
2220 if (TARGET_HARD_FLOAT && TARGET_FPA)
2221 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2222 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2225 /* Likewise VFP regs. */
2226 if (TARGET_HARD_FLOAT && TARGET_VFP)
2227 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2228 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2231 if (TARGET_REALLY_IWMMXT)
2232 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2233 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2239 /* Return TRUE if int I is a valid immediate ARM constant. */
2242 const_ok_for_arm (HOST_WIDE_INT i)
2246 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2247 be all zero, or all one. */
2248 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2249 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2250 != ((~(unsigned HOST_WIDE_INT) 0)
2251 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2254 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2256 /* Fast return for 0 and small values. We must do this for zero, since
2257 the code below can't handle that one case. */
2258 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2261 /* Get the number of trailing zeros. */
2262 lowbit = ffs((int) i) - 1;
2264 /* Only even shifts are allowed in ARM mode so round down to the
2265 nearest even number. */
2269 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2274 /* Allow rotated constants in ARM mode. */
2276 && ((i & ~0xc000003f) == 0
2277 || (i & ~0xf000000f) == 0
2278 || (i & ~0xfc000003) == 0))
2285 /* Allow repeated pattern. */
2288 if (i == v || i == (v | (v << 8)))
2295 /* Return true if I is a valid constant for the operation CODE. */
2297 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2299 if (const_ok_for_arm (i))
2323 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2325 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2331 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2335 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2342 /* Emit a sequence of insns to handle a large constant.
2343 CODE is the code of the operation required, it can be any of SET, PLUS,
2344 IOR, AND, XOR, MINUS;
2345 MODE is the mode in which the operation is being performed;
2346 VAL is the integer to operate on;
2347 SOURCE is the other operand (a register, or a null-pointer for SET);
2348 SUBTARGETS means it is safe to create scratch registers if that will
2349 either produce a simpler sequence, or we will want to cse the values.
2350 Return value is the number of insns emitted. */
2352 /* ??? Tweak this for thumb2. */
2354 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2355 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2359 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2360 cond = COND_EXEC_TEST (PATTERN (insn));
2364 if (subtargets || code == SET
2365 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2366 && REGNO (target) != REGNO (source)))
2368 /* After arm_reorg has been called, we can't fix up expensive
2369 constants by pushing them into memory so we must synthesize
2370 them in-line, regardless of the cost. This is only likely to
2371 be more costly on chips that have load delay slots and we are
2372 compiling without running the scheduler (so no splitting
2373 occurred before the final instruction emission).
2375 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2377 if (!after_arm_reorg
2379 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2381 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2386 /* Currently SET is the only monadic value for CODE, all
2387 the rest are diadic. */
2388 if (TARGET_USE_MOVT)
2389 arm_emit_movpair (target, GEN_INT (val));
2391 emit_set_insn (target, GEN_INT (val));
2397 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2399 if (TARGET_USE_MOVT)
2400 arm_emit_movpair (temp, GEN_INT (val));
2402 emit_set_insn (temp, GEN_INT (val));
2404 /* For MINUS, the value is subtracted from, since we never
2405 have subtraction of a constant. */
2407 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2409 emit_set_insn (target,
2410 gen_rtx_fmt_ee (code, mode, source, temp));
2416 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2420 /* Return the number of instructions required to synthesize the given
2421 constant, if we start emitting them from bit-position I. */
2423 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2425 HOST_WIDE_INT temp1;
2426 int step_size = TARGET_ARM ? 2 : 1;
2429 gcc_assert (TARGET_ARM || i == 0);
2437 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2442 temp1 = remainder & ((0x0ff << end)
2443 | ((i < end) ? (0xff >> (32 - end)) : 0));
2444 remainder &= ~temp1;
2449 } while (remainder);
2454 find_best_start (unsigned HOST_WIDE_INT remainder)
2456 int best_consecutive_zeros = 0;
2460 /* If we aren't targetting ARM, the best place to start is always at
2465 for (i = 0; i < 32; i += 2)
2467 int consecutive_zeros = 0;
2469 if (!(remainder & (3 << i)))
2471 while ((i < 32) && !(remainder & (3 << i)))
2473 consecutive_zeros += 2;
2476 if (consecutive_zeros > best_consecutive_zeros)
2478 best_consecutive_zeros = consecutive_zeros;
2479 best_start = i - consecutive_zeros;
2485 /* So long as it won't require any more insns to do so, it's
2486 desirable to emit a small constant (in bits 0...9) in the last
2487 insn. This way there is more chance that it can be combined with
2488 a later addressing insn to form a pre-indexed load or store
2489 operation. Consider:
2491 *((volatile int *)0xe0000100) = 1;
2492 *((volatile int *)0xe0000110) = 2;
2494 We want this to wind up as:
2498 str rB, [rA, #0x100]
2500 str rB, [rA, #0x110]
2502 rather than having to synthesize both large constants from scratch.
2504 Therefore, we calculate how many insns would be required to emit
2505 the constant starting from `best_start', and also starting from
2506 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2507 yield a shorter sequence, we may as well use zero. */
2509 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2510 && (count_insns_for_constant (remainder, 0) <=
2511 count_insns_for_constant (remainder, best_start)))
2517 /* Emit an instruction with the indicated PATTERN. If COND is
2518 non-NULL, conditionalize the execution of the instruction on COND
2522 emit_constant_insn (rtx cond, rtx pattern)
2525 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2526 emit_insn (pattern);
2529 /* As above, but extra parameter GENERATE which, if clear, suppresses
2531 /* ??? This needs more work for thumb2. */
2534 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2535 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2540 int final_invert = 0;
2541 int can_negate_initial = 0;
2543 int num_bits_set = 0;
2544 int set_sign_bit_copies = 0;
2545 int clear_sign_bit_copies = 0;
2546 int clear_zero_bit_copies = 0;
2547 int set_zero_bit_copies = 0;
2549 unsigned HOST_WIDE_INT temp1, temp2;
2550 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2551 int step_size = TARGET_ARM ? 2 : 1;
2553 /* Find out which operations are safe for a given CODE. Also do a quick
2554 check for degenerate cases; these can occur when DImode operations
2565 can_negate_initial = 1;
2569 if (remainder == 0xffffffff)
2572 emit_constant_insn (cond,
2573 gen_rtx_SET (VOIDmode, target,
2574 GEN_INT (ARM_SIGN_EXTEND (val))));
2580 if (reload_completed && rtx_equal_p (target, source))
2584 emit_constant_insn (cond,
2585 gen_rtx_SET (VOIDmode, target, source));
2597 emit_constant_insn (cond,
2598 gen_rtx_SET (VOIDmode, target, const0_rtx));
2601 if (remainder == 0xffffffff)
2603 if (reload_completed && rtx_equal_p (target, source))
2606 emit_constant_insn (cond,
2607 gen_rtx_SET (VOIDmode, target, source));
2616 if (reload_completed && rtx_equal_p (target, source))
2619 emit_constant_insn (cond,
2620 gen_rtx_SET (VOIDmode, target, source));
2624 if (remainder == 0xffffffff)
2627 emit_constant_insn (cond,
2628 gen_rtx_SET (VOIDmode, target,
2629 gen_rtx_NOT (mode, source)));
2635 /* We treat MINUS as (val - source), since (source - val) is always
2636 passed as (source + (-val)). */
2640 emit_constant_insn (cond,
2641 gen_rtx_SET (VOIDmode, target,
2642 gen_rtx_NEG (mode, source)));
2645 if (const_ok_for_arm (val))
2648 emit_constant_insn (cond,
2649 gen_rtx_SET (VOIDmode, target,
2650 gen_rtx_MINUS (mode, GEN_INT (val),
2662 /* If we can do it in one insn get out quickly. */
2663 if (const_ok_for_arm (val)
2664 || (can_negate_initial && const_ok_for_arm (-val))
2665 || (can_invert && const_ok_for_arm (~val)))
2668 emit_constant_insn (cond,
2669 gen_rtx_SET (VOIDmode, target,
2671 ? gen_rtx_fmt_ee (code, mode, source,
2677 /* Calculate a few attributes that may be useful for specific
2679 /* Count number of leading zeros. */
2680 for (i = 31; i >= 0; i--)
2682 if ((remainder & (1 << i)) == 0)
2683 clear_sign_bit_copies++;
2688 /* Count number of leading 1's. */
2689 for (i = 31; i >= 0; i--)
2691 if ((remainder & (1 << i)) != 0)
2692 set_sign_bit_copies++;
2697 /* Count number of trailing zero's. */
2698 for (i = 0; i <= 31; i++)
2700 if ((remainder & (1 << i)) == 0)
2701 clear_zero_bit_copies++;
2706 /* Count number of trailing 1's. */
2707 for (i = 0; i <= 31; i++)
2709 if ((remainder & (1 << i)) != 0)
2710 set_zero_bit_copies++;
2718 /* See if we can use movw. */
2719 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2722 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2727 /* See if we can do this by sign_extending a constant that is known
2728 to be negative. This is a good, way of doing it, since the shift
2729 may well merge into a subsequent insn. */
2730 if (set_sign_bit_copies > 1)
2732 if (const_ok_for_arm
2733 (temp1 = ARM_SIGN_EXTEND (remainder
2734 << (set_sign_bit_copies - 1))))
2738 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2739 emit_constant_insn (cond,
2740 gen_rtx_SET (VOIDmode, new_src,
2742 emit_constant_insn (cond,
2743 gen_ashrsi3 (target, new_src,
2744 GEN_INT (set_sign_bit_copies - 1)));
2748 /* For an inverted constant, we will need to set the low bits,
2749 these will be shifted out of harm's way. */
2750 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2751 if (const_ok_for_arm (~temp1))
2755 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2756 emit_constant_insn (cond,
2757 gen_rtx_SET (VOIDmode, new_src,
2759 emit_constant_insn (cond,
2760 gen_ashrsi3 (target, new_src,
2761 GEN_INT (set_sign_bit_copies - 1)));
2767 /* See if we can calculate the value as the difference between two
2768 valid immediates. */
2769 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2771 int topshift = clear_sign_bit_copies & ~1;
2773 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2774 & (0xff000000 >> topshift));
2776 /* If temp1 is zero, then that means the 9 most significant
2777 bits of remainder were 1 and we've caused it to overflow.
2778 When topshift is 0 we don't need to do anything since we
2779 can borrow from 'bit 32'. */
2780 if (temp1 == 0 && topshift != 0)
2781 temp1 = 0x80000000 >> (topshift - 1);
2783 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2785 if (const_ok_for_arm (temp2))
2789 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2790 emit_constant_insn (cond,
2791 gen_rtx_SET (VOIDmode, new_src,
2793 emit_constant_insn (cond,
2794 gen_addsi3 (target, new_src,
2802 /* See if we can generate this by setting the bottom (or the top)
2803 16 bits, and then shifting these into the other half of the
2804 word. We only look for the simplest cases, to do more would cost
2805 too much. Be careful, however, not to generate this when the
2806 alternative would take fewer insns. */
2807 if (val & 0xffff0000)
2809 temp1 = remainder & 0xffff0000;
2810 temp2 = remainder & 0x0000ffff;
2812 /* Overlaps outside this range are best done using other methods. */
2813 for (i = 9; i < 24; i++)
2815 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2816 && !const_ok_for_arm (temp2))
2818 rtx new_src = (subtargets
2819 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2821 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2822 source, subtargets, generate);
2830 gen_rtx_ASHIFT (mode, source,
2837 /* Don't duplicate cases already considered. */
2838 for (i = 17; i < 24; i++)
2840 if (((temp1 | (temp1 >> i)) == remainder)
2841 && !const_ok_for_arm (temp1))
2843 rtx new_src = (subtargets
2844 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2846 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2847 source, subtargets, generate);
2852 gen_rtx_SET (VOIDmode, target,
2855 gen_rtx_LSHIFTRT (mode, source,
2866 /* If we have IOR or XOR, and the constant can be loaded in a
2867 single instruction, and we can find a temporary to put it in,
2868 then this can be done in two instructions instead of 3-4. */
2870 /* TARGET can't be NULL if SUBTARGETS is 0 */
2871 || (reload_completed && !reg_mentioned_p (target, source)))
2873 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2877 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2879 emit_constant_insn (cond,
2880 gen_rtx_SET (VOIDmode, sub,
2882 emit_constant_insn (cond,
2883 gen_rtx_SET (VOIDmode, target,
2884 gen_rtx_fmt_ee (code, mode,
2895 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2896 and the remainder 0s for e.g. 0xfff00000)
2897 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2899 This can be done in 2 instructions by using shifts with mov or mvn.
2904 mvn r0, r0, lsr #12 */
2905 if (set_sign_bit_copies > 8
2906 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2910 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2911 rtx shift = GEN_INT (set_sign_bit_copies);
2915 gen_rtx_SET (VOIDmode, sub,
2917 gen_rtx_ASHIFT (mode,
2922 gen_rtx_SET (VOIDmode, target,
2924 gen_rtx_LSHIFTRT (mode, sub,
2931 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2933 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2935 For eg. r0 = r0 | 0xfff
2940 if (set_zero_bit_copies > 8
2941 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2945 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2946 rtx shift = GEN_INT (set_zero_bit_copies);
2950 gen_rtx_SET (VOIDmode, sub,
2952 gen_rtx_LSHIFTRT (mode,
2957 gen_rtx_SET (VOIDmode, target,
2959 gen_rtx_ASHIFT (mode, sub,
2965 /* This will never be reached for Thumb2 because orn is a valid
2966 instruction. This is for Thumb1 and the ARM 32 bit cases.
2968 x = y | constant (such that ~constant is a valid constant)
2970 x = ~(~y & ~constant).
2972 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2976 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2977 emit_constant_insn (cond,
2978 gen_rtx_SET (VOIDmode, sub,
2979 gen_rtx_NOT (mode, source)));
2982 sub = gen_reg_rtx (mode);
2983 emit_constant_insn (cond,
2984 gen_rtx_SET (VOIDmode, sub,
2985 gen_rtx_AND (mode, source,
2987 emit_constant_insn (cond,
2988 gen_rtx_SET (VOIDmode, target,
2989 gen_rtx_NOT (mode, sub)));
2996 /* See if two shifts will do 2 or more insn's worth of work. */
2997 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2999 HOST_WIDE_INT shift_mask = ((0xffffffff
3000 << (32 - clear_sign_bit_copies))
3003 if ((remainder | shift_mask) != 0xffffffff)
3007 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3008 insns = arm_gen_constant (AND, mode, cond,
3009 remainder | shift_mask,
3010 new_src, source, subtargets, 1);
3015 rtx targ = subtargets ? NULL_RTX : target;
3016 insns = arm_gen_constant (AND, mode, cond,
3017 remainder | shift_mask,
3018 targ, source, subtargets, 0);
3024 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3025 rtx shift = GEN_INT (clear_sign_bit_copies);
3027 emit_insn (gen_ashlsi3 (new_src, source, shift));
3028 emit_insn (gen_lshrsi3 (target, new_src, shift));
3034 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3036 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3038 if ((remainder | shift_mask) != 0xffffffff)
3042 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3044 insns = arm_gen_constant (AND, mode, cond,
3045 remainder | shift_mask,
3046 new_src, source, subtargets, 1);
3051 rtx targ = subtargets ? NULL_RTX : target;
3053 insns = arm_gen_constant (AND, mode, cond,
3054 remainder | shift_mask,
3055 targ, source, subtargets, 0);
3061 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3062 rtx shift = GEN_INT (clear_zero_bit_copies);
3064 emit_insn (gen_lshrsi3 (new_src, source, shift));
3065 emit_insn (gen_ashlsi3 (target, new_src, shift));
3077 for (i = 0; i < 32; i++)
3078 if (remainder & (1 << i))
3082 || (code != IOR && can_invert && num_bits_set > 16))
3083 remainder ^= 0xffffffff;
3084 else if (code == PLUS && num_bits_set > 16)
3085 remainder = (-remainder) & 0xffffffff;
3087 /* For XOR, if more than half the bits are set and there's a sequence
3088 of more than 8 consecutive ones in the pattern then we can XOR by the
3089 inverted constant and then invert the final result; this may save an
3090 instruction and might also lead to the final mvn being merged with
3091 some other operation. */
3092 else if (code == XOR && num_bits_set > 16
3093 && (count_insns_for_constant (remainder ^ 0xffffffff,
3095 (remainder ^ 0xffffffff))
3096 < count_insns_for_constant (remainder,
3097 find_best_start (remainder))))
3099 remainder ^= 0xffffffff;
3108 /* Now try and find a way of doing the job in either two or three
3110 We start by looking for the largest block of zeros that are aligned on
3111 a 2-bit boundary, we then fill up the temps, wrapping around to the
3112 top of the word when we drop off the bottom.
3113 In the worst case this code should produce no more than four insns.
3114 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3115 best place to start. */
3117 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3120 /* Now start emitting the insns. */
3121 i = find_best_start (remainder);
3128 if (remainder & (3 << (i - 2)))
3133 temp1 = remainder & ((0x0ff << end)
3134 | ((i < end) ? (0xff >> (32 - end)) : 0));
3135 remainder &= ~temp1;
3139 rtx new_src, temp1_rtx;
3141 if (code == SET || code == MINUS)
3143 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3144 if (can_invert && code != MINUS)
3149 if ((final_invert || remainder) && subtargets)
3150 new_src = gen_reg_rtx (mode);
3155 else if (can_negate)
3159 temp1 = trunc_int_for_mode (temp1, mode);
3160 temp1_rtx = GEN_INT (temp1);
3164 else if (code == MINUS)
3165 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3167 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3169 emit_constant_insn (cond,
3170 gen_rtx_SET (VOIDmode, new_src,
3180 else if (code == MINUS)
3186 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3196 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3197 gen_rtx_NOT (mode, source)));
3204 /* Canonicalize a comparison so that we are more likely to recognize it.
3205 This can be done for a few constant compares, where we can make the
3206 immediate value easier to load. */
3209 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3212 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3213 unsigned HOST_WIDE_INT maxval;
3214 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3225 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3227 *op1 = GEN_INT (i + 1);
3228 return code == GT ? GE : LT;
3235 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3237 *op1 = GEN_INT (i - 1);
3238 return code == GE ? GT : LE;
3244 if (i != ~((unsigned HOST_WIDE_INT) 0)
3245 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3247 *op1 = GEN_INT (i + 1);
3248 return code == GTU ? GEU : LTU;
3255 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3257 *op1 = GEN_INT (i - 1);
3258 return code == GEU ? GTU : LEU;
3270 /* Define how to find the value returned by a function. */
3273 arm_function_value(const_tree type, const_tree func,
3274 bool outgoing ATTRIBUTE_UNUSED)
3276 enum machine_mode mode;
3277 int unsignedp ATTRIBUTE_UNUSED;
3278 rtx r ATTRIBUTE_UNUSED;
3280 mode = TYPE_MODE (type);
3282 if (TARGET_AAPCS_BASED)
3283 return aapcs_allocate_return_reg (mode, type, func);
3285 /* Promote integer types. */
3286 if (INTEGRAL_TYPE_P (type))
3287 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3289 /* Promotes small structs returned in a register to full-word size
3290 for big-endian AAPCS. */
3291 if (arm_return_in_msb (type))
3293 HOST_WIDE_INT size = int_size_in_bytes (type);
3294 if (size % UNITS_PER_WORD != 0)
3296 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3297 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3301 return LIBCALL_VALUE (mode);
3305 libcall_eq (const void *p1, const void *p2)
3307 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3311 libcall_hash (const void *p1)
3313 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3317 add_libcall (htab_t htab, rtx libcall)
3319 *htab_find_slot (htab, libcall, INSERT) = libcall;
3323 arm_libcall_uses_aapcs_base (const_rtx libcall)
3325 static bool init_done = false;
3326 static htab_t libcall_htab;
3332 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3334 add_libcall (libcall_htab,
3335 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3336 add_libcall (libcall_htab,
3337 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3338 add_libcall (libcall_htab,
3339 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3340 add_libcall (libcall_htab,
3341 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3343 add_libcall (libcall_htab,
3344 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3345 add_libcall (libcall_htab,
3346 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3347 add_libcall (libcall_htab,
3348 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3349 add_libcall (libcall_htab,
3350 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3352 add_libcall (libcall_htab,
3353 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3354 add_libcall (libcall_htab,
3355 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3356 add_libcall (libcall_htab,
3357 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3358 add_libcall (libcall_htab,
3359 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3360 add_libcall (libcall_htab,
3361 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3362 add_libcall (libcall_htab,
3363 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3366 return libcall && htab_find (libcall_htab, libcall) != NULL;
3370 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3372 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3373 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3375 /* The following libcalls return their result in integer registers,
3376 even though they return a floating point value. */
3377 if (arm_libcall_uses_aapcs_base (libcall))
3378 return gen_rtx_REG (mode, ARG_REGISTER(1));
3382 return LIBCALL_VALUE (mode);
3385 /* Determine the amount of memory needed to store the possible return
3386 registers of an untyped call. */
3388 arm_apply_result_size (void)
3394 if (TARGET_HARD_FLOAT_ABI)
3400 if (TARGET_MAVERICK)
3403 if (TARGET_IWMMXT_ABI)
3410 /* Decide whether TYPE should be returned in memory (true)
3411 or in a register (false). FNTYPE is the type of the function making
3414 arm_return_in_memory (const_tree type, const_tree fntype)
3418 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3420 if (TARGET_AAPCS_BASED)
3422 /* Simple, non-aggregate types (ie not including vectors and
3423 complex) are always returned in a register (or registers).
3424 We don't care about which register here, so we can short-cut
3425 some of the detail. */
3426 if (!AGGREGATE_TYPE_P (type)
3427 && TREE_CODE (type) != VECTOR_TYPE
3428 && TREE_CODE (type) != COMPLEX_TYPE)
3431 /* Any return value that is no larger than one word can be
3433 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3436 /* Check any available co-processors to see if they accept the
3437 type as a register candidate (VFP, for example, can return
3438 some aggregates in consecutive registers). These aren't
3439 available if the call is variadic. */
3440 if (aapcs_select_return_coproc (type, fntype) >= 0)
3443 /* Vector values should be returned using ARM registers, not
3444 memory (unless they're over 16 bytes, which will break since
3445 we only have four call-clobbered registers to play with). */
3446 if (TREE_CODE (type) == VECTOR_TYPE)
3447 return (size < 0 || size > (4 * UNITS_PER_WORD));
3449 /* The rest go in memory. */
3453 if (TREE_CODE (type) == VECTOR_TYPE)
3454 return (size < 0 || size > (4 * UNITS_PER_WORD));
3456 if (!AGGREGATE_TYPE_P (type) &&
3457 (TREE_CODE (type) != VECTOR_TYPE))
3458 /* All simple types are returned in registers. */
3461 if (arm_abi != ARM_ABI_APCS)
3463 /* ATPCS and later return aggregate types in memory only if they are
3464 larger than a word (or are variable size). */
3465 return (size < 0 || size > UNITS_PER_WORD);
3468 /* For the arm-wince targets we choose to be compatible with Microsoft's
3469 ARM and Thumb compilers, which always return aggregates in memory. */
3471 /* All structures/unions bigger than one word are returned in memory.
3472 Also catch the case where int_size_in_bytes returns -1. In this case
3473 the aggregate is either huge or of variable size, and in either case
3474 we will want to return it via memory and not in a register. */
3475 if (size < 0 || size > UNITS_PER_WORD)
3478 if (TREE_CODE (type) == RECORD_TYPE)
3482 /* For a struct the APCS says that we only return in a register
3483 if the type is 'integer like' and every addressable element
3484 has an offset of zero. For practical purposes this means
3485 that the structure can have at most one non bit-field element
3486 and that this element must be the first one in the structure. */
3488 /* Find the first field, ignoring non FIELD_DECL things which will
3489 have been created by C++. */
3490 for (field = TYPE_FIELDS (type);
3491 field && TREE_CODE (field) != FIELD_DECL;
3492 field = TREE_CHAIN (field))
3496 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3498 /* Check that the first field is valid for returning in a register. */
3500 /* ... Floats are not allowed */
3501 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3504 /* ... Aggregates that are not themselves valid for returning in
3505 a register are not allowed. */
3506 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3509 /* Now check the remaining fields, if any. Only bitfields are allowed,
3510 since they are not addressable. */
3511 for (field = TREE_CHAIN (field);
3513 field = TREE_CHAIN (field))
3515 if (TREE_CODE (field) != FIELD_DECL)
3518 if (!DECL_BIT_FIELD_TYPE (field))
3525 if (TREE_CODE (type) == UNION_TYPE)
3529 /* Unions can be returned in registers if every element is
3530 integral, or can be returned in an integer register. */
3531 for (field = TYPE_FIELDS (type);
3533 field = TREE_CHAIN (field))
3535 if (TREE_CODE (field) != FIELD_DECL)
3538 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3541 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3547 #endif /* not ARM_WINCE */
3549 /* Return all other types in memory. */
3553 /* Indicate whether or not words of a double are in big-endian order. */
3556 arm_float_words_big_endian (void)
3558 if (TARGET_MAVERICK)
3561 /* For FPA, float words are always big-endian. For VFP, floats words
3562 follow the memory system mode. */
3570 return (TARGET_BIG_END ? 1 : 0);
3575 const struct pcs_attribute_arg
3579 } pcs_attribute_args[] =
3581 {"aapcs", ARM_PCS_AAPCS},
3582 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3584 /* We could recognize these, but changes would be needed elsewhere
3585 * to implement them. */
3586 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3587 {"atpcs", ARM_PCS_ATPCS},
3588 {"apcs", ARM_PCS_APCS},
3590 {NULL, ARM_PCS_UNKNOWN}
3594 arm_pcs_from_attribute (tree attr)
3596 const struct pcs_attribute_arg *ptr;
3599 /* Get the value of the argument. */
3600 if (TREE_VALUE (attr) == NULL_TREE
3601 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3602 return ARM_PCS_UNKNOWN;
3604 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3606 /* Check it against the list of known arguments. */
3607 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3608 if (streq (arg, ptr->arg))
3611 /* An unrecognized interrupt type. */
3612 return ARM_PCS_UNKNOWN;
3615 /* Get the PCS variant to use for this call. TYPE is the function's type
3616 specification, DECL is the specific declartion. DECL may be null if
3617 the call could be indirect or if this is a library call. */
3619 arm_get_pcs_model (const_tree type, const_tree decl)
3621 bool user_convention = false;
3622 enum arm_pcs user_pcs = arm_pcs_default;
3627 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3630 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3631 user_convention = true;
3634 if (TARGET_AAPCS_BASED)
3636 /* Detect varargs functions. These always use the base rules
3637 (no argument is ever a candidate for a co-processor
3639 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3640 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3641 != void_type_node));
3643 if (user_convention)
3645 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3646 sorry ("Non-AAPCS derived PCS variant");
3647 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3648 error ("Variadic functions must use the base AAPCS variant");
3652 return ARM_PCS_AAPCS;
3653 else if (user_convention)
3655 else if (decl && flag_unit_at_a_time)
3657 /* Local functions never leak outside this compilation unit,
3658 so we are free to use whatever conventions are
3660 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3661 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3663 return ARM_PCS_AAPCS_LOCAL;
3666 else if (user_convention && user_pcs != arm_pcs_default)
3667 sorry ("PCS variant");
3669 /* For everything else we use the target's default. */
3670 return arm_pcs_default;
3675 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3676 const_tree fntype ATTRIBUTE_UNUSED,
3677 rtx libcall ATTRIBUTE_UNUSED,
3678 const_tree fndecl ATTRIBUTE_UNUSED)
3680 /* Record the unallocated VFP registers. */
3681 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3682 pcum->aapcs_vfp_reg_alloc = 0;
3685 /* Walk down the type tree of TYPE counting consecutive base elements.
3686 If *MODEP is VOIDmode, then set it to the first valid floating point
3687 type. If a non-floating point type is found, or if a floating point
3688 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3689 otherwise return the count in the sub-tree. */
3691 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3693 enum machine_mode mode;
3696 switch (TREE_CODE (type))
3699 mode = TYPE_MODE (type);
3700 if (mode != DFmode && mode != SFmode)
3703 if (*modep == VOIDmode)
3712 mode = TYPE_MODE (TREE_TYPE (type));
3713 if (mode != DFmode && mode != SFmode)
3716 if (*modep == VOIDmode)
3725 /* Use V2SImode and V4SImode as representatives of all 64-bit
3726 and 128-bit vector types, whether or not those modes are
3727 supported with the present options. */
3728 size = int_size_in_bytes (type);
3741 if (*modep == VOIDmode)
3744 /* Vector modes are considered to be opaque: two vectors are
3745 equivalent for the purposes of being homogeneous aggregates
3746 if they are the same size. */
3755 tree index = TYPE_DOMAIN (type);
3757 /* Can't handle incomplete types. */
3758 if (!COMPLETE_TYPE_P(type))
3761 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3764 || !TYPE_MAX_VALUE (index)
3765 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3766 || !TYPE_MIN_VALUE (index)
3767 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3771 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3772 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3774 /* There must be no padding. */
3775 if (!host_integerp (TYPE_SIZE (type), 1)
3776 || (tree_low_cst (TYPE_SIZE (type), 1)
3777 != count * GET_MODE_BITSIZE (*modep)))
3789 /* Can't handle incomplete types. */
3790 if (!COMPLETE_TYPE_P(type))
3793 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3795 if (TREE_CODE (field) != FIELD_DECL)
3798 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3804 /* There must be no padding. */
3805 if (!host_integerp (TYPE_SIZE (type), 1)
3806 || (tree_low_cst (TYPE_SIZE (type), 1)
3807 != count * GET_MODE_BITSIZE (*modep)))
3814 case QUAL_UNION_TYPE:
3816 /* These aren't very interesting except in a degenerate case. */
3821 /* Can't handle incomplete types. */
3822 if (!COMPLETE_TYPE_P(type))
3825 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3827 if (TREE_CODE (field) != FIELD_DECL)
3830 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3833 count = count > sub_count ? count : sub_count;
3836 /* There must be no padding. */
3837 if (!host_integerp (TYPE_SIZE (type), 1)
3838 || (tree_low_cst (TYPE_SIZE (type), 1)
3839 != count * GET_MODE_BITSIZE (*modep)))
3852 /* Return true if PCS_VARIANT should use VFP registers. */
3854 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3856 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3859 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3862 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3863 (TARGET_VFP_DOUBLE || !is_double));
3867 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3868 enum machine_mode mode, const_tree type,
3869 enum machine_mode *base_mode, int *count)
3871 enum machine_mode new_mode = VOIDmode;
3873 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3874 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3875 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3880 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3883 new_mode = (mode == DCmode ? DFmode : SFmode);
3885 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3887 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3889 if (ag_count > 0 && ag_count <= 4)
3898 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3901 *base_mode = new_mode;
3906 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3907 enum machine_mode mode, const_tree type)
3909 int count ATTRIBUTE_UNUSED;
3910 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3912 if (!use_vfp_abi (pcs_variant, false))
3914 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3919 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3922 if (!use_vfp_abi (pcum->pcs_variant, false))
3925 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3926 &pcum->aapcs_vfp_rmode,
3927 &pcum->aapcs_vfp_rcount);
3931 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3932 const_tree type ATTRIBUTE_UNUSED)
3934 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3935 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3938 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3939 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3941 pcum->aapcs_vfp_reg_alloc = mask << regno;
3942 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3945 int rcount = pcum->aapcs_vfp_rcount;
3947 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3951 /* Avoid using unsupported vector modes. */
3952 if (rmode == V2SImode)
3954 else if (rmode == V4SImode)
3961 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3962 for (i = 0; i < rcount; i++)
3964 rtx tmp = gen_rtx_REG (rmode,
3965 FIRST_VFP_REGNUM + regno + i * rshift);
3966 tmp = gen_rtx_EXPR_LIST
3968 GEN_INT (i * GET_MODE_SIZE (rmode)));
3969 XVECEXP (par, 0, i) = tmp;
3972 pcum->aapcs_reg = par;
3975 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3982 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3983 enum machine_mode mode,
3984 const_tree type ATTRIBUTE_UNUSED)
3986 if (!use_vfp_abi (pcs_variant, false))
3989 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3992 enum machine_mode ag_mode;
3997 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4002 if (ag_mode == V2SImode)
4004 else if (ag_mode == V4SImode)
4010 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4011 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4012 for (i = 0; i < count; i++)
4014 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4015 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4016 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4017 XVECEXP (par, 0, i) = tmp;
4023 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4027 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4028 enum machine_mode mode ATTRIBUTE_UNUSED,
4029 const_tree type ATTRIBUTE_UNUSED)
4031 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4032 pcum->aapcs_vfp_reg_alloc = 0;
4036 #define AAPCS_CP(X) \
4038 aapcs_ ## X ## _cum_init, \
4039 aapcs_ ## X ## _is_call_candidate, \
4040 aapcs_ ## X ## _allocate, \
4041 aapcs_ ## X ## _is_return_candidate, \
4042 aapcs_ ## X ## _allocate_return_reg, \
4043 aapcs_ ## X ## _advance \
4046 /* Table of co-processors that can be used to pass arguments in
4047 registers. Idealy no arugment should be a candidate for more than
4048 one co-processor table entry, but the table is processed in order
4049 and stops after the first match. If that entry then fails to put
4050 the argument into a co-processor register, the argument will go on
4054 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4055 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4057 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4058 BLKmode) is a candidate for this co-processor's registers; this
4059 function should ignore any position-dependent state in
4060 CUMULATIVE_ARGS and only use call-type dependent information. */
4061 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4063 /* Return true if the argument does get a co-processor register; it
4064 should set aapcs_reg to an RTX of the register allocated as is
4065 required for a return from FUNCTION_ARG. */
4066 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4068 /* Return true if a result of mode MODE (or type TYPE if MODE is
4069 BLKmode) is can be returned in this co-processor's registers. */
4070 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4072 /* Allocate and return an RTX element to hold the return type of a
4073 call, this routine must not fail and will only be called if
4074 is_return_candidate returned true with the same parameters. */
4075 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4077 /* Finish processing this argument and prepare to start processing
4079 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4080 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4088 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4093 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4094 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4101 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4103 /* We aren't passed a decl, so we can't check that a call is local.
4104 However, it isn't clear that that would be a win anyway, since it
4105 might limit some tail-calling opportunities. */
4106 enum arm_pcs pcs_variant;
4110 const_tree fndecl = NULL_TREE;
4112 if (TREE_CODE (fntype) == FUNCTION_DECL)
4115 fntype = TREE_TYPE (fntype);
4118 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4121 pcs_variant = arm_pcs_default;
4123 if (pcs_variant != ARM_PCS_AAPCS)
4127 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4128 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4137 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4140 /* We aren't passed a decl, so we can't check that a call is local.
4141 However, it isn't clear that that would be a win anyway, since it
4142 might limit some tail-calling opportunities. */
4143 enum arm_pcs pcs_variant;
4144 int unsignedp ATTRIBUTE_UNUSED;
4148 const_tree fndecl = NULL_TREE;
4150 if (TREE_CODE (fntype) == FUNCTION_DECL)
4153 fntype = TREE_TYPE (fntype);
4156 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4159 pcs_variant = arm_pcs_default;
4161 /* Promote integer types. */
4162 if (type && INTEGRAL_TYPE_P (type))
4163 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4165 if (pcs_variant != ARM_PCS_AAPCS)
4169 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4170 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4172 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4176 /* Promotes small structs returned in a register to full-word size
4177 for big-endian AAPCS. */
4178 if (type && arm_return_in_msb (type))
4180 HOST_WIDE_INT size = int_size_in_bytes (type);
4181 if (size % UNITS_PER_WORD != 0)
4183 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4184 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4188 return gen_rtx_REG (mode, R0_REGNUM);
4192 aapcs_libcall_value (enum machine_mode mode)
4194 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4197 /* Lay out a function argument using the AAPCS rules. The rule
4198 numbers referred to here are those in the AAPCS. */
4200 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4201 tree type, int named)
4206 /* We only need to do this once per argument. */
4207 if (pcum->aapcs_arg_processed)
4210 pcum->aapcs_arg_processed = true;
4212 /* Special case: if named is false then we are handling an incoming
4213 anonymous argument which is on the stack. */
4217 /* Is this a potential co-processor register candidate? */
4218 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4220 int slot = aapcs_select_call_coproc (pcum, mode, type);
4221 pcum->aapcs_cprc_slot = slot;
4223 /* We don't have to apply any of the rules from part B of the
4224 preparation phase, these are handled elsewhere in the
4229 /* A Co-processor register candidate goes either in its own
4230 class of registers or on the stack. */
4231 if (!pcum->aapcs_cprc_failed[slot])
4233 /* C1.cp - Try to allocate the argument to co-processor
4235 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4238 /* C2.cp - Put the argument on the stack and note that we
4239 can't assign any more candidates in this slot. We also
4240 need to note that we have allocated stack space, so that
4241 we won't later try to split a non-cprc candidate between
4242 core registers and the stack. */
4243 pcum->aapcs_cprc_failed[slot] = true;
4244 pcum->can_split = false;
4247 /* We didn't get a register, so this argument goes on the
4249 gcc_assert (pcum->can_split == false);
4254 /* C3 - For double-word aligned arguments, round the NCRN up to the
4255 next even number. */
4256 ncrn = pcum->aapcs_ncrn;
4257 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4260 nregs = ARM_NUM_REGS2(mode, type);
4262 /* Sigh, this test should really assert that nregs > 0, but a GCC
4263 extension allows empty structs and then gives them empty size; it
4264 then allows such a structure to be passed by value. For some of
4265 the code below we have to pretend that such an argument has
4266 non-zero size so that we 'locate' it correctly either in
4267 registers or on the stack. */
4268 gcc_assert (nregs >= 0);
4270 nregs2 = nregs ? nregs : 1;
4272 /* C4 - Argument fits entirely in core registers. */
4273 if (ncrn + nregs2 <= NUM_ARG_REGS)
4275 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4276 pcum->aapcs_next_ncrn = ncrn + nregs;
4280 /* C5 - Some core registers left and there are no arguments already
4281 on the stack: split this argument between the remaining core
4282 registers and the stack. */
4283 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4285 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4286 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4287 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4291 /* C6 - NCRN is set to 4. */
4292 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4294 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4298 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4299 for a call to a function whose data type is FNTYPE.
4300 For a library call, FNTYPE is NULL. */
4302 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4304 tree fndecl ATTRIBUTE_UNUSED)
4306 /* Long call handling. */
4308 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4310 pcum->pcs_variant = arm_pcs_default;
4312 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4314 if (arm_libcall_uses_aapcs_base (libname))
4315 pcum->pcs_variant = ARM_PCS_AAPCS;
4317 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4318 pcum->aapcs_reg = NULL_RTX;
4319 pcum->aapcs_partial = 0;
4320 pcum->aapcs_arg_processed = false;
4321 pcum->aapcs_cprc_slot = -1;
4322 pcum->can_split = true;
4324 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4328 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4330 pcum->aapcs_cprc_failed[i] = false;
4331 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4339 /* On the ARM, the offset starts at 0. */
4341 pcum->iwmmxt_nregs = 0;
4342 pcum->can_split = true;
4344 /* Varargs vectors are treated the same as long long.
4345 named_count avoids having to change the way arm handles 'named' */
4346 pcum->named_count = 0;
4349 if (TARGET_REALLY_IWMMXT && fntype)
4353 for (fn_arg = TYPE_ARG_TYPES (fntype);
4355 fn_arg = TREE_CHAIN (fn_arg))
4356 pcum->named_count += 1;
4358 if (! pcum->named_count)
4359 pcum->named_count = INT_MAX;
4364 /* Return true if mode/type need doubleword alignment. */
4366 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4368 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4369 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4373 /* Determine where to put an argument to a function.
4374 Value is zero to push the argument on the stack,
4375 or a hard register in which to store the argument.
4377 MODE is the argument's machine mode.
4378 TYPE is the data type of the argument (as a tree).
4379 This is null for libcalls where that information may
4381 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4382 the preceding args and about the function being called.
4383 NAMED is nonzero if this argument is a named parameter
4384 (otherwise it is an extra parameter matching an ellipsis). */
4387 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4388 tree type, int named)
4392 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4393 a call insn (op3 of a call_value insn). */
4394 if (mode == VOIDmode)
4397 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4399 aapcs_layout_arg (pcum, mode, type, named);
4400 return pcum->aapcs_reg;
4403 /* Varargs vectors are treated the same as long long.
4404 named_count avoids having to change the way arm handles 'named' */
4405 if (TARGET_IWMMXT_ABI
4406 && arm_vector_mode_supported_p (mode)
4407 && pcum->named_count > pcum->nargs + 1)
4409 if (pcum->iwmmxt_nregs <= 9)
4410 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4413 pcum->can_split = false;
4418 /* Put doubleword aligned quantities in even register pairs. */
4420 && ARM_DOUBLEWORD_ALIGN
4421 && arm_needs_doubleword_align (mode, type))
4424 if (mode == VOIDmode)
4425 /* Pick an arbitrary value for operand 2 of the call insn. */
4428 /* Only allow splitting an arg between regs and memory if all preceding
4429 args were allocated to regs. For args passed by reference we only count
4430 the reference pointer. */
4431 if (pcum->can_split)
4434 nregs = ARM_NUM_REGS2 (mode, type);
4436 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4439 return gen_rtx_REG (mode, pcum->nregs);
4443 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4444 tree type, bool named)
4446 int nregs = pcum->nregs;
4448 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4450 aapcs_layout_arg (pcum, mode, type, named);
4451 return pcum->aapcs_partial;
4454 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4457 if (NUM_ARG_REGS > nregs
4458 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4460 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4466 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4467 tree type, bool named)
4469 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4471 aapcs_layout_arg (pcum, mode, type, named);
4473 if (pcum->aapcs_cprc_slot >= 0)
4475 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4477 pcum->aapcs_cprc_slot = -1;
4480 /* Generic stuff. */
4481 pcum->aapcs_arg_processed = false;
4482 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4483 pcum->aapcs_reg = NULL_RTX;
4484 pcum->aapcs_partial = 0;
4489 if (arm_vector_mode_supported_p (mode)
4490 && pcum->named_count > pcum->nargs
4491 && TARGET_IWMMXT_ABI)
4492 pcum->iwmmxt_nregs += 1;
4494 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4498 /* Variable sized types are passed by reference. This is a GCC
4499 extension to the ARM ABI. */
4502 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4503 enum machine_mode mode ATTRIBUTE_UNUSED,
4504 const_tree type, bool named ATTRIBUTE_UNUSED)
4506 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4509 /* Encode the current state of the #pragma [no_]long_calls. */
4512 OFF, /* No #pragma [no_]long_calls is in effect. */
4513 LONG, /* #pragma long_calls is in effect. */
4514 SHORT /* #pragma no_long_calls is in effect. */
4517 static arm_pragma_enum arm_pragma_long_calls = OFF;
4520 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4522 arm_pragma_long_calls = LONG;
4526 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4528 arm_pragma_long_calls = SHORT;
4532 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4534 arm_pragma_long_calls = OFF;
4537 /* Handle an attribute requiring a FUNCTION_DECL;
4538 arguments as in struct attribute_spec.handler. */
4540 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4541 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4543 if (TREE_CODE (*node) != FUNCTION_DECL)
4545 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4547 *no_add_attrs = true;
4553 /* Handle an "interrupt" or "isr" attribute;
4554 arguments as in struct attribute_spec.handler. */
4556 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4561 if (TREE_CODE (*node) != FUNCTION_DECL)
4563 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4565 *no_add_attrs = true;
4567 /* FIXME: the argument if any is checked for type attributes;
4568 should it be checked for decl ones? */
4572 if (TREE_CODE (*node) == FUNCTION_TYPE
4573 || TREE_CODE (*node) == METHOD_TYPE)
4575 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4577 warning (OPT_Wattributes, "%qE attribute ignored",
4579 *no_add_attrs = true;
4582 else if (TREE_CODE (*node) == POINTER_TYPE
4583 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4584 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4585 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4587 *node = build_variant_type_copy (*node);
4588 TREE_TYPE (*node) = build_type_attribute_variant
4590 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4591 *no_add_attrs = true;
4595 /* Possibly pass this attribute on from the type to a decl. */
4596 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4597 | (int) ATTR_FLAG_FUNCTION_NEXT
4598 | (int) ATTR_FLAG_ARRAY_NEXT))
4600 *no_add_attrs = true;
4601 return tree_cons (name, args, NULL_TREE);
4605 warning (OPT_Wattributes, "%qE attribute ignored",
4614 /* Handle a "pcs" attribute; arguments as in struct
4615 attribute_spec.handler. */
4617 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4618 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4620 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4622 warning (OPT_Wattributes, "%qE attribute ignored", name);
4623 *no_add_attrs = true;
4628 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4629 /* Handle the "notshared" attribute. This attribute is another way of
4630 requesting hidden visibility. ARM's compiler supports
4631 "__declspec(notshared)"; we support the same thing via an
4635 arm_handle_notshared_attribute (tree *node,
4636 tree name ATTRIBUTE_UNUSED,
4637 tree args ATTRIBUTE_UNUSED,
4638 int flags ATTRIBUTE_UNUSED,
4641 tree decl = TYPE_NAME (*node);
4645 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4646 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4647 *no_add_attrs = false;
4653 /* Return 0 if the attributes for two types are incompatible, 1 if they
4654 are compatible, and 2 if they are nearly compatible (which causes a
4655 warning to be generated). */
4657 arm_comp_type_attributes (const_tree type1, const_tree type2)
4661 /* Check for mismatch of non-default calling convention. */
4662 if (TREE_CODE (type1) != FUNCTION_TYPE)
4665 /* Check for mismatched call attributes. */
4666 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4667 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4668 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4669 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4671 /* Only bother to check if an attribute is defined. */
4672 if (l1 | l2 | s1 | s2)
4674 /* If one type has an attribute, the other must have the same attribute. */
4675 if ((l1 != l2) || (s1 != s2))
4678 /* Disallow mixed attributes. */
4679 if ((l1 & s2) || (l2 & s1))
4683 /* Check for mismatched ISR attribute. */
4684 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4686 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4687 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4689 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4696 /* Assigns default attributes to newly defined type. This is used to
4697 set short_call/long_call attributes for function types of
4698 functions defined inside corresponding #pragma scopes. */
4700 arm_set_default_type_attributes (tree type)
4702 /* Add __attribute__ ((long_call)) to all functions, when
4703 inside #pragma long_calls or __attribute__ ((short_call)),
4704 when inside #pragma no_long_calls. */
4705 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4707 tree type_attr_list, attr_name;
4708 type_attr_list = TYPE_ATTRIBUTES (type);
4710 if (arm_pragma_long_calls == LONG)
4711 attr_name = get_identifier ("long_call");
4712 else if (arm_pragma_long_calls == SHORT)
4713 attr_name = get_identifier ("short_call");
4717 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4718 TYPE_ATTRIBUTES (type) = type_attr_list;
4722 /* Return true if DECL is known to be linked into section SECTION. */
4725 arm_function_in_section_p (tree decl, section *section)
4727 /* We can only be certain about functions defined in the same
4728 compilation unit. */
4729 if (!TREE_STATIC (decl))
4732 /* Make sure that SYMBOL always binds to the definition in this
4733 compilation unit. */
4734 if (!targetm.binds_local_p (decl))
4737 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4738 if (!DECL_SECTION_NAME (decl))
4740 /* Make sure that we will not create a unique section for DECL. */
4741 if (flag_function_sections || DECL_ONE_ONLY (decl))
4745 return function_section (decl) == section;
4748 /* Return nonzero if a 32-bit "long_call" should be generated for
4749 a call from the current function to DECL. We generate a long_call
4752 a. has an __attribute__((long call))
4753 or b. is within the scope of a #pragma long_calls
4754 or c. the -mlong-calls command line switch has been specified
4756 However we do not generate a long call if the function:
4758 d. has an __attribute__ ((short_call))
4759 or e. is inside the scope of a #pragma no_long_calls
4760 or f. is defined in the same section as the current function. */
4763 arm_is_long_call_p (tree decl)
4768 return TARGET_LONG_CALLS;
4770 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4771 if (lookup_attribute ("short_call", attrs))
4774 /* For "f", be conservative, and only cater for cases in which the
4775 whole of the current function is placed in the same section. */
4776 if (!flag_reorder_blocks_and_partition
4777 && TREE_CODE (decl) == FUNCTION_DECL
4778 && arm_function_in_section_p (decl, current_function_section ()))
4781 if (lookup_attribute ("long_call", attrs))
4784 return TARGET_LONG_CALLS;
4787 /* Return nonzero if it is ok to make a tail-call to DECL. */
4789 arm_function_ok_for_sibcall (tree decl, tree exp)
4791 unsigned long func_type;
4793 if (cfun->machine->sibcall_blocked)
4796 /* Never tailcall something for which we have no decl, or if we
4797 are generating code for Thumb-1. */
4798 if (decl == NULL || TARGET_THUMB1)
4801 /* The PIC register is live on entry to VxWorks PLT entries, so we
4802 must make the call before restoring the PIC register. */
4803 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4806 /* Cannot tail-call to long calls, since these are out of range of
4807 a branch instruction. */
4808 if (arm_is_long_call_p (decl))
4811 /* If we are interworking and the function is not declared static
4812 then we can't tail-call it unless we know that it exists in this
4813 compilation unit (since it might be a Thumb routine). */
4814 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4817 func_type = arm_current_func_type ();
4818 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4819 if (IS_INTERRUPT (func_type))
4822 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4824 /* Check that the return value locations are the same. For
4825 example that we aren't returning a value from the sibling in
4826 a VFP register but then need to transfer it to a core
4830 a = arm_function_value (TREE_TYPE (exp), decl, false);
4831 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4833 if (!rtx_equal_p (a, b))
4837 /* Never tailcall if function may be called with a misaligned SP. */
4838 if (IS_STACKALIGN (func_type))
4841 /* Everything else is ok. */
4846 /* Addressing mode support functions. */
4848 /* Return nonzero if X is a legitimate immediate operand when compiling
4849 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4851 legitimate_pic_operand_p (rtx x)
4853 if (GET_CODE (x) == SYMBOL_REF
4854 || (GET_CODE (x) == CONST
4855 && GET_CODE (XEXP (x, 0)) == PLUS
4856 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4862 /* Record that the current function needs a PIC register. Initialize
4863 cfun->machine->pic_reg if we have not already done so. */
4866 require_pic_register (void)
4868 /* A lot of the logic here is made obscure by the fact that this
4869 routine gets called as part of the rtx cost estimation process.
4870 We don't want those calls to affect any assumptions about the real
4871 function; and further, we can't call entry_of_function() until we
4872 start the real expansion process. */
4873 if (!crtl->uses_pic_offset_table)
4875 gcc_assert (can_create_pseudo_p ());
4876 if (arm_pic_register != INVALID_REGNUM)
4878 if (!cfun->machine->pic_reg)
4879 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4881 /* Play games to avoid marking the function as needing pic
4882 if we are being called as part of the cost-estimation
4884 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4885 crtl->uses_pic_offset_table = 1;
4891 if (!cfun->machine->pic_reg)
4892 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4894 /* Play games to avoid marking the function as needing pic
4895 if we are being called as part of the cost-estimation
4897 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4899 crtl->uses_pic_offset_table = 1;
4902 arm_load_pic_register (0UL);
4906 /* We can be called during expansion of PHI nodes, where
4907 we can't yet emit instructions directly in the final
4908 insn stream. Queue the insns on the entry edge, they will
4909 be committed after everything else is expanded. */
4910 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4917 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4919 if (GET_CODE (orig) == SYMBOL_REF
4920 || GET_CODE (orig) == LABEL_REF)
4922 rtx pic_ref, address;
4927 gcc_assert (can_create_pseudo_p ());
4928 reg = gen_reg_rtx (Pmode);
4929 address = gen_reg_rtx (Pmode);
4934 /* VxWorks does not impose a fixed gap between segments; the run-time
4935 gap can be different from the object-file gap. We therefore can't
4936 use GOTOFF unless we are absolutely sure that the symbol is in the
4937 same segment as the GOT. Unfortunately, the flexibility of linker
4938 scripts means that we can't be sure of that in general, so assume
4939 that GOTOFF is never valid on VxWorks. */
4940 if ((GET_CODE (orig) == LABEL_REF
4941 || (GET_CODE (orig) == SYMBOL_REF &&
4942 SYMBOL_REF_LOCAL_P (orig)))
4944 && !TARGET_VXWORKS_RTP)
4945 insn = arm_pic_static_addr (orig, reg);
4948 /* If this function doesn't have a pic register, create one now. */
4949 require_pic_register ();
4952 emit_insn (gen_pic_load_addr_32bit (address, orig));
4953 else /* TARGET_THUMB1 */
4954 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4956 pic_ref = gen_const_mem (Pmode,
4957 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4959 insn = emit_move_insn (reg, pic_ref);
4962 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4964 set_unique_reg_note (insn, REG_EQUAL, orig);
4968 else if (GET_CODE (orig) == CONST)
4972 if (GET_CODE (XEXP (orig, 0)) == PLUS
4973 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4976 /* Handle the case where we have: const (UNSPEC_TLS). */
4977 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4978 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4981 /* Handle the case where we have:
4982 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4984 if (GET_CODE (XEXP (orig, 0)) == PLUS
4985 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4986 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4988 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4994 gcc_assert (can_create_pseudo_p ());
4995 reg = gen_reg_rtx (Pmode);
4998 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5000 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5001 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5002 base == reg ? 0 : reg);
5004 if (GET_CODE (offset) == CONST_INT)
5006 /* The base register doesn't really matter, we only want to
5007 test the index for the appropriate mode. */
5008 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5010 gcc_assert (can_create_pseudo_p ());
5011 offset = force_reg (Pmode, offset);
5014 if (GET_CODE (offset) == CONST_INT)
5015 return plus_constant (base, INTVAL (offset));
5018 if (GET_MODE_SIZE (mode) > 4
5019 && (GET_MODE_CLASS (mode) == MODE_INT
5020 || TARGET_SOFT_FLOAT))
5022 emit_insn (gen_addsi3 (reg, base, offset));
5026 return gen_rtx_PLUS (Pmode, base, offset);
5033 /* Find a spare register to use during the prolog of a function. */
5036 thumb_find_work_register (unsigned long pushed_regs_mask)
5040 /* Check the argument registers first as these are call-used. The
5041 register allocation order means that sometimes r3 might be used
5042 but earlier argument registers might not, so check them all. */
5043 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5044 if (!df_regs_ever_live_p (reg))
5047 /* Before going on to check the call-saved registers we can try a couple
5048 more ways of deducing that r3 is available. The first is when we are
5049 pushing anonymous arguments onto the stack and we have less than 4
5050 registers worth of fixed arguments(*). In this case r3 will be part of
5051 the variable argument list and so we can be sure that it will be
5052 pushed right at the start of the function. Hence it will be available
5053 for the rest of the prologue.
5054 (*): ie crtl->args.pretend_args_size is greater than 0. */
5055 if (cfun->machine->uses_anonymous_args
5056 && crtl->args.pretend_args_size > 0)
5057 return LAST_ARG_REGNUM;
5059 /* The other case is when we have fixed arguments but less than 4 registers
5060 worth. In this case r3 might be used in the body of the function, but
5061 it is not being used to convey an argument into the function. In theory
5062 we could just check crtl->args.size to see how many bytes are
5063 being passed in argument registers, but it seems that it is unreliable.
5064 Sometimes it will have the value 0 when in fact arguments are being
5065 passed. (See testcase execute/20021111-1.c for an example). So we also
5066 check the args_info.nregs field as well. The problem with this field is
5067 that it makes no allowances for arguments that are passed to the
5068 function but which are not used. Hence we could miss an opportunity
5069 when a function has an unused argument in r3. But it is better to be
5070 safe than to be sorry. */
5071 if (! cfun->machine->uses_anonymous_args
5072 && crtl->args.size >= 0
5073 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5074 && crtl->args.info.nregs < 4)
5075 return LAST_ARG_REGNUM;
5077 /* Otherwise look for a call-saved register that is going to be pushed. */
5078 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5079 if (pushed_regs_mask & (1 << reg))
5084 /* Thumb-2 can use high regs. */
5085 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5086 if (pushed_regs_mask & (1 << reg))
5089 /* Something went wrong - thumb_compute_save_reg_mask()
5090 should have arranged for a suitable register to be pushed. */
5094 static GTY(()) int pic_labelno;
5096 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5100 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5102 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5104 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5107 gcc_assert (flag_pic);
5109 pic_reg = cfun->machine->pic_reg;
5110 if (TARGET_VXWORKS_RTP)
5112 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5113 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5114 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5116 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5118 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5119 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5123 /* We use an UNSPEC rather than a LABEL_REF because this label
5124 never appears in the code stream. */
5126 labelno = GEN_INT (pic_labelno++);
5127 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5128 l1 = gen_rtx_CONST (VOIDmode, l1);
5130 /* On the ARM the PC register contains 'dot + 8' at the time of the
5131 addition, on the Thumb it is 'dot + 4'. */
5132 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5133 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5135 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5139 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5141 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5143 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5145 else /* TARGET_THUMB1 */
5147 if (arm_pic_register != INVALID_REGNUM
5148 && REGNO (pic_reg) > LAST_LO_REGNUM)
5150 /* We will have pushed the pic register, so we should always be
5151 able to find a work register. */
5152 pic_tmp = gen_rtx_REG (SImode,
5153 thumb_find_work_register (saved_regs));
5154 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5155 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5158 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5159 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5163 /* Need to emit this whether or not we obey regdecls,
5164 since setjmp/longjmp can cause life info to screw up. */
5168 /* Generate code to load the address of a static var when flag_pic is set. */
5170 arm_pic_static_addr (rtx orig, rtx reg)
5172 rtx l1, labelno, offset_rtx, insn;
5174 gcc_assert (flag_pic);
5176 /* We use an UNSPEC rather than a LABEL_REF because this label
5177 never appears in the code stream. */
5178 labelno = GEN_INT (pic_labelno++);
5179 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5180 l1 = gen_rtx_CONST (VOIDmode, l1);
5182 /* On the ARM the PC register contains 'dot + 8' at the time of the
5183 addition, on the Thumb it is 'dot + 4'. */
5184 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5185 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5186 UNSPEC_SYMBOL_OFFSET);
5187 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5191 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5193 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5195 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5197 else /* TARGET_THUMB1 */
5199 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5200 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5206 /* Return nonzero if X is valid as an ARM state addressing register. */
5208 arm_address_register_rtx_p (rtx x, int strict_p)
5212 if (GET_CODE (x) != REG)
5218 return ARM_REGNO_OK_FOR_BASE_P (regno);
5220 return (regno <= LAST_ARM_REGNUM
5221 || regno >= FIRST_PSEUDO_REGISTER
5222 || regno == FRAME_POINTER_REGNUM
5223 || regno == ARG_POINTER_REGNUM);
5226 /* Return TRUE if this rtx is the difference of a symbol and a label,
5227 and will reduce to a PC-relative relocation in the object file.
5228 Expressions like this can be left alone when generating PIC, rather
5229 than forced through the GOT. */
5231 pcrel_constant_p (rtx x)
5233 if (GET_CODE (x) == MINUS)
5234 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5239 /* Return nonzero if X is a valid ARM state address operand. */
5241 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5245 enum rtx_code code = GET_CODE (x);
5247 if (arm_address_register_rtx_p (x, strict_p))
5250 use_ldrd = (TARGET_LDRD
5252 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5254 if (code == POST_INC || code == PRE_DEC
5255 || ((code == PRE_INC || code == POST_DEC)
5256 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5257 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5259 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5260 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5261 && GET_CODE (XEXP (x, 1)) == PLUS
5262 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5264 rtx addend = XEXP (XEXP (x, 1), 1);
5266 /* Don't allow ldrd post increment by register because it's hard
5267 to fixup invalid register choices. */
5269 && GET_CODE (x) == POST_MODIFY
5270 && GET_CODE (addend) == REG)
5273 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5274 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5277 /* After reload constants split into minipools will have addresses
5278 from a LABEL_REF. */
5279 else if (reload_completed
5280 && (code == LABEL_REF
5282 && GET_CODE (XEXP (x, 0)) == PLUS
5283 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5284 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5287 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5290 else if (code == PLUS)
5292 rtx xop0 = XEXP (x, 0);
5293 rtx xop1 = XEXP (x, 1);
5295 return ((arm_address_register_rtx_p (xop0, strict_p)
5296 && GET_CODE(xop1) == CONST_INT
5297 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5298 || (arm_address_register_rtx_p (xop1, strict_p)
5299 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5303 /* Reload currently can't handle MINUS, so disable this for now */
5304 else if (GET_CODE (x) == MINUS)
5306 rtx xop0 = XEXP (x, 0);
5307 rtx xop1 = XEXP (x, 1);
5309 return (arm_address_register_rtx_p (xop0, strict_p)
5310 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5314 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5315 && code == SYMBOL_REF
5316 && CONSTANT_POOL_ADDRESS_P (x)
5318 && symbol_mentioned_p (get_pool_constant (x))
5319 && ! pcrel_constant_p (get_pool_constant (x))))
5325 /* Return nonzero if X is a valid Thumb-2 address operand. */
5327 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5330 enum rtx_code code = GET_CODE (x);
5332 if (arm_address_register_rtx_p (x, strict_p))
5335 use_ldrd = (TARGET_LDRD
5337 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5339 if (code == POST_INC || code == PRE_DEC
5340 || ((code == PRE_INC || code == POST_DEC)
5341 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5342 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5344 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5345 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5346 && GET_CODE (XEXP (x, 1)) == PLUS
5347 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5349 /* Thumb-2 only has autoincrement by constant. */
5350 rtx addend = XEXP (XEXP (x, 1), 1);
5351 HOST_WIDE_INT offset;
5353 if (GET_CODE (addend) != CONST_INT)
5356 offset = INTVAL(addend);
5357 if (GET_MODE_SIZE (mode) <= 4)
5358 return (offset > -256 && offset < 256);
5360 return (use_ldrd && offset > -1024 && offset < 1024
5361 && (offset & 3) == 0);
5364 /* After reload constants split into minipools will have addresses
5365 from a LABEL_REF. */
5366 else if (reload_completed
5367 && (code == LABEL_REF
5369 && GET_CODE (XEXP (x, 0)) == PLUS
5370 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5371 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5374 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5377 else if (code == PLUS)
5379 rtx xop0 = XEXP (x, 0);
5380 rtx xop1 = XEXP (x, 1);
5382 return ((arm_address_register_rtx_p (xop0, strict_p)
5383 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5384 || (arm_address_register_rtx_p (xop1, strict_p)
5385 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5388 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5389 && code == SYMBOL_REF
5390 && CONSTANT_POOL_ADDRESS_P (x)
5392 && symbol_mentioned_p (get_pool_constant (x))
5393 && ! pcrel_constant_p (get_pool_constant (x))))
5399 /* Return nonzero if INDEX is valid for an address index operand in
5402 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5405 HOST_WIDE_INT range;
5406 enum rtx_code code = GET_CODE (index);
5408 /* Standard coprocessor addressing modes. */
5409 if (TARGET_HARD_FLOAT
5410 && (TARGET_FPA || TARGET_MAVERICK)
5411 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5412 || (TARGET_MAVERICK && mode == DImode)))
5413 return (code == CONST_INT && INTVAL (index) < 1024
5414 && INTVAL (index) > -1024
5415 && (INTVAL (index) & 3) == 0);
5418 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5419 return (code == CONST_INT
5420 && INTVAL (index) < 1016
5421 && INTVAL (index) > -1024
5422 && (INTVAL (index) & 3) == 0);
5424 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5425 return (code == CONST_INT
5426 && INTVAL (index) < 1024
5427 && INTVAL (index) > -1024
5428 && (INTVAL (index) & 3) == 0);
5430 if (arm_address_register_rtx_p (index, strict_p)
5431 && (GET_MODE_SIZE (mode) <= 4))
5434 if (mode == DImode || mode == DFmode)
5436 if (code == CONST_INT)
5438 HOST_WIDE_INT val = INTVAL (index);
5441 return val > -256 && val < 256;
5443 return val > -4096 && val < 4092;
5446 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5449 if (GET_MODE_SIZE (mode) <= 4
5453 || (mode == QImode && outer == SIGN_EXTEND))))
5457 rtx xiop0 = XEXP (index, 0);
5458 rtx xiop1 = XEXP (index, 1);
5460 return ((arm_address_register_rtx_p (xiop0, strict_p)
5461 && power_of_two_operand (xiop1, SImode))
5462 || (arm_address_register_rtx_p (xiop1, strict_p)
5463 && power_of_two_operand (xiop0, SImode)));
5465 else if (code == LSHIFTRT || code == ASHIFTRT
5466 || code == ASHIFT || code == ROTATERT)
5468 rtx op = XEXP (index, 1);
5470 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5471 && GET_CODE (op) == CONST_INT
5473 && INTVAL (op) <= 31);
5477 /* For ARM v4 we may be doing a sign-extend operation during the
5483 || (outer == SIGN_EXTEND && mode == QImode))
5489 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5491 return (code == CONST_INT
5492 && INTVAL (index) < range
5493 && INTVAL (index) > -range);
5496 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5497 index operand. i.e. 1, 2, 4 or 8. */
5499 thumb2_index_mul_operand (rtx op)
5503 if (GET_CODE(op) != CONST_INT)
5507 return (val == 1 || val == 2 || val == 4 || val == 8);
5510 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5512 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5514 enum rtx_code code = GET_CODE (index);
5516 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5517 /* Standard coprocessor addressing modes. */
5518 if (TARGET_HARD_FLOAT
5519 && (TARGET_FPA || TARGET_MAVERICK)
5520 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5521 || (TARGET_MAVERICK && mode == DImode)))
5522 return (code == CONST_INT && INTVAL (index) < 1024
5523 && INTVAL (index) > -1024
5524 && (INTVAL (index) & 3) == 0);
5526 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5528 /* For DImode assume values will usually live in core regs
5529 and only allow LDRD addressing modes. */
5530 if (!TARGET_LDRD || mode != DImode)
5531 return (code == CONST_INT
5532 && INTVAL (index) < 1024
5533 && INTVAL (index) > -1024
5534 && (INTVAL (index) & 3) == 0);
5538 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5539 return (code == CONST_INT
5540 && INTVAL (index) < 1016
5541 && INTVAL (index) > -1024
5542 && (INTVAL (index) & 3) == 0);
5544 if (arm_address_register_rtx_p (index, strict_p)
5545 && (GET_MODE_SIZE (mode) <= 4))
5548 if (mode == DImode || mode == DFmode)
5550 if (code == CONST_INT)
5552 HOST_WIDE_INT val = INTVAL (index);
5553 /* ??? Can we assume ldrd for thumb2? */
5554 /* Thumb-2 ldrd only has reg+const addressing modes. */
5555 /* ldrd supports offsets of +-1020.
5556 However the ldr fallback does not. */
5557 return val > -256 && val < 256 && (val & 3) == 0;
5565 rtx xiop0 = XEXP (index, 0);
5566 rtx xiop1 = XEXP (index, 1);
5568 return ((arm_address_register_rtx_p (xiop0, strict_p)
5569 && thumb2_index_mul_operand (xiop1))
5570 || (arm_address_register_rtx_p (xiop1, strict_p)
5571 && thumb2_index_mul_operand (xiop0)));
5573 else if (code == ASHIFT)
5575 rtx op = XEXP (index, 1);
5577 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5578 && GET_CODE (op) == CONST_INT
5580 && INTVAL (op) <= 3);
5583 return (code == CONST_INT
5584 && INTVAL (index) < 4096
5585 && INTVAL (index) > -256);
5588 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5590 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5594 if (GET_CODE (x) != REG)
5600 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5602 return (regno <= LAST_LO_REGNUM
5603 || regno > LAST_VIRTUAL_REGISTER
5604 || regno == FRAME_POINTER_REGNUM
5605 || (GET_MODE_SIZE (mode) >= 4
5606 && (regno == STACK_POINTER_REGNUM
5607 || regno >= FIRST_PSEUDO_REGISTER
5608 || x == hard_frame_pointer_rtx
5609 || x == arg_pointer_rtx)));
5612 /* Return nonzero if x is a legitimate index register. This is the case
5613 for any base register that can access a QImode object. */
5615 thumb1_index_register_rtx_p (rtx x, int strict_p)
5617 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5620 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5622 The AP may be eliminated to either the SP or the FP, so we use the
5623 least common denominator, e.g. SImode, and offsets from 0 to 64.
5625 ??? Verify whether the above is the right approach.
5627 ??? Also, the FP may be eliminated to the SP, so perhaps that
5628 needs special handling also.
5630 ??? Look at how the mips16 port solves this problem. It probably uses
5631 better ways to solve some of these problems.
5633 Although it is not incorrect, we don't accept QImode and HImode
5634 addresses based on the frame pointer or arg pointer until the
5635 reload pass starts. This is so that eliminating such addresses
5636 into stack based ones won't produce impossible code. */
5638 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5640 /* ??? Not clear if this is right. Experiment. */
5641 if (GET_MODE_SIZE (mode) < 4
5642 && !(reload_in_progress || reload_completed)
5643 && (reg_mentioned_p (frame_pointer_rtx, x)
5644 || reg_mentioned_p (arg_pointer_rtx, x)
5645 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5646 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5647 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5648 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5651 /* Accept any base register. SP only in SImode or larger. */
5652 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5655 /* This is PC relative data before arm_reorg runs. */
5656 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5657 && GET_CODE (x) == SYMBOL_REF
5658 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5661 /* This is PC relative data after arm_reorg runs. */
5662 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5664 && (GET_CODE (x) == LABEL_REF
5665 || (GET_CODE (x) == CONST
5666 && GET_CODE (XEXP (x, 0)) == PLUS
5667 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5668 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5671 /* Post-inc indexing only supported for SImode and larger. */
5672 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5673 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5676 else if (GET_CODE (x) == PLUS)
5678 /* REG+REG address can be any two index registers. */
5679 /* We disallow FRAME+REG addressing since we know that FRAME
5680 will be replaced with STACK, and SP relative addressing only
5681 permits SP+OFFSET. */
5682 if (GET_MODE_SIZE (mode) <= 4
5683 && XEXP (x, 0) != frame_pointer_rtx
5684 && XEXP (x, 1) != frame_pointer_rtx
5685 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5686 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5689 /* REG+const has 5-7 bit offset for non-SP registers. */
5690 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5691 || XEXP (x, 0) == arg_pointer_rtx)
5692 && GET_CODE (XEXP (x, 1)) == CONST_INT
5693 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5696 /* REG+const has 10-bit offset for SP, but only SImode and
5697 larger is supported. */
5698 /* ??? Should probably check for DI/DFmode overflow here
5699 just like GO_IF_LEGITIMATE_OFFSET does. */
5700 else if (GET_CODE (XEXP (x, 0)) == REG
5701 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5702 && GET_MODE_SIZE (mode) >= 4
5703 && GET_CODE (XEXP (x, 1)) == CONST_INT
5704 && INTVAL (XEXP (x, 1)) >= 0
5705 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5706 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5709 else if (GET_CODE (XEXP (x, 0)) == REG
5710 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5711 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5712 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5713 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5714 && GET_MODE_SIZE (mode) >= 4
5715 && GET_CODE (XEXP (x, 1)) == CONST_INT
5716 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5720 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5721 && GET_MODE_SIZE (mode) == 4
5722 && GET_CODE (x) == SYMBOL_REF
5723 && CONSTANT_POOL_ADDRESS_P (x)
5725 && symbol_mentioned_p (get_pool_constant (x))
5726 && ! pcrel_constant_p (get_pool_constant (x))))
5732 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5733 instruction of mode MODE. */
5735 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5737 switch (GET_MODE_SIZE (mode))
5740 return val >= 0 && val < 32;
5743 return val >= 0 && val < 64 && (val & 1) == 0;
5747 && (val + GET_MODE_SIZE (mode)) <= 128
5753 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5756 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5757 else if (TARGET_THUMB2)
5758 return thumb2_legitimate_address_p (mode, x, strict_p);
5759 else /* if (TARGET_THUMB1) */
5760 return thumb1_legitimate_address_p (mode, x, strict_p);
5763 /* Build the SYMBOL_REF for __tls_get_addr. */
5765 static GTY(()) rtx tls_get_addr_libfunc;
5768 get_tls_get_addr (void)
5770 if (!tls_get_addr_libfunc)
5771 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5772 return tls_get_addr_libfunc;
5776 arm_load_tp (rtx target)
5779 target = gen_reg_rtx (SImode);
5783 /* Can return in any reg. */
5784 emit_insn (gen_load_tp_hard (target));
5788 /* Always returned in r0. Immediately copy the result into a pseudo,
5789 otherwise other uses of r0 (e.g. setting up function arguments) may
5790 clobber the value. */
5794 emit_insn (gen_load_tp_soft ());
5796 tmp = gen_rtx_REG (SImode, 0);
5797 emit_move_insn (target, tmp);
5803 load_tls_operand (rtx x, rtx reg)
5807 if (reg == NULL_RTX)
5808 reg = gen_reg_rtx (SImode);
5810 tmp = gen_rtx_CONST (SImode, x);
5812 emit_move_insn (reg, tmp);
5818 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5820 rtx insns, label, labelno, sum;
5824 labelno = GEN_INT (pic_labelno++);
5825 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5826 label = gen_rtx_CONST (VOIDmode, label);
5828 sum = gen_rtx_UNSPEC (Pmode,
5829 gen_rtvec (4, x, GEN_INT (reloc), label,
5830 GEN_INT (TARGET_ARM ? 8 : 4)),
5832 reg = load_tls_operand (sum, reg);
5835 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5836 else if (TARGET_THUMB2)
5837 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5838 else /* TARGET_THUMB1 */
5839 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5841 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5842 Pmode, 1, reg, Pmode);
5844 insns = get_insns ();
5851 legitimize_tls_address (rtx x, rtx reg)
5853 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5854 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5858 case TLS_MODEL_GLOBAL_DYNAMIC:
5859 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5860 dest = gen_reg_rtx (Pmode);
5861 emit_libcall_block (insns, dest, ret, x);
5864 case TLS_MODEL_LOCAL_DYNAMIC:
5865 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5867 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5868 share the LDM result with other LD model accesses. */
5869 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5871 dest = gen_reg_rtx (Pmode);
5872 emit_libcall_block (insns, dest, ret, eqv);
5874 /* Load the addend. */
5875 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5877 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5878 return gen_rtx_PLUS (Pmode, dest, addend);
5880 case TLS_MODEL_INITIAL_EXEC:
5881 labelno = GEN_INT (pic_labelno++);
5882 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5883 label = gen_rtx_CONST (VOIDmode, label);
5884 sum = gen_rtx_UNSPEC (Pmode,
5885 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5886 GEN_INT (TARGET_ARM ? 8 : 4)),
5888 reg = load_tls_operand (sum, reg);
5891 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5892 else if (TARGET_THUMB2)
5893 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5896 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5897 emit_move_insn (reg, gen_const_mem (SImode, reg));
5900 tp = arm_load_tp (NULL_RTX);
5902 return gen_rtx_PLUS (Pmode, tp, reg);
5904 case TLS_MODEL_LOCAL_EXEC:
5905 tp = arm_load_tp (NULL_RTX);
5907 reg = gen_rtx_UNSPEC (Pmode,
5908 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5910 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5912 return gen_rtx_PLUS (Pmode, tp, reg);
5919 /* Try machine-dependent ways of modifying an illegitimate address
5920 to be legitimate. If we find one, return the new, valid address. */
5922 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5926 /* TODO: legitimize_address for Thumb2. */
5929 return thumb_legitimize_address (x, orig_x, mode);
5932 if (arm_tls_symbol_p (x))
5933 return legitimize_tls_address (x, NULL_RTX);
5935 if (GET_CODE (x) == PLUS)
5937 rtx xop0 = XEXP (x, 0);
5938 rtx xop1 = XEXP (x, 1);
5940 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5941 xop0 = force_reg (SImode, xop0);
5943 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5944 xop1 = force_reg (SImode, xop1);
5946 if (ARM_BASE_REGISTER_RTX_P (xop0)
5947 && GET_CODE (xop1) == CONST_INT)
5949 HOST_WIDE_INT n, low_n;
5953 /* VFP addressing modes actually allow greater offsets, but for
5954 now we just stick with the lowest common denominator. */
5956 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5968 low_n = ((mode) == TImode ? 0
5969 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5973 base_reg = gen_reg_rtx (SImode);
5974 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5975 emit_move_insn (base_reg, val);
5976 x = plus_constant (base_reg, low_n);
5978 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5979 x = gen_rtx_PLUS (SImode, xop0, xop1);
5982 /* XXX We don't allow MINUS any more -- see comment in
5983 arm_legitimate_address_outer_p (). */
5984 else if (GET_CODE (x) == MINUS)
5986 rtx xop0 = XEXP (x, 0);
5987 rtx xop1 = XEXP (x, 1);
5989 if (CONSTANT_P (xop0))
5990 xop0 = force_reg (SImode, xop0);
5992 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5993 xop1 = force_reg (SImode, xop1);
5995 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5996 x = gen_rtx_MINUS (SImode, xop0, xop1);
5999 /* Make sure to take full advantage of the pre-indexed addressing mode
6000 with absolute addresses which often allows for the base register to
6001 be factorized for multiple adjacent memory references, and it might
6002 even allows for the mini pool to be avoided entirely. */
6003 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6006 HOST_WIDE_INT mask, base, index;
6009 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6010 use a 8-bit index. So let's use a 12-bit index for SImode only and
6011 hope that arm_gen_constant will enable ldrb to use more bits. */
6012 bits = (mode == SImode) ? 12 : 8;
6013 mask = (1 << bits) - 1;
6014 base = INTVAL (x) & ~mask;
6015 index = INTVAL (x) & mask;
6016 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6018 /* It'll most probably be more efficient to generate the base
6019 with more bits set and use a negative index instead. */
6023 base_reg = force_reg (SImode, GEN_INT (base));
6024 x = plus_constant (base_reg, index);
6029 /* We need to find and carefully transform any SYMBOL and LABEL
6030 references; so go back to the original address expression. */
6031 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6033 if (new_x != orig_x)
6041 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6042 to be legitimate. If we find one, return the new, valid address. */
6044 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6046 if (arm_tls_symbol_p (x))
6047 return legitimize_tls_address (x, NULL_RTX);
6049 if (GET_CODE (x) == PLUS
6050 && GET_CODE (XEXP (x, 1)) == CONST_INT
6051 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6052 || INTVAL (XEXP (x, 1)) < 0))
6054 rtx xop0 = XEXP (x, 0);
6055 rtx xop1 = XEXP (x, 1);
6056 HOST_WIDE_INT offset = INTVAL (xop1);
6058 /* Try and fold the offset into a biasing of the base register and
6059 then offsetting that. Don't do this when optimizing for space
6060 since it can cause too many CSEs. */
6061 if (optimize_size && offset >= 0
6062 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6064 HOST_WIDE_INT delta;
6067 delta = offset - (256 - GET_MODE_SIZE (mode));
6068 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6069 delta = 31 * GET_MODE_SIZE (mode);
6071 delta = offset & (~31 * GET_MODE_SIZE (mode));
6073 xop0 = force_operand (plus_constant (xop0, offset - delta),
6075 x = plus_constant (xop0, delta);
6077 else if (offset < 0 && offset > -256)
6078 /* Small negative offsets are best done with a subtract before the
6079 dereference, forcing these into a register normally takes two
6081 x = force_operand (x, NULL_RTX);
6084 /* For the remaining cases, force the constant into a register. */
6085 xop1 = force_reg (SImode, xop1);
6086 x = gen_rtx_PLUS (SImode, xop0, xop1);
6089 else if (GET_CODE (x) == PLUS
6090 && s_register_operand (XEXP (x, 1), SImode)
6091 && !s_register_operand (XEXP (x, 0), SImode))
6093 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6095 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6100 /* We need to find and carefully transform any SYMBOL and LABEL
6101 references; so go back to the original address expression. */
6102 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6104 if (new_x != orig_x)
6112 thumb_legitimize_reload_address (rtx *x_p,
6113 enum machine_mode mode,
6114 int opnum, int type,
6115 int ind_levels ATTRIBUTE_UNUSED)
6119 if (GET_CODE (x) == PLUS
6120 && GET_MODE_SIZE (mode) < 4
6121 && REG_P (XEXP (x, 0))
6122 && XEXP (x, 0) == stack_pointer_rtx
6123 && GET_CODE (XEXP (x, 1)) == CONST_INT
6124 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6129 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6130 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6134 /* If both registers are hi-regs, then it's better to reload the
6135 entire expression rather than each register individually. That
6136 only requires one reload register rather than two. */
6137 if (GET_CODE (x) == PLUS
6138 && REG_P (XEXP (x, 0))
6139 && REG_P (XEXP (x, 1))
6140 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6141 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6146 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6147 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6154 /* Test for various thread-local symbols. */
6156 /* Return TRUE if X is a thread-local symbol. */
6159 arm_tls_symbol_p (rtx x)
6161 if (! TARGET_HAVE_TLS)
6164 if (GET_CODE (x) != SYMBOL_REF)
6167 return SYMBOL_REF_TLS_MODEL (x) != 0;
6170 /* Helper for arm_tls_referenced_p. */
6173 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6175 if (GET_CODE (*x) == SYMBOL_REF)
6176 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6178 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6179 TLS offsets, not real symbol references. */
6180 if (GET_CODE (*x) == UNSPEC
6181 && XINT (*x, 1) == UNSPEC_TLS)
6187 /* Return TRUE if X contains any TLS symbol references. */
6190 arm_tls_referenced_p (rtx x)
6192 if (! TARGET_HAVE_TLS)
6195 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6198 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6201 arm_cannot_force_const_mem (rtx x)
6205 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6207 split_const (x, &base, &offset);
6208 if (GET_CODE (base) == SYMBOL_REF
6209 && !offset_within_block_p (base, INTVAL (offset)))
6212 return arm_tls_referenced_p (x);
6215 #define REG_OR_SUBREG_REG(X) \
6216 (GET_CODE (X) == REG \
6217 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6219 #define REG_OR_SUBREG_RTX(X) \
6220 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6222 #ifndef COSTS_N_INSNS
6223 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6226 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6228 enum machine_mode mode = GET_MODE (x);
6241 return COSTS_N_INSNS (1);
6244 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6247 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6254 return COSTS_N_INSNS (2) + cycles;
6256 return COSTS_N_INSNS (1) + 16;
6259 return (COSTS_N_INSNS (1)
6260 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6261 + GET_CODE (SET_DEST (x)) == MEM));
6266 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6268 if (thumb_shiftable_const (INTVAL (x)))
6269 return COSTS_N_INSNS (2);
6270 return COSTS_N_INSNS (3);
6272 else if ((outer == PLUS || outer == COMPARE)
6273 && INTVAL (x) < 256 && INTVAL (x) > -256)
6275 else if ((outer == IOR || outer == XOR || outer == AND)
6276 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6277 return COSTS_N_INSNS (1);
6278 else if (outer == AND)
6281 /* This duplicates the tests in the andsi3 expander. */
6282 for (i = 9; i <= 31; i++)
6283 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6284 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6285 return COSTS_N_INSNS (2);
6287 else if (outer == ASHIFT || outer == ASHIFTRT
6288 || outer == LSHIFTRT)
6290 return COSTS_N_INSNS (2);
6296 return COSTS_N_INSNS (3);
6314 /* XXX another guess. */
6315 /* Memory costs quite a lot for the first word, but subsequent words
6316 load at the equivalent of a single insn each. */
6317 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6318 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6323 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6328 /* XXX still guessing. */
6329 switch (GET_MODE (XEXP (x, 0)))
6332 return (1 + (mode == DImode ? 4 : 0)
6333 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6336 return (4 + (mode == DImode ? 4 : 0)
6337 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6340 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6352 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6354 enum machine_mode mode = GET_MODE (x);
6355 enum rtx_code subcode;
6357 enum rtx_code code = GET_CODE (x);
6363 /* Memory costs quite a lot for the first word, but subsequent words
6364 load at the equivalent of a single insn each. */
6365 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6372 if (TARGET_HARD_FLOAT && mode == SFmode)
6373 *total = COSTS_N_INSNS (2);
6374 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6375 *total = COSTS_N_INSNS (4);
6377 *total = COSTS_N_INSNS (20);
6381 if (GET_CODE (XEXP (x, 1)) == REG)
6382 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6383 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6384 *total = rtx_cost (XEXP (x, 1), code, speed);
6390 *total += COSTS_N_INSNS (4);
6395 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6396 *total += rtx_cost (XEXP (x, 0), code, speed);
6399 *total += COSTS_N_INSNS (3);
6403 *total += COSTS_N_INSNS (1);
6404 /* Increase the cost of complex shifts because they aren't any faster,
6405 and reduce dual issue opportunities. */
6406 if (arm_tune_cortex_a9
6407 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6415 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6417 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6418 *total = COSTS_N_INSNS (1);
6420 *total = COSTS_N_INSNS (20);
6423 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6424 /* Thumb2 does not have RSB, so all arguments must be
6425 registers (subtracting a constant is canonicalized as
6426 addition of the negated constant). */
6432 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6433 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6434 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6436 *total += rtx_cost (XEXP (x, 1), code, speed);
6440 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6441 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6443 *total += rtx_cost (XEXP (x, 0), code, speed);
6450 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6452 if (TARGET_HARD_FLOAT
6454 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6456 *total = COSTS_N_INSNS (1);
6457 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6458 && arm_const_double_rtx (XEXP (x, 0)))
6460 *total += rtx_cost (XEXP (x, 1), code, speed);
6464 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6465 && arm_const_double_rtx (XEXP (x, 1)))
6467 *total += rtx_cost (XEXP (x, 0), code, speed);
6473 *total = COSTS_N_INSNS (20);
6477 *total = COSTS_N_INSNS (1);
6478 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6479 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6481 *total += rtx_cost (XEXP (x, 1), code, speed);
6485 subcode = GET_CODE (XEXP (x, 1));
6486 if (subcode == ASHIFT || subcode == ASHIFTRT
6487 || subcode == LSHIFTRT
6488 || subcode == ROTATE || subcode == ROTATERT)
6490 *total += rtx_cost (XEXP (x, 0), code, speed);
6491 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6495 /* A shift as a part of RSB costs no more than RSB itself. */
6496 if (GET_CODE (XEXP (x, 0)) == MULT
6497 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6499 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6500 *total += rtx_cost (XEXP (x, 1), code, speed);
6505 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6507 *total += rtx_cost (XEXP (x, 0), code, speed);
6508 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6512 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6513 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6515 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6516 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6517 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6518 *total += COSTS_N_INSNS (1);
6526 if (code == PLUS && arm_arch6 && mode == SImode
6527 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6528 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6530 *total = COSTS_N_INSNS (1);
6531 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6533 *total += rtx_cost (XEXP (x, 1), code, speed);
6537 /* MLA: All arguments must be registers. We filter out
6538 multiplication by a power of two, so that we fall down into
6540 if (GET_CODE (XEXP (x, 0)) == MULT
6541 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6543 /* The cost comes from the cost of the multiply. */
6547 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6549 if (TARGET_HARD_FLOAT
6551 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6553 *total = COSTS_N_INSNS (1);
6554 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6555 && arm_const_double_rtx (XEXP (x, 1)))
6557 *total += rtx_cost (XEXP (x, 0), code, speed);
6564 *total = COSTS_N_INSNS (20);
6568 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6569 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6571 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6572 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6573 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6574 *total += COSTS_N_INSNS (1);
6580 case AND: case XOR: case IOR:
6582 /* Normally the frame registers will be spilt into reg+const during
6583 reload, so it is a bad idea to combine them with other instructions,
6584 since then they might not be moved outside of loops. As a compromise
6585 we allow integration with ops that have a constant as their second
6587 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6588 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6589 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6590 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6591 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6596 *total += COSTS_N_INSNS (2);
6597 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6598 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6600 *total += rtx_cost (XEXP (x, 0), code, speed);
6607 *total += COSTS_N_INSNS (1);
6608 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6609 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6611 *total += rtx_cost (XEXP (x, 0), code, speed);
6614 subcode = GET_CODE (XEXP (x, 0));
6615 if (subcode == ASHIFT || subcode == ASHIFTRT
6616 || subcode == LSHIFTRT
6617 || subcode == ROTATE || subcode == ROTATERT)
6619 *total += rtx_cost (XEXP (x, 1), code, speed);
6620 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6625 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6627 *total += rtx_cost (XEXP (x, 1), code, speed);
6628 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6632 if (subcode == UMIN || subcode == UMAX
6633 || subcode == SMIN || subcode == SMAX)
6635 *total = COSTS_N_INSNS (3);
6642 /* This should have been handled by the CPU specific routines. */
6646 if (arm_arch3m && mode == SImode
6647 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6648 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6649 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6650 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6651 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6652 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6654 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6657 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6661 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6663 if (TARGET_HARD_FLOAT
6665 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6667 *total = COSTS_N_INSNS (1);
6670 *total = COSTS_N_INSNS (2);
6676 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6677 if (mode == SImode && code == NOT)
6679 subcode = GET_CODE (XEXP (x, 0));
6680 if (subcode == ASHIFT || subcode == ASHIFTRT
6681 || subcode == LSHIFTRT
6682 || subcode == ROTATE || subcode == ROTATERT
6684 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6686 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6687 /* Register shifts cost an extra cycle. */
6688 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6689 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6698 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6700 *total = COSTS_N_INSNS (4);
6704 operand = XEXP (x, 0);
6706 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6707 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6708 && GET_CODE (XEXP (operand, 0)) == REG
6709 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6710 *total += COSTS_N_INSNS (1);
6711 *total += (rtx_cost (XEXP (x, 1), code, speed)
6712 + rtx_cost (XEXP (x, 2), code, speed));
6716 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6718 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6724 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6725 && mode == SImode && XEXP (x, 1) == const0_rtx)
6727 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6733 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6734 && mode == SImode && XEXP (x, 1) == const0_rtx)
6736 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6756 /* SCC insns. In the case where the comparison has already been
6757 performed, then they cost 2 instructions. Otherwise they need
6758 an additional comparison before them. */
6759 *total = COSTS_N_INSNS (2);
6760 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6767 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6773 *total += COSTS_N_INSNS (1);
6774 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6775 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6777 *total += rtx_cost (XEXP (x, 0), code, speed);
6781 subcode = GET_CODE (XEXP (x, 0));
6782 if (subcode == ASHIFT || subcode == ASHIFTRT
6783 || subcode == LSHIFTRT
6784 || subcode == ROTATE || subcode == ROTATERT)
6786 *total += rtx_cost (XEXP (x, 1), code, speed);
6787 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6792 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6794 *total += rtx_cost (XEXP (x, 1), code, speed);
6795 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6805 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6806 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6807 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6808 *total += rtx_cost (XEXP (x, 1), code, speed);
6812 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6814 if (TARGET_HARD_FLOAT
6816 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6818 *total = COSTS_N_INSNS (1);
6821 *total = COSTS_N_INSNS (20);
6824 *total = COSTS_N_INSNS (1);
6826 *total += COSTS_N_INSNS (3);
6830 if (GET_MODE_CLASS (mode) == MODE_INT)
6834 *total += COSTS_N_INSNS (1);
6836 if (GET_MODE (XEXP (x, 0)) != SImode)
6840 if (GET_CODE (XEXP (x, 0)) != MEM)
6841 *total += COSTS_N_INSNS (1);
6843 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6844 *total += COSTS_N_INSNS (2);
6853 if (GET_MODE_CLASS (mode) == MODE_INT)
6856 *total += COSTS_N_INSNS (1);
6858 if (GET_MODE (XEXP (x, 0)) != SImode)
6862 if (GET_CODE (XEXP (x, 0)) != MEM)
6863 *total += COSTS_N_INSNS (1);
6865 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6866 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6873 switch (GET_MODE (XEXP (x, 0)))
6880 *total = COSTS_N_INSNS (1);
6890 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6894 if (const_ok_for_arm (INTVAL (x))
6895 || const_ok_for_arm (~INTVAL (x)))
6896 *total = COSTS_N_INSNS (1);
6898 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6899 INTVAL (x), NULL_RTX,
6906 *total = COSTS_N_INSNS (3);
6910 *total = COSTS_N_INSNS (1);
6914 *total = COSTS_N_INSNS (1);
6915 *total += rtx_cost (XEXP (x, 0), code, speed);
6919 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6920 && (mode == SFmode || !TARGET_VFP_SINGLE))
6921 *total = COSTS_N_INSNS (1);
6923 *total = COSTS_N_INSNS (4);
6927 *total = COSTS_N_INSNS (4);
6932 /* Estimates the size cost of thumb1 instructions.
6933 For now most of the code is copied from thumb1_rtx_costs. We need more
6934 fine grain tuning when we have more related test cases. */
6936 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6938 enum machine_mode mode = GET_MODE (x);
6951 return COSTS_N_INSNS (1);
6954 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6956 /* Thumb1 mul instruction can't operate on const. We must Load it
6957 into a register first. */
6958 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
6959 return COSTS_N_INSNS (1) + const_size;
6961 return COSTS_N_INSNS (1);
6964 return (COSTS_N_INSNS (1)
6965 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6966 + GET_CODE (SET_DEST (x)) == MEM));
6971 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6973 if (thumb_shiftable_const (INTVAL (x)))
6974 return COSTS_N_INSNS (2);
6975 return COSTS_N_INSNS (3);
6977 else if ((outer == PLUS || outer == COMPARE)
6978 && INTVAL (x) < 256 && INTVAL (x) > -256)
6980 else if ((outer == IOR || outer == XOR || outer == AND)
6981 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6982 return COSTS_N_INSNS (1);
6983 else if (outer == AND)
6986 /* This duplicates the tests in the andsi3 expander. */
6987 for (i = 9; i <= 31; i++)
6988 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6989 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6990 return COSTS_N_INSNS (2);
6992 else if (outer == ASHIFT || outer == ASHIFTRT
6993 || outer == LSHIFTRT)
6995 return COSTS_N_INSNS (2);
7001 return COSTS_N_INSNS (3);
7019 /* XXX another guess. */
7020 /* Memory costs quite a lot for the first word, but subsequent words
7021 load at the equivalent of a single insn each. */
7022 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7023 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7028 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7033 /* XXX still guessing. */
7034 switch (GET_MODE (XEXP (x, 0)))
7037 return (1 + (mode == DImode ? 4 : 0)
7038 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7041 return (4 + (mode == DImode ? 4 : 0)
7042 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7045 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7056 /* RTX costs when optimizing for size. */
7058 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7061 enum machine_mode mode = GET_MODE (x);
7064 *total = thumb1_size_rtx_costs (x, code, outer_code);
7068 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7072 /* A memory access costs 1 insn if the mode is small, or the address is
7073 a single register, otherwise it costs one insn per word. */
7074 if (REG_P (XEXP (x, 0)))
7075 *total = COSTS_N_INSNS (1);
7077 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7084 /* Needs a libcall, so it costs about this. */
7085 *total = COSTS_N_INSNS (2);
7089 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7091 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7099 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7101 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7104 else if (mode == SImode)
7106 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7107 /* Slightly disparage register shifts, but not by much. */
7108 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7109 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7113 /* Needs a libcall. */
7114 *total = COSTS_N_INSNS (2);
7118 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7119 && (mode == SFmode || !TARGET_VFP_SINGLE))
7121 *total = COSTS_N_INSNS (1);
7127 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7128 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7130 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7131 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7132 || subcode1 == ROTATE || subcode1 == ROTATERT
7133 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7134 || subcode1 == ASHIFTRT)
7136 /* It's just the cost of the two operands. */
7141 *total = COSTS_N_INSNS (1);
7145 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7149 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7150 && (mode == SFmode || !TARGET_VFP_SINGLE))
7152 *total = COSTS_N_INSNS (1);
7156 /* A shift as a part of ADD costs nothing. */
7157 if (GET_CODE (XEXP (x, 0)) == MULT
7158 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7160 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7161 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7162 *total += rtx_cost (XEXP (x, 1), code, false);
7167 case AND: case XOR: case IOR:
7170 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7172 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7173 || subcode == LSHIFTRT || subcode == ASHIFTRT
7174 || (code == AND && subcode == NOT))
7176 /* It's just the cost of the two operands. */
7182 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7186 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7190 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7191 && (mode == SFmode || !TARGET_VFP_SINGLE))
7193 *total = COSTS_N_INSNS (1);
7199 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7208 if (cc_register (XEXP (x, 0), VOIDmode))
7211 *total = COSTS_N_INSNS (1);
7215 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7216 && (mode == SFmode || !TARGET_VFP_SINGLE))
7217 *total = COSTS_N_INSNS (1);
7219 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7224 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7226 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7227 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7230 *total += COSTS_N_INSNS (1);
7235 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7237 switch (GET_MODE (XEXP (x, 0)))
7240 *total += COSTS_N_INSNS (1);
7244 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7250 *total += COSTS_N_INSNS (2);
7255 *total += COSTS_N_INSNS (1);
7260 if (const_ok_for_arm (INTVAL (x)))
7261 /* A multiplication by a constant requires another instruction
7262 to load the constant to a register. */
7263 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7265 else if (const_ok_for_arm (~INTVAL (x)))
7266 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7267 else if (const_ok_for_arm (-INTVAL (x)))
7269 if (outer_code == COMPARE || outer_code == PLUS
7270 || outer_code == MINUS)
7273 *total = COSTS_N_INSNS (1);
7276 *total = COSTS_N_INSNS (2);
7282 *total = COSTS_N_INSNS (2);
7286 *total = COSTS_N_INSNS (4);
7291 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7292 cost of these slightly. */
7293 *total = COSTS_N_INSNS (1) + 1;
7297 if (mode != VOIDmode)
7298 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7300 *total = COSTS_N_INSNS (4); /* How knows? */
7305 /* RTX costs when optimizing for size. */
7307 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7311 return arm_size_rtx_costs (x, (enum rtx_code) code,
7312 (enum rtx_code) outer_code, total);
7314 return current_tune->rtx_costs (x, (enum rtx_code) code,
7315 (enum rtx_code) outer_code,
7319 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7320 supported on any "slowmul" cores, so it can be ignored. */
7323 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7324 int *total, bool speed)
7326 enum machine_mode mode = GET_MODE (x);
7330 *total = thumb1_rtx_costs (x, code, outer_code);
7337 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7340 *total = COSTS_N_INSNS (20);
7344 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7346 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7347 & (unsigned HOST_WIDE_INT) 0xffffffff);
7348 int cost, const_ok = const_ok_for_arm (i);
7349 int j, booth_unit_size;
7351 /* Tune as appropriate. */
7352 cost = const_ok ? 4 : 8;
7353 booth_unit_size = 2;
7354 for (j = 0; i && j < 32; j += booth_unit_size)
7356 i >>= booth_unit_size;
7360 *total = COSTS_N_INSNS (cost);
7361 *total += rtx_cost (XEXP (x, 0), code, speed);
7365 *total = COSTS_N_INSNS (20);
7369 return arm_rtx_costs_1 (x, outer_code, total, speed);;
7374 /* RTX cost for cores with a fast multiply unit (M variants). */
7377 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7378 int *total, bool speed)
7380 enum machine_mode mode = GET_MODE (x);
7384 *total = thumb1_rtx_costs (x, code, outer_code);
7388 /* ??? should thumb2 use different costs? */
7392 /* There is no point basing this on the tuning, since it is always the
7393 fast variant if it exists at all. */
7395 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7396 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7397 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7399 *total = COSTS_N_INSNS(2);
7406 *total = COSTS_N_INSNS (5);
7410 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7412 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7413 & (unsigned HOST_WIDE_INT) 0xffffffff);
7414 int cost, const_ok = const_ok_for_arm (i);
7415 int j, booth_unit_size;
7417 /* Tune as appropriate. */
7418 cost = const_ok ? 4 : 8;
7419 booth_unit_size = 8;
7420 for (j = 0; i && j < 32; j += booth_unit_size)
7422 i >>= booth_unit_size;
7426 *total = COSTS_N_INSNS(cost);
7432 *total = COSTS_N_INSNS (4);
7436 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7438 if (TARGET_HARD_FLOAT
7440 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7442 *total = COSTS_N_INSNS (1);
7447 /* Requires a lib call */
7448 *total = COSTS_N_INSNS (20);
7452 return arm_rtx_costs_1 (x, outer_code, total, speed);
7457 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7458 so it can be ignored. */
7461 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7462 int *total, bool speed)
7464 enum machine_mode mode = GET_MODE (x);
7468 *total = thumb1_rtx_costs (x, code, outer_code);
7475 if (GET_CODE (XEXP (x, 0)) != MULT)
7476 return arm_rtx_costs_1 (x, outer_code, total, speed);
7478 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7479 will stall until the multiplication is complete. */
7480 *total = COSTS_N_INSNS (3);
7484 /* There is no point basing this on the tuning, since it is always the
7485 fast variant if it exists at all. */
7487 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7488 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7489 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7491 *total = COSTS_N_INSNS (2);
7498 *total = COSTS_N_INSNS (5);
7502 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7504 /* If operand 1 is a constant we can more accurately
7505 calculate the cost of the multiply. The multiplier can
7506 retire 15 bits on the first cycle and a further 12 on the
7507 second. We do, of course, have to load the constant into
7508 a register first. */
7509 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7510 /* There's a general overhead of one cycle. */
7512 unsigned HOST_WIDE_INT masked_const;
7517 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7519 masked_const = i & 0xffff8000;
7520 if (masked_const != 0)
7523 masked_const = i & 0xf8000000;
7524 if (masked_const != 0)
7527 *total = COSTS_N_INSNS (cost);
7533 *total = COSTS_N_INSNS (3);
7537 /* Requires a lib call */
7538 *total = COSTS_N_INSNS (20);
7542 return arm_rtx_costs_1 (x, outer_code, total, speed);
7547 /* RTX costs for 9e (and later) cores. */
7550 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7551 int *total, bool speed)
7553 enum machine_mode mode = GET_MODE (x);
7560 *total = COSTS_N_INSNS (3);
7564 *total = thumb1_rtx_costs (x, code, outer_code);
7572 /* There is no point basing this on the tuning, since it is always the
7573 fast variant if it exists at all. */
7575 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7576 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7577 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7579 *total = COSTS_N_INSNS (2);
7586 *total = COSTS_N_INSNS (5);
7592 *total = COSTS_N_INSNS (2);
7596 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7598 if (TARGET_HARD_FLOAT
7600 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7602 *total = COSTS_N_INSNS (1);
7607 *total = COSTS_N_INSNS (20);
7611 return arm_rtx_costs_1 (x, outer_code, total, speed);
7614 /* All address computations that can be done are free, but rtx cost returns
7615 the same for practically all of them. So we weight the different types
7616 of address here in the order (most pref first):
7617 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7619 arm_arm_address_cost (rtx x)
7621 enum rtx_code c = GET_CODE (x);
7623 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7625 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7630 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7633 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7643 arm_thumb_address_cost (rtx x)
7645 enum rtx_code c = GET_CODE (x);
7650 && GET_CODE (XEXP (x, 0)) == REG
7651 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7658 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7660 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7664 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7668 /* Some true dependencies can have a higher cost depending
7669 on precisely how certain input operands are used. */
7671 && REG_NOTE_KIND (link) == 0
7672 && recog_memoized (insn) >= 0
7673 && recog_memoized (dep) >= 0)
7675 int shift_opnum = get_attr_shift (insn);
7676 enum attr_type attr_type = get_attr_type (dep);
7678 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7679 operand for INSN. If we have a shifted input operand and the
7680 instruction we depend on is another ALU instruction, then we may
7681 have to account for an additional stall. */
7682 if (shift_opnum != 0
7683 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7685 rtx shifted_operand;
7688 /* Get the shifted operand. */
7689 extract_insn (insn);
7690 shifted_operand = recog_data.operand[shift_opnum];
7692 /* Iterate over all the operands in DEP. If we write an operand
7693 that overlaps with SHIFTED_OPERAND, then we have increase the
7694 cost of this dependency. */
7696 preprocess_constraints ();
7697 for (opno = 0; opno < recog_data.n_operands; opno++)
7699 /* We can ignore strict inputs. */
7700 if (recog_data.operand_type[opno] == OP_IN)
7703 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7710 /* XXX This is not strictly true for the FPA. */
7711 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7712 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7715 /* Call insns don't incur a stall, even if they follow a load. */
7716 if (REG_NOTE_KIND (link) == 0
7717 && GET_CODE (insn) == CALL_INSN)
7720 if ((i_pat = single_set (insn)) != NULL
7721 && GET_CODE (SET_SRC (i_pat)) == MEM
7722 && (d_pat = single_set (dep)) != NULL
7723 && GET_CODE (SET_DEST (d_pat)) == MEM)
7725 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7726 /* This is a load after a store, there is no conflict if the load reads
7727 from a cached area. Assume that loads from the stack, and from the
7728 constant pool are cached, and that others will miss. This is a
7731 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7732 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7733 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7734 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7741 static int fp_consts_inited = 0;
7743 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7744 static const char * const strings_fp[8] =
7747 "4", "5", "0.5", "10"
7750 static REAL_VALUE_TYPE values_fp[8];
7753 init_fp_table (void)
7759 fp_consts_inited = 1;
7761 fp_consts_inited = 8;
7763 for (i = 0; i < fp_consts_inited; i++)
7765 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7770 /* Return TRUE if rtx X is a valid immediate FP constant. */
7772 arm_const_double_rtx (rtx x)
7777 if (!fp_consts_inited)
7780 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7781 if (REAL_VALUE_MINUS_ZERO (r))
7784 for (i = 0; i < fp_consts_inited; i++)
7785 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7791 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7793 neg_const_double_rtx_ok_for_fpa (rtx x)
7798 if (!fp_consts_inited)
7801 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7802 r = real_value_negate (&r);
7803 if (REAL_VALUE_MINUS_ZERO (r))
7806 for (i = 0; i < 8; i++)
7807 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7814 /* VFPv3 has a fairly wide range of representable immediates, formed from
7815 "quarter-precision" floating-point values. These can be evaluated using this
7816 formula (with ^ for exponentiation):
7820 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7821 16 <= n <= 31 and 0 <= r <= 7.
7823 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7825 - A (most-significant) is the sign bit.
7826 - BCD are the exponent (encoded as r XOR 3).
7827 - EFGH are the mantissa (encoded as n - 16).
7830 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7831 fconst[sd] instruction, or -1 if X isn't suitable. */
7833 vfp3_const_double_index (rtx x)
7835 REAL_VALUE_TYPE r, m;
7837 unsigned HOST_WIDE_INT mantissa, mant_hi;
7838 unsigned HOST_WIDE_INT mask;
7839 HOST_WIDE_INT m1, m2;
7840 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7842 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7845 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7847 /* We can't represent these things, so detect them first. */
7848 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7851 /* Extract sign, exponent and mantissa. */
7852 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7853 r = real_value_abs (&r);
7854 exponent = REAL_EXP (&r);
7855 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7856 highest (sign) bit, with a fixed binary point at bit point_pos.
7857 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7858 bits for the mantissa, this may fail (low bits would be lost). */
7859 real_ldexp (&m, &r, point_pos - exponent);
7860 REAL_VALUE_TO_INT (&m1, &m2, m);
7864 /* If there are bits set in the low part of the mantissa, we can't
7865 represent this value. */
7869 /* Now make it so that mantissa contains the most-significant bits, and move
7870 the point_pos to indicate that the least-significant bits have been
7872 point_pos -= HOST_BITS_PER_WIDE_INT;
7875 /* We can permit four significant bits of mantissa only, plus a high bit
7876 which is always 1. */
7877 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7878 if ((mantissa & mask) != 0)
7881 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7882 mantissa >>= point_pos - 5;
7884 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7885 floating-point immediate zero with Neon using an integer-zero load, but
7886 that case is handled elsewhere.) */
7890 gcc_assert (mantissa >= 16 && mantissa <= 31);
7892 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7893 normalized significands are in the range [1, 2). (Our mantissa is shifted
7894 left 4 places at this point relative to normalized IEEE754 values). GCC
7895 internally uses [0.5, 1) (see real.c), so the exponent returned from
7896 REAL_EXP must be altered. */
7897 exponent = 5 - exponent;
7899 if (exponent < 0 || exponent > 7)
7902 /* Sign, mantissa and exponent are now in the correct form to plug into the
7903 formula described in the comment above. */
7904 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7907 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7909 vfp3_const_double_rtx (rtx x)
7914 return vfp3_const_double_index (x) != -1;
7917 /* Recognize immediates which can be used in various Neon instructions. Legal
7918 immediates are described by the following table (for VMVN variants, the
7919 bitwise inverse of the constant shown is recognized. In either case, VMOV
7920 is output and the correct instruction to use for a given constant is chosen
7921 by the assembler). The constant shown is replicated across all elements of
7922 the destination vector.
7924 insn elems variant constant (binary)
7925 ---- ----- ------- -----------------
7926 vmov i32 0 00000000 00000000 00000000 abcdefgh
7927 vmov i32 1 00000000 00000000 abcdefgh 00000000
7928 vmov i32 2 00000000 abcdefgh 00000000 00000000
7929 vmov i32 3 abcdefgh 00000000 00000000 00000000
7930 vmov i16 4 00000000 abcdefgh
7931 vmov i16 5 abcdefgh 00000000
7932 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7933 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7934 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7935 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7936 vmvn i16 10 00000000 abcdefgh
7937 vmvn i16 11 abcdefgh 00000000
7938 vmov i32 12 00000000 00000000 abcdefgh 11111111
7939 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7940 vmov i32 14 00000000 abcdefgh 11111111 11111111
7941 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7943 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7944 eeeeeeee ffffffff gggggggg hhhhhhhh
7945 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7947 For case 18, B = !b. Representable values are exactly those accepted by
7948 vfp3_const_double_index, but are output as floating-point numbers rather
7951 Variants 0-5 (inclusive) may also be used as immediates for the second
7952 operand of VORR/VBIC instructions.
7954 The INVERSE argument causes the bitwise inverse of the given operand to be
7955 recognized instead (used for recognizing legal immediates for the VAND/VORN
7956 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7957 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7958 output, rather than the real insns vbic/vorr).
7960 INVERSE makes no difference to the recognition of float vectors.
7962 The return value is the variant of immediate as shown in the above table, or
7963 -1 if the given value doesn't match any of the listed patterns.
7966 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7967 rtx *modconst, int *elementwidth)
7969 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7971 for (i = 0; i < idx; i += (STRIDE)) \
7976 immtype = (CLASS); \
7977 elsize = (ELSIZE); \
7981 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7982 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7983 unsigned char bytes[16];
7984 int immtype = -1, matches;
7985 unsigned int invmask = inverse ? 0xff : 0;
7987 /* Vectors of float constants. */
7988 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7990 rtx el0 = CONST_VECTOR_ELT (op, 0);
7993 if (!vfp3_const_double_rtx (el0))
7996 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7998 for (i = 1; i < n_elts; i++)
8000 rtx elt = CONST_VECTOR_ELT (op, i);
8003 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8005 if (!REAL_VALUES_EQUAL (r0, re))
8010 *modconst = CONST_VECTOR_ELT (op, 0);
8018 /* Splat vector constant out into a byte vector. */
8019 for (i = 0; i < n_elts; i++)
8021 rtx el = CONST_VECTOR_ELT (op, i);
8022 unsigned HOST_WIDE_INT elpart;
8023 unsigned int part, parts;
8025 if (GET_CODE (el) == CONST_INT)
8027 elpart = INTVAL (el);
8030 else if (GET_CODE (el) == CONST_DOUBLE)
8032 elpart = CONST_DOUBLE_LOW (el);
8038 for (part = 0; part < parts; part++)
8041 for (byte = 0; byte < innersize; byte++)
8043 bytes[idx++] = (elpart & 0xff) ^ invmask;
8044 elpart >>= BITS_PER_UNIT;
8046 if (GET_CODE (el) == CONST_DOUBLE)
8047 elpart = CONST_DOUBLE_HIGH (el);
8052 gcc_assert (idx == GET_MODE_SIZE (mode));
8056 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8057 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8059 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8060 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8062 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8063 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8065 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8066 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8068 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8070 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8072 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8073 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8075 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8076 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8078 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8079 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8081 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8082 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8084 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8086 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8088 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8089 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8091 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8092 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8094 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8095 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8097 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8098 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8100 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8102 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8103 && bytes[i] == bytes[(i + 8) % idx]);
8111 *elementwidth = elsize;
8115 unsigned HOST_WIDE_INT imm = 0;
8117 /* Un-invert bytes of recognized vector, if necessary. */
8119 for (i = 0; i < idx; i++)
8120 bytes[i] ^= invmask;
8124 /* FIXME: Broken on 32-bit H_W_I hosts. */
8125 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8127 for (i = 0; i < 8; i++)
8128 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8129 << (i * BITS_PER_UNIT);
8131 *modconst = GEN_INT (imm);
8135 unsigned HOST_WIDE_INT imm = 0;
8137 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8138 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8140 *modconst = GEN_INT (imm);
8148 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8149 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8150 float elements), and a modified constant (whatever should be output for a
8151 VMOV) in *MODCONST. */
8154 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8155 rtx *modconst, int *elementwidth)
8159 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8165 *modconst = tmpconst;
8168 *elementwidth = tmpwidth;
8173 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8174 the immediate is valid, write a constant suitable for using as an operand
8175 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8176 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8179 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8180 rtx *modconst, int *elementwidth)
8184 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8186 if (retval < 0 || retval > 5)
8190 *modconst = tmpconst;
8193 *elementwidth = tmpwidth;
8198 /* Return a string suitable for output of Neon immediate logic operation
8202 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8203 int inverse, int quad)
8205 int width, is_valid;
8206 static char templ[40];
8208 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8210 gcc_assert (is_valid != 0);
8213 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8215 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8220 /* Output a sequence of pairwise operations to implement a reduction.
8221 NOTE: We do "too much work" here, because pairwise operations work on two
8222 registers-worth of operands in one go. Unfortunately we can't exploit those
8223 extra calculations to do the full operation in fewer steps, I don't think.
8224 Although all vector elements of the result but the first are ignored, we
8225 actually calculate the same result in each of the elements. An alternative
8226 such as initially loading a vector with zero to use as each of the second
8227 operands would use up an additional register and take an extra instruction,
8228 for no particular gain. */
8231 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8232 rtx (*reduc) (rtx, rtx, rtx))
8234 enum machine_mode inner = GET_MODE_INNER (mode);
8235 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8238 for (i = parts / 2; i >= 1; i /= 2)
8240 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8241 emit_insn (reduc (dest, tmpsum, tmpsum));
8246 /* If VALS is a vector constant that can be loaded into a register
8247 using VDUP, generate instructions to do so and return an RTX to
8248 assign to the register. Otherwise return NULL_RTX. */
8251 neon_vdup_constant (rtx vals)
8253 enum machine_mode mode = GET_MODE (vals);
8254 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8255 int n_elts = GET_MODE_NUNITS (mode);
8256 bool all_same = true;
8260 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8263 for (i = 0; i < n_elts; ++i)
8265 x = XVECEXP (vals, 0, i);
8266 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8271 /* The elements are not all the same. We could handle repeating
8272 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8273 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8277 /* We can load this constant by using VDUP and a constant in a
8278 single ARM register. This will be cheaper than a vector
8281 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8282 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8286 /* Generate code to load VALS, which is a PARALLEL containing only
8287 constants (for vec_init) or CONST_VECTOR, efficiently into a
8288 register. Returns an RTX to copy into the register, or NULL_RTX
8289 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8292 neon_make_constant (rtx vals)
8294 enum machine_mode mode = GET_MODE (vals);
8296 rtx const_vec = NULL_RTX;
8297 int n_elts = GET_MODE_NUNITS (mode);
8301 if (GET_CODE (vals) == CONST_VECTOR)
8303 else if (GET_CODE (vals) == PARALLEL)
8305 /* A CONST_VECTOR must contain only CONST_INTs and
8306 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8307 Only store valid constants in a CONST_VECTOR. */
8308 for (i = 0; i < n_elts; ++i)
8310 rtx x = XVECEXP (vals, 0, i);
8311 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8314 if (n_const == n_elts)
8315 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8320 if (const_vec != NULL
8321 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8322 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8324 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8325 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8326 pipeline cycle; creating the constant takes one or two ARM
8329 else if (const_vec != NULL_RTX)
8330 /* Load from constant pool. On Cortex-A8 this takes two cycles
8331 (for either double or quad vectors). We can not take advantage
8332 of single-cycle VLD1 because we need a PC-relative addressing
8336 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8337 We can not construct an initializer. */
8341 /* Initialize vector TARGET to VALS. */
8344 neon_expand_vector_init (rtx target, rtx vals)
8346 enum machine_mode mode = GET_MODE (target);
8347 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8348 int n_elts = GET_MODE_NUNITS (mode);
8349 int n_var = 0, one_var = -1;
8350 bool all_same = true;
8354 for (i = 0; i < n_elts; ++i)
8356 x = XVECEXP (vals, 0, i);
8357 if (!CONSTANT_P (x))
8358 ++n_var, one_var = i;
8360 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8366 rtx constant = neon_make_constant (vals);
8367 if (constant != NULL_RTX)
8369 emit_move_insn (target, constant);
8374 /* Splat a single non-constant element if we can. */
8375 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8377 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8378 emit_insn (gen_rtx_SET (VOIDmode, target,
8379 gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8384 /* One field is non-constant. Load constant then overwrite varying
8385 field. This is more efficient than using the stack. */
8388 rtx copy = copy_rtx (vals);
8391 /* Load constant part of vector, substitute neighboring value for
8393 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8394 neon_expand_vector_init (target, copy);
8396 /* Insert variable. */
8397 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8398 ops = gen_rtvec (3, x, target, GEN_INT (one_var));
8399 emit_insn (gen_rtx_SET (VOIDmode, target,
8400 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
8404 /* Construct the vector in memory one field at a time
8405 and load the whole vector. */
8406 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8407 for (i = 0; i < n_elts; i++)
8408 emit_move_insn (adjust_address_nv (mem, inner_mode,
8409 i * GET_MODE_SIZE (inner_mode)),
8410 XVECEXP (vals, 0, i));
8411 emit_move_insn (target, mem);
8414 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8415 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8416 reported source locations are bogus. */
8419 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8424 gcc_assert (GET_CODE (operand) == CONST_INT);
8426 lane = INTVAL (operand);
8428 if (lane < low || lane >= high)
8432 /* Bounds-check lanes. */
8435 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8437 bounds_check (operand, low, high, "lane out of range");
8440 /* Bounds-check constants. */
8443 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8445 bounds_check (operand, low, high, "constant out of range");
8449 neon_element_bits (enum machine_mode mode)
8452 return GET_MODE_BITSIZE (mode);
8454 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8458 /* Predicates for `match_operand' and `match_operator'. */
8460 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8462 cirrus_memory_offset (rtx op)
8464 /* Reject eliminable registers. */
8465 if (! (reload_in_progress || reload_completed)
8466 && ( reg_mentioned_p (frame_pointer_rtx, op)
8467 || reg_mentioned_p (arg_pointer_rtx, op)
8468 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8469 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8470 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8471 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8474 if (GET_CODE (op) == MEM)
8480 /* Match: (mem (reg)). */
8481 if (GET_CODE (ind) == REG)
8487 if (GET_CODE (ind) == PLUS
8488 && GET_CODE (XEXP (ind, 0)) == REG
8489 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8490 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8497 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8498 WB is true if full writeback address modes are allowed and is false
8499 if limited writeback address modes (POST_INC and PRE_DEC) are
8503 arm_coproc_mem_operand (rtx op, bool wb)
8507 /* Reject eliminable registers. */
8508 if (! (reload_in_progress || reload_completed)
8509 && ( reg_mentioned_p (frame_pointer_rtx, op)
8510 || reg_mentioned_p (arg_pointer_rtx, op)
8511 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8512 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8513 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8514 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8517 /* Constants are converted into offsets from labels. */
8518 if (GET_CODE (op) != MEM)
8523 if (reload_completed
8524 && (GET_CODE (ind) == LABEL_REF
8525 || (GET_CODE (ind) == CONST
8526 && GET_CODE (XEXP (ind, 0)) == PLUS
8527 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8528 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8531 /* Match: (mem (reg)). */
8532 if (GET_CODE (ind) == REG)
8533 return arm_address_register_rtx_p (ind, 0);
8535 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8536 acceptable in any case (subject to verification by
8537 arm_address_register_rtx_p). We need WB to be true to accept
8538 PRE_INC and POST_DEC. */
8539 if (GET_CODE (ind) == POST_INC
8540 || GET_CODE (ind) == PRE_DEC
8542 && (GET_CODE (ind) == PRE_INC
8543 || GET_CODE (ind) == POST_DEC)))
8544 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8547 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8548 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8549 && GET_CODE (XEXP (ind, 1)) == PLUS
8550 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8551 ind = XEXP (ind, 1);
8556 if (GET_CODE (ind) == PLUS
8557 && GET_CODE (XEXP (ind, 0)) == REG
8558 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8559 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8560 && INTVAL (XEXP (ind, 1)) > -1024
8561 && INTVAL (XEXP (ind, 1)) < 1024
8562 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8568 /* Return TRUE if OP is a memory operand which we can load or store a vector
8569 to/from. TYPE is one of the following values:
8570 0 - Vector load/stor (vldr)
8571 1 - Core registers (ldm)
8572 2 - Element/structure loads (vld1)
8575 neon_vector_mem_operand (rtx op, int type)
8579 /* Reject eliminable registers. */
8580 if (! (reload_in_progress || reload_completed)
8581 && ( reg_mentioned_p (frame_pointer_rtx, op)
8582 || reg_mentioned_p (arg_pointer_rtx, op)
8583 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8584 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8585 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8586 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8589 /* Constants are converted into offsets from labels. */
8590 if (GET_CODE (op) != MEM)
8595 if (reload_completed
8596 && (GET_CODE (ind) == LABEL_REF
8597 || (GET_CODE (ind) == CONST
8598 && GET_CODE (XEXP (ind, 0)) == PLUS
8599 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8600 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8603 /* Match: (mem (reg)). */
8604 if (GET_CODE (ind) == REG)
8605 return arm_address_register_rtx_p (ind, 0);
8607 /* Allow post-increment with Neon registers. */
8608 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8609 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8611 /* FIXME: vld1 allows register post-modify. */
8617 && GET_CODE (ind) == PLUS
8618 && GET_CODE (XEXP (ind, 0)) == REG
8619 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8620 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8621 && INTVAL (XEXP (ind, 1)) > -1024
8622 && INTVAL (XEXP (ind, 1)) < 1016
8623 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8629 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8632 neon_struct_mem_operand (rtx op)
8636 /* Reject eliminable registers. */
8637 if (! (reload_in_progress || reload_completed)
8638 && ( reg_mentioned_p (frame_pointer_rtx, op)
8639 || reg_mentioned_p (arg_pointer_rtx, op)
8640 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8641 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8642 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8643 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8646 /* Constants are converted into offsets from labels. */
8647 if (GET_CODE (op) != MEM)
8652 if (reload_completed
8653 && (GET_CODE (ind) == LABEL_REF
8654 || (GET_CODE (ind) == CONST
8655 && GET_CODE (XEXP (ind, 0)) == PLUS
8656 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8657 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8660 /* Match: (mem (reg)). */
8661 if (GET_CODE (ind) == REG)
8662 return arm_address_register_rtx_p (ind, 0);
8667 /* Return true if X is a register that will be eliminated later on. */
8669 arm_eliminable_register (rtx x)
8671 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8672 || REGNO (x) == ARG_POINTER_REGNUM
8673 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8674 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8677 /* Return GENERAL_REGS if a scratch register required to reload x to/from
8678 coprocessor registers. Otherwise return NO_REGS. */
8681 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8685 if (!TARGET_NEON_FP16)
8686 return GENERAL_REGS;
8687 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8689 return GENERAL_REGS;
8693 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8694 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8695 && neon_vector_mem_operand (x, 0))
8698 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8701 return GENERAL_REGS;
8704 /* Values which must be returned in the most-significant end of the return
8708 arm_return_in_msb (const_tree valtype)
8710 return (TARGET_AAPCS_BASED
8712 && (AGGREGATE_TYPE_P (valtype)
8713 || TREE_CODE (valtype) == COMPLEX_TYPE));
8716 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8717 Use by the Cirrus Maverick code which has to workaround
8718 a hardware bug triggered by such instructions. */
8720 arm_memory_load_p (rtx insn)
8722 rtx body, lhs, rhs;;
8724 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8727 body = PATTERN (insn);
8729 if (GET_CODE (body) != SET)
8732 lhs = XEXP (body, 0);
8733 rhs = XEXP (body, 1);
8735 lhs = REG_OR_SUBREG_RTX (lhs);
8737 /* If the destination is not a general purpose
8738 register we do not have to worry. */
8739 if (GET_CODE (lhs) != REG
8740 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8743 /* As well as loads from memory we also have to react
8744 to loads of invalid constants which will be turned
8745 into loads from the minipool. */
8746 return (GET_CODE (rhs) == MEM
8747 || GET_CODE (rhs) == SYMBOL_REF
8748 || note_invalid_constants (insn, -1, false));
8751 /* Return TRUE if INSN is a Cirrus instruction. */
8753 arm_cirrus_insn_p (rtx insn)
8755 enum attr_cirrus attr;
8757 /* get_attr cannot accept USE or CLOBBER. */
8759 || GET_CODE (insn) != INSN
8760 || GET_CODE (PATTERN (insn)) == USE
8761 || GET_CODE (PATTERN (insn)) == CLOBBER)
8764 attr = get_attr_cirrus (insn);
8766 return attr != CIRRUS_NOT;
8769 /* Cirrus reorg for invalid instruction combinations. */
8771 cirrus_reorg (rtx first)
8773 enum attr_cirrus attr;
8774 rtx body = PATTERN (first);
8778 /* Any branch must be followed by 2 non Cirrus instructions. */
8779 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8782 t = next_nonnote_insn (first);
8784 if (arm_cirrus_insn_p (t))
8787 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8791 emit_insn_after (gen_nop (), first);
8796 /* (float (blah)) is in parallel with a clobber. */
8797 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8798 body = XVECEXP (body, 0, 0);
8800 if (GET_CODE (body) == SET)
8802 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8804 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8805 be followed by a non Cirrus insn. */
8806 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8808 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8809 emit_insn_after (gen_nop (), first);
8813 else if (arm_memory_load_p (first))
8815 unsigned int arm_regno;
8817 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8818 ldr/cfmv64hr combination where the Rd field is the same
8819 in both instructions must be split with a non Cirrus
8826 /* Get Arm register number for ldr insn. */
8827 if (GET_CODE (lhs) == REG)
8828 arm_regno = REGNO (lhs);
8831 gcc_assert (GET_CODE (rhs) == REG);
8832 arm_regno = REGNO (rhs);
8836 first = next_nonnote_insn (first);
8838 if (! arm_cirrus_insn_p (first))
8841 body = PATTERN (first);
8843 /* (float (blah)) is in parallel with a clobber. */
8844 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8845 body = XVECEXP (body, 0, 0);
8847 if (GET_CODE (body) == FLOAT)
8848 body = XEXP (body, 0);
8850 if (get_attr_cirrus (first) == CIRRUS_MOVE
8851 && GET_CODE (XEXP (body, 1)) == REG
8852 && arm_regno == REGNO (XEXP (body, 1)))
8853 emit_insn_after (gen_nop (), first);
8859 /* get_attr cannot accept USE or CLOBBER. */
8861 || GET_CODE (first) != INSN
8862 || GET_CODE (PATTERN (first)) == USE
8863 || GET_CODE (PATTERN (first)) == CLOBBER)
8866 attr = get_attr_cirrus (first);
8868 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8869 must be followed by a non-coprocessor instruction. */
8870 if (attr == CIRRUS_COMPARE)
8874 t = next_nonnote_insn (first);
8876 if (arm_cirrus_insn_p (t))
8879 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8883 emit_insn_after (gen_nop (), first);
8889 /* Return TRUE if X references a SYMBOL_REF. */
8891 symbol_mentioned_p (rtx x)
8896 if (GET_CODE (x) == SYMBOL_REF)
8899 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8900 are constant offsets, not symbols. */
8901 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8904 fmt = GET_RTX_FORMAT (GET_CODE (x));
8906 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8912 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8913 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8916 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8923 /* Return TRUE if X references a LABEL_REF. */
8925 label_mentioned_p (rtx x)
8930 if (GET_CODE (x) == LABEL_REF)
8933 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8934 instruction, but they are constant offsets, not symbols. */
8935 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8938 fmt = GET_RTX_FORMAT (GET_CODE (x));
8939 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8945 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8946 if (label_mentioned_p (XVECEXP (x, i, j)))
8949 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8957 tls_mentioned_p (rtx x)
8959 switch (GET_CODE (x))
8962 return tls_mentioned_p (XEXP (x, 0));
8965 if (XINT (x, 1) == UNSPEC_TLS)
8973 /* Must not copy any rtx that uses a pc-relative address. */
8976 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
8978 if (GET_CODE (*x) == UNSPEC
8979 && XINT (*x, 1) == UNSPEC_PIC_BASE)
8985 arm_cannot_copy_insn_p (rtx insn)
8987 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
8993 enum rtx_code code = GET_CODE (x);
9010 /* Return 1 if memory locations are adjacent. */
9012 adjacent_mem_locations (rtx a, rtx b)
9014 /* We don't guarantee to preserve the order of these memory refs. */
9015 if (volatile_refs_p (a) || volatile_refs_p (b))
9018 if ((GET_CODE (XEXP (a, 0)) == REG
9019 || (GET_CODE (XEXP (a, 0)) == PLUS
9020 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9021 && (GET_CODE (XEXP (b, 0)) == REG
9022 || (GET_CODE (XEXP (b, 0)) == PLUS
9023 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9025 HOST_WIDE_INT val0 = 0, val1 = 0;
9029 if (GET_CODE (XEXP (a, 0)) == PLUS)
9031 reg0 = XEXP (XEXP (a, 0), 0);
9032 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9037 if (GET_CODE (XEXP (b, 0)) == PLUS)
9039 reg1 = XEXP (XEXP (b, 0), 0);
9040 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9045 /* Don't accept any offset that will require multiple
9046 instructions to handle, since this would cause the
9047 arith_adjacentmem pattern to output an overlong sequence. */
9048 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9051 /* Don't allow an eliminable register: register elimination can make
9052 the offset too large. */
9053 if (arm_eliminable_register (reg0))
9056 val_diff = val1 - val0;
9060 /* If the target has load delay slots, then there's no benefit
9061 to using an ldm instruction unless the offset is zero and
9062 we are optimizing for size. */
9063 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9064 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9065 && (val_diff == 4 || val_diff == -4));
9068 return ((REGNO (reg0) == REGNO (reg1))
9069 && (val_diff == 4 || val_diff == -4));
9075 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9076 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9077 instruction. ADD_OFFSET is nonzero if the base address register needs
9078 to be modified with an add instruction before we can use it. */
9081 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9082 int nops, HOST_WIDE_INT add_offset)
9084 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9085 if the offset isn't small enough. The reason 2 ldrs are faster
9086 is because these ARMs are able to do more than one cache access
9087 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9088 whilst the ARM8 has a double bandwidth cache. This means that
9089 these cores can do both an instruction fetch and a data fetch in
9090 a single cycle, so the trick of calculating the address into a
9091 scratch register (one of the result regs) and then doing a load
9092 multiple actually becomes slower (and no smaller in code size).
9093 That is the transformation
9095 ldr rd1, [rbase + offset]
9096 ldr rd2, [rbase + offset + 4]
9100 add rd1, rbase, offset
9101 ldmia rd1, {rd1, rd2}
9103 produces worse code -- '3 cycles + any stalls on rd2' instead of
9104 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9105 access per cycle, the first sequence could never complete in less
9106 than 6 cycles, whereas the ldm sequence would only take 5 and
9107 would make better use of sequential accesses if not hitting the
9110 We cheat here and test 'arm_ld_sched' which we currently know to
9111 only be true for the ARM8, ARM9 and StrongARM. If this ever
9112 changes, then the test below needs to be reworked. */
9113 if (nops == 2 && arm_ld_sched && add_offset != 0)
9119 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9120 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9121 an array ORDER which describes the sequence to use when accessing the
9122 offsets that produces an ascending order. In this sequence, each
9123 offset must be larger by exactly 4 than the previous one. ORDER[0]
9124 must have been filled in with the lowest offset by the caller.
9125 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9126 we use to verify that ORDER produces an ascending order of registers.
9127 Return true if it was possible to construct such an order, false if
9131 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9135 for (i = 1; i < nops; i++)
9139 order[i] = order[i - 1];
9140 for (j = 0; j < nops; j++)
9141 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9143 /* We must find exactly one offset that is higher than the
9144 previous one by 4. */
9145 if (order[i] != order[i - 1])
9149 if (order[i] == order[i - 1])
9151 /* The register numbers must be ascending. */
9152 if (unsorted_regs != NULL
9153 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9160 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9161 HOST_WIDE_INT *load_offset)
9163 int unsorted_regs[MAX_LDM_STM_OPS];
9164 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9165 int order[MAX_LDM_STM_OPS];
9169 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9170 easily extended if required. */
9171 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9173 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9175 /* Loop over the operands and check that the memory references are
9176 suitable (i.e. immediate offsets from the same base register). At
9177 the same time, extract the target register, and the memory
9179 for (i = 0; i < nops; i++)
9184 /* Convert a subreg of a mem into the mem itself. */
9185 if (GET_CODE (operands[nops + i]) == SUBREG)
9186 operands[nops + i] = alter_subreg (operands + (nops + i));
9188 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9190 /* Don't reorder volatile memory references; it doesn't seem worth
9191 looking for the case where the order is ok anyway. */
9192 if (MEM_VOLATILE_P (operands[nops + i]))
9195 offset = const0_rtx;
9197 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9198 || (GET_CODE (reg) == SUBREG
9199 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9200 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9201 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9203 || (GET_CODE (reg) == SUBREG
9204 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9205 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9209 base_reg = REGNO (reg);
9212 if (base_reg != (int) REGNO (reg))
9213 /* Not addressed from the same base register. */
9216 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9217 ? REGNO (operands[i])
9218 : REGNO (SUBREG_REG (operands[i])));
9220 /* If it isn't an integer register, or if it overwrites the
9221 base register but isn't the last insn in the list, then
9222 we can't do this. */
9223 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9224 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9227 unsorted_offsets[i] = INTVAL (offset);
9228 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9232 /* Not a suitable memory address. */
9236 /* All the useful information has now been extracted from the
9237 operands into unsorted_regs and unsorted_offsets; additionally,
9238 order[0] has been set to the lowest offset in the list. Sort
9239 the offsets into order, verifying that they are adjacent, and
9240 check that the register numbers are ascending. */
9241 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9248 for (i = 0; i < nops; i++)
9249 regs[i] = unsorted_regs[order[i]];
9251 *load_offset = unsorted_offsets[order[0]];
9254 if (unsorted_offsets[order[0]] == 0)
9255 ldm_case = 1; /* ldmia */
9256 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9257 ldm_case = 2; /* ldmib */
9258 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9259 ldm_case = 3; /* ldmda */
9260 else if (unsorted_offsets[order[nops - 1]] == -4)
9261 ldm_case = 4; /* ldmdb */
9262 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9263 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9268 if (!multiple_operation_profitable_p (false, nops,
9270 ? unsorted_offsets[order[0]] : 0))
9277 emit_ldm_seq (rtx *operands, int nops)
9279 int regs[MAX_LDM_STM_OPS];
9281 HOST_WIDE_INT offset;
9285 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9288 strcpy (buf, "ldm%(ia%)\t");
9292 strcpy (buf, "ldm%(ib%)\t");
9296 strcpy (buf, "ldm%(da%)\t");
9300 strcpy (buf, "ldm%(db%)\t");
9305 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9306 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9309 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9310 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9312 output_asm_insn (buf, operands);
9314 strcpy (buf, "ldm%(ia%)\t");
9321 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9322 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9324 for (i = 1; i < nops; i++)
9325 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9326 reg_names[regs[i]]);
9328 strcat (buf, "}\t%@ phole ldm");
9330 output_asm_insn (buf, operands);
9335 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9336 HOST_WIDE_INT * load_offset)
9338 int unsorted_regs[MAX_LDM_STM_OPS];
9339 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9340 int order[MAX_LDM_STM_OPS];
9344 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9345 easily extended if required. */
9346 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9348 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9350 /* Loop over the operands and check that the memory references are
9351 suitable (i.e. immediate offsets from the same base register). At
9352 the same time, extract the target register, and the memory
9354 for (i = 0; i < nops; i++)
9359 /* Convert a subreg of a mem into the mem itself. */
9360 if (GET_CODE (operands[nops + i]) == SUBREG)
9361 operands[nops + i] = alter_subreg (operands + (nops + i));
9363 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9365 /* Don't reorder volatile memory references; it doesn't seem worth
9366 looking for the case where the order is ok anyway. */
9367 if (MEM_VOLATILE_P (operands[nops + i]))
9370 offset = const0_rtx;
9372 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9373 || (GET_CODE (reg) == SUBREG
9374 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9375 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9376 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9378 || (GET_CODE (reg) == SUBREG
9379 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9380 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9383 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9384 ? REGNO (operands[i])
9385 : REGNO (SUBREG_REG (operands[i])));
9387 base_reg = REGNO (reg);
9388 else if (base_reg != (int) REGNO (reg))
9389 /* Not addressed from the same base register. */
9392 /* If it isn't an integer register, then we can't do this. */
9393 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9396 unsorted_offsets[i] = INTVAL (offset);
9397 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9401 /* Not a suitable memory address. */
9405 /* All the useful information has now been extracted from the
9406 operands into unsorted_regs and unsorted_offsets; additionally,
9407 order[0] has been set to the lowest offset in the list. Sort
9408 the offsets into order, verifying that they are adjacent, and
9409 check that the register numbers are ascending. */
9410 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9417 for (i = 0; i < nops; i++)
9418 regs[i] = unsorted_regs[order[i]];
9420 *load_offset = unsorted_offsets[order[0]];
9423 if (unsorted_offsets[order[0]] == 0)
9424 stm_case = 1; /* stmia */
9425 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9426 stm_case = 2; /* stmib */
9427 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9428 stm_case = 3; /* stmda */
9429 else if (unsorted_offsets[order[nops - 1]] == -4)
9430 stm_case = 4; /* stmdb */
9434 if (!multiple_operation_profitable_p (false, nops, 0))
9441 emit_stm_seq (rtx *operands, int nops)
9443 int regs[MAX_LDM_STM_OPS];
9445 HOST_WIDE_INT offset;
9449 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9452 strcpy (buf, "stm%(ia%)\t");
9456 strcpy (buf, "stm%(ib%)\t");
9460 strcpy (buf, "stm%(da%)\t");
9464 strcpy (buf, "stm%(db%)\t");
9471 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9472 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9474 for (i = 1; i < nops; i++)
9475 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9476 reg_names[regs[i]]);
9478 strcat (buf, "}\t%@ phole stm");
9480 output_asm_insn (buf, operands);
9484 /* Routines for use in generating RTL. */
9487 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9488 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9490 HOST_WIDE_INT offset = *offsetp;
9493 int sign = up ? 1 : -1;
9496 /* XScale has load-store double instructions, but they have stricter
9497 alignment requirements than load-store multiple, so we cannot
9500 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9501 the pipeline until completion.
9509 An ldr instruction takes 1-3 cycles, but does not block the
9518 Best case ldr will always win. However, the more ldr instructions
9519 we issue, the less likely we are to be able to schedule them well.
9520 Using ldr instructions also increases code size.
9522 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9523 for counts of 3 or 4 regs. */
9524 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9530 for (i = 0; i < count; i++)
9532 addr = plus_constant (from, i * 4 * sign);
9533 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9534 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9540 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9550 result = gen_rtx_PARALLEL (VOIDmode,
9551 rtvec_alloc (count + (write_back ? 1 : 0)));
9554 XVECEXP (result, 0, 0)
9555 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9560 for (j = 0; i < count; i++, j++)
9562 addr = plus_constant (from, j * 4 * sign);
9563 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9564 XVECEXP (result, 0, i)
9565 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9576 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9577 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9579 HOST_WIDE_INT offset = *offsetp;
9582 int sign = up ? 1 : -1;
9585 /* See arm_gen_load_multiple for discussion of
9586 the pros/cons of ldm/stm usage for XScale. */
9587 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9593 for (i = 0; i < count; i++)
9595 addr = plus_constant (to, i * 4 * sign);
9596 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9597 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9603 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9613 result = gen_rtx_PARALLEL (VOIDmode,
9614 rtvec_alloc (count + (write_back ? 1 : 0)));
9617 XVECEXP (result, 0, 0)
9618 = gen_rtx_SET (VOIDmode, to,
9619 plus_constant (to, count * 4 * sign));
9624 for (j = 0; i < count; i++, j++)
9626 addr = plus_constant (to, j * 4 * sign);
9627 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9628 XVECEXP (result, 0, i)
9629 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9640 arm_gen_movmemqi (rtx *operands)
9642 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9643 HOST_WIDE_INT srcoffset, dstoffset;
9645 rtx src, dst, srcbase, dstbase;
9646 rtx part_bytes_reg = NULL;
9649 if (GET_CODE (operands[2]) != CONST_INT
9650 || GET_CODE (operands[3]) != CONST_INT
9651 || INTVAL (operands[2]) > 64
9652 || INTVAL (operands[3]) & 3)
9655 dstbase = operands[0];
9656 srcbase = operands[1];
9658 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9659 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9661 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9662 out_words_to_go = INTVAL (operands[2]) / 4;
9663 last_bytes = INTVAL (operands[2]) & 3;
9664 dstoffset = srcoffset = 0;
9666 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9667 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9669 for (i = 0; in_words_to_go >= 2; i+=4)
9671 if (in_words_to_go > 4)
9672 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9673 srcbase, &srcoffset));
9675 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9676 FALSE, srcbase, &srcoffset));
9678 if (out_words_to_go)
9680 if (out_words_to_go > 4)
9681 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9682 dstbase, &dstoffset));
9683 else if (out_words_to_go != 1)
9684 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9688 dstbase, &dstoffset));
9691 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9692 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9693 if (last_bytes != 0)
9695 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9701 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9702 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9705 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9706 if (out_words_to_go)
9710 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9711 sreg = copy_to_reg (mem);
9713 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9714 emit_move_insn (mem, sreg);
9717 gcc_assert (!in_words_to_go); /* Sanity check */
9722 gcc_assert (in_words_to_go > 0);
9724 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9725 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9728 gcc_assert (!last_bytes || part_bytes_reg);
9730 if (BYTES_BIG_ENDIAN && last_bytes)
9732 rtx tmp = gen_reg_rtx (SImode);
9734 /* The bytes we want are in the top end of the word. */
9735 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9736 GEN_INT (8 * (4 - last_bytes))));
9737 part_bytes_reg = tmp;
9741 mem = adjust_automodify_address (dstbase, QImode,
9742 plus_constant (dst, last_bytes - 1),
9743 dstoffset + last_bytes - 1);
9744 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9748 tmp = gen_reg_rtx (SImode);
9749 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9750 part_bytes_reg = tmp;
9759 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9760 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9764 rtx tmp = gen_reg_rtx (SImode);
9765 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9766 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9767 part_bytes_reg = tmp;
9774 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9775 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9782 /* Select a dominance comparison mode if possible for a test of the general
9783 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9784 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9785 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9786 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9787 In all cases OP will be either EQ or NE, but we don't need to know which
9788 here. If we are unable to support a dominance comparison we return
9789 CC mode. This will then fail to match for the RTL expressions that
9790 generate this call. */
9792 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9794 enum rtx_code cond1, cond2;
9797 /* Currently we will probably get the wrong result if the individual
9798 comparisons are not simple. This also ensures that it is safe to
9799 reverse a comparison if necessary. */
9800 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9802 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9806 /* The if_then_else variant of this tests the second condition if the
9807 first passes, but is true if the first fails. Reverse the first
9808 condition to get a true "inclusive-or" expression. */
9809 if (cond_or == DOM_CC_NX_OR_Y)
9810 cond1 = reverse_condition (cond1);
9812 /* If the comparisons are not equal, and one doesn't dominate the other,
9813 then we can't do this. */
9815 && !comparison_dominates_p (cond1, cond2)
9816 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9821 enum rtx_code temp = cond1;
9829 if (cond_or == DOM_CC_X_AND_Y)
9834 case EQ: return CC_DEQmode;
9835 case LE: return CC_DLEmode;
9836 case LEU: return CC_DLEUmode;
9837 case GE: return CC_DGEmode;
9838 case GEU: return CC_DGEUmode;
9839 default: gcc_unreachable ();
9843 if (cond_or == DOM_CC_X_AND_Y)
9859 if (cond_or == DOM_CC_X_AND_Y)
9875 if (cond_or == DOM_CC_X_AND_Y)
9891 if (cond_or == DOM_CC_X_AND_Y)
9906 /* The remaining cases only occur when both comparisons are the
9909 gcc_assert (cond1 == cond2);
9913 gcc_assert (cond1 == cond2);
9917 gcc_assert (cond1 == cond2);
9921 gcc_assert (cond1 == cond2);
9925 gcc_assert (cond1 == cond2);
9934 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9936 /* All floating point compares return CCFP if it is an equality
9937 comparison, and CCFPE otherwise. */
9938 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9958 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9967 /* A compare with a shifted operand. Because of canonicalization, the
9968 comparison will have to be swapped when we emit the assembler. */
9969 if (GET_MODE (y) == SImode
9970 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9971 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9972 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9973 || GET_CODE (x) == ROTATERT))
9976 /* This operation is performed swapped, but since we only rely on the Z
9977 flag we don't need an additional mode. */
9978 if (GET_MODE (y) == SImode
9979 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9980 && GET_CODE (x) == NEG
9981 && (op == EQ || op == NE))
9984 /* This is a special case that is used by combine to allow a
9985 comparison of a shifted byte load to be split into a zero-extend
9986 followed by a comparison of the shifted integer (only valid for
9987 equalities and unsigned inequalities). */
9988 if (GET_MODE (x) == SImode
9989 && GET_CODE (x) == ASHIFT
9990 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9991 && GET_CODE (XEXP (x, 0)) == SUBREG
9992 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9993 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9994 && (op == EQ || op == NE
9995 || op == GEU || op == GTU || op == LTU || op == LEU)
9996 && GET_CODE (y) == CONST_INT)
9999 /* A construct for a conditional compare, if the false arm contains
10000 0, then both conditions must be true, otherwise either condition
10001 must be true. Not all conditions are possible, so CCmode is
10002 returned if it can't be done. */
10003 if (GET_CODE (x) == IF_THEN_ELSE
10004 && (XEXP (x, 2) == const0_rtx
10005 || XEXP (x, 2) == const1_rtx)
10006 && COMPARISON_P (XEXP (x, 0))
10007 && COMPARISON_P (XEXP (x, 1)))
10008 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10009 INTVAL (XEXP (x, 2)));
10011 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10012 if (GET_CODE (x) == AND
10013 && COMPARISON_P (XEXP (x, 0))
10014 && COMPARISON_P (XEXP (x, 1)))
10015 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10018 if (GET_CODE (x) == IOR
10019 && COMPARISON_P (XEXP (x, 0))
10020 && COMPARISON_P (XEXP (x, 1)))
10021 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10024 /* An operation (on Thumb) where we want to test for a single bit.
10025 This is done by shifting that bit up into the top bit of a
10026 scratch register; we can then branch on the sign bit. */
10028 && GET_MODE (x) == SImode
10029 && (op == EQ || op == NE)
10030 && GET_CODE (x) == ZERO_EXTRACT
10031 && XEXP (x, 1) == const1_rtx)
10034 /* An operation that sets the condition codes as a side-effect, the
10035 V flag is not set correctly, so we can only use comparisons where
10036 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10038 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10039 if (GET_MODE (x) == SImode
10041 && (op == EQ || op == NE || op == LT || op == GE)
10042 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10043 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10044 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10045 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10046 || GET_CODE (x) == LSHIFTRT
10047 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10048 || GET_CODE (x) == ROTATERT
10049 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10050 return CC_NOOVmode;
10052 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10055 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10056 && GET_CODE (x) == PLUS
10057 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10063 /* X and Y are two things to compare using CODE. Emit the compare insn and
10064 return the rtx for register 0 in the proper mode. FP means this is a
10065 floating point compare: I don't think that it is needed on the arm. */
10067 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10069 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
10070 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10072 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10077 /* Generate a sequence of insns that will generate the correct return
10078 address mask depending on the physical architecture that the program
10081 arm_gen_return_addr_mask (void)
10083 rtx reg = gen_reg_rtx (Pmode);
10085 emit_insn (gen_return_addr_mask (reg));
10090 arm_reload_in_hi (rtx *operands)
10092 rtx ref = operands[1];
10094 HOST_WIDE_INT offset = 0;
10096 if (GET_CODE (ref) == SUBREG)
10098 offset = SUBREG_BYTE (ref);
10099 ref = SUBREG_REG (ref);
10102 if (GET_CODE (ref) == REG)
10104 /* We have a pseudo which has been spilt onto the stack; there
10105 are two cases here: the first where there is a simple
10106 stack-slot replacement and a second where the stack-slot is
10107 out of range, or is used as a subreg. */
10108 if (reg_equiv_mem[REGNO (ref)])
10110 ref = reg_equiv_mem[REGNO (ref)];
10111 base = find_replacement (&XEXP (ref, 0));
10114 /* The slot is out of range, or was dressed up in a SUBREG. */
10115 base = reg_equiv_address[REGNO (ref)];
10118 base = find_replacement (&XEXP (ref, 0));
10120 /* Handle the case where the address is too complex to be offset by 1. */
10121 if (GET_CODE (base) == MINUS
10122 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10124 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10126 emit_set_insn (base_plus, base);
10129 else if (GET_CODE (base) == PLUS)
10131 /* The addend must be CONST_INT, or we would have dealt with it above. */
10132 HOST_WIDE_INT hi, lo;
10134 offset += INTVAL (XEXP (base, 1));
10135 base = XEXP (base, 0);
10137 /* Rework the address into a legal sequence of insns. */
10138 /* Valid range for lo is -4095 -> 4095 */
10141 : -((-offset) & 0xfff));
10143 /* Corner case, if lo is the max offset then we would be out of range
10144 once we have added the additional 1 below, so bump the msb into the
10145 pre-loading insn(s). */
10149 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10150 ^ (HOST_WIDE_INT) 0x80000000)
10151 - (HOST_WIDE_INT) 0x80000000);
10153 gcc_assert (hi + lo == offset);
10157 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10159 /* Get the base address; addsi3 knows how to handle constants
10160 that require more than one insn. */
10161 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10167 /* Operands[2] may overlap operands[0] (though it won't overlap
10168 operands[1]), that's why we asked for a DImode reg -- so we can
10169 use the bit that does not overlap. */
10170 if (REGNO (operands[2]) == REGNO (operands[0]))
10171 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10173 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10175 emit_insn (gen_zero_extendqisi2 (scratch,
10176 gen_rtx_MEM (QImode,
10177 plus_constant (base,
10179 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10180 gen_rtx_MEM (QImode,
10181 plus_constant (base,
10183 if (!BYTES_BIG_ENDIAN)
10184 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10185 gen_rtx_IOR (SImode,
10188 gen_rtx_SUBREG (SImode, operands[0], 0),
10192 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10193 gen_rtx_IOR (SImode,
10194 gen_rtx_ASHIFT (SImode, scratch,
10196 gen_rtx_SUBREG (SImode, operands[0], 0)));
10199 /* Handle storing a half-word to memory during reload by synthesizing as two
10200 byte stores. Take care not to clobber the input values until after we
10201 have moved them somewhere safe. This code assumes that if the DImode
10202 scratch in operands[2] overlaps either the input value or output address
10203 in some way, then that value must die in this insn (we absolutely need
10204 two scratch registers for some corner cases). */
10206 arm_reload_out_hi (rtx *operands)
10208 rtx ref = operands[0];
10209 rtx outval = operands[1];
10211 HOST_WIDE_INT offset = 0;
10213 if (GET_CODE (ref) == SUBREG)
10215 offset = SUBREG_BYTE (ref);
10216 ref = SUBREG_REG (ref);
10219 if (GET_CODE (ref) == REG)
10221 /* We have a pseudo which has been spilt onto the stack; there
10222 are two cases here: the first where there is a simple
10223 stack-slot replacement and a second where the stack-slot is
10224 out of range, or is used as a subreg. */
10225 if (reg_equiv_mem[REGNO (ref)])
10227 ref = reg_equiv_mem[REGNO (ref)];
10228 base = find_replacement (&XEXP (ref, 0));
10231 /* The slot is out of range, or was dressed up in a SUBREG. */
10232 base = reg_equiv_address[REGNO (ref)];
10235 base = find_replacement (&XEXP (ref, 0));
10237 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10239 /* Handle the case where the address is too complex to be offset by 1. */
10240 if (GET_CODE (base) == MINUS
10241 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10243 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10245 /* Be careful not to destroy OUTVAL. */
10246 if (reg_overlap_mentioned_p (base_plus, outval))
10248 /* Updating base_plus might destroy outval, see if we can
10249 swap the scratch and base_plus. */
10250 if (!reg_overlap_mentioned_p (scratch, outval))
10253 scratch = base_plus;
10258 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10260 /* Be conservative and copy OUTVAL into the scratch now,
10261 this should only be necessary if outval is a subreg
10262 of something larger than a word. */
10263 /* XXX Might this clobber base? I can't see how it can,
10264 since scratch is known to overlap with OUTVAL, and
10265 must be wider than a word. */
10266 emit_insn (gen_movhi (scratch_hi, outval));
10267 outval = scratch_hi;
10271 emit_set_insn (base_plus, base);
10274 else if (GET_CODE (base) == PLUS)
10276 /* The addend must be CONST_INT, or we would have dealt with it above. */
10277 HOST_WIDE_INT hi, lo;
10279 offset += INTVAL (XEXP (base, 1));
10280 base = XEXP (base, 0);
10282 /* Rework the address into a legal sequence of insns. */
10283 /* Valid range for lo is -4095 -> 4095 */
10286 : -((-offset) & 0xfff));
10288 /* Corner case, if lo is the max offset then we would be out of range
10289 once we have added the additional 1 below, so bump the msb into the
10290 pre-loading insn(s). */
10294 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10295 ^ (HOST_WIDE_INT) 0x80000000)
10296 - (HOST_WIDE_INT) 0x80000000);
10298 gcc_assert (hi + lo == offset);
10302 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10304 /* Be careful not to destroy OUTVAL. */
10305 if (reg_overlap_mentioned_p (base_plus, outval))
10307 /* Updating base_plus might destroy outval, see if we
10308 can swap the scratch and base_plus. */
10309 if (!reg_overlap_mentioned_p (scratch, outval))
10312 scratch = base_plus;
10317 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10319 /* Be conservative and copy outval into scratch now,
10320 this should only be necessary if outval is a
10321 subreg of something larger than a word. */
10322 /* XXX Might this clobber base? I can't see how it
10323 can, since scratch is known to overlap with
10325 emit_insn (gen_movhi (scratch_hi, outval));
10326 outval = scratch_hi;
10330 /* Get the base address; addsi3 knows how to handle constants
10331 that require more than one insn. */
10332 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10338 if (BYTES_BIG_ENDIAN)
10340 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10341 plus_constant (base, offset + 1)),
10342 gen_lowpart (QImode, outval)));
10343 emit_insn (gen_lshrsi3 (scratch,
10344 gen_rtx_SUBREG (SImode, outval, 0),
10346 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10347 gen_lowpart (QImode, scratch)));
10351 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10352 gen_lowpart (QImode, outval)));
10353 emit_insn (gen_lshrsi3 (scratch,
10354 gen_rtx_SUBREG (SImode, outval, 0),
10356 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10357 plus_constant (base, offset + 1)),
10358 gen_lowpart (QImode, scratch)));
10362 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10363 (padded to the size of a word) should be passed in a register. */
10366 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10368 if (TARGET_AAPCS_BASED)
10369 return must_pass_in_stack_var_size (mode, type);
10371 return must_pass_in_stack_var_size_or_pad (mode, type);
10375 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10376 Return true if an argument passed on the stack should be padded upwards,
10377 i.e. if the least-significant byte has useful data.
10378 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10379 aggregate types are placed in the lowest memory address. */
10382 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10384 if (!TARGET_AAPCS_BASED)
10385 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10387 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10394 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10395 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10396 byte of the register has useful data, and return the opposite if the
10397 most significant byte does.
10398 For AAPCS, small aggregates and small complex types are always padded
10402 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10403 tree type, int first ATTRIBUTE_UNUSED)
10405 if (TARGET_AAPCS_BASED
10406 && BYTES_BIG_ENDIAN
10407 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10408 && int_size_in_bytes (type) <= 4)
10411 /* Otherwise, use default padding. */
10412 return !BYTES_BIG_ENDIAN;
10416 /* Print a symbolic form of X to the debug file, F. */
10418 arm_print_value (FILE *f, rtx x)
10420 switch (GET_CODE (x))
10423 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10427 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10435 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10437 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10438 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10446 fprintf (f, "\"%s\"", XSTR (x, 0));
10450 fprintf (f, "`%s'", XSTR (x, 0));
10454 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10458 arm_print_value (f, XEXP (x, 0));
10462 arm_print_value (f, XEXP (x, 0));
10464 arm_print_value (f, XEXP (x, 1));
10472 fprintf (f, "????");
10477 /* Routines for manipulation of the constant pool. */
10479 /* Arm instructions cannot load a large constant directly into a
10480 register; they have to come from a pc relative load. The constant
10481 must therefore be placed in the addressable range of the pc
10482 relative load. Depending on the precise pc relative load
10483 instruction the range is somewhere between 256 bytes and 4k. This
10484 means that we often have to dump a constant inside a function, and
10485 generate code to branch around it.
10487 It is important to minimize this, since the branches will slow
10488 things down and make the code larger.
10490 Normally we can hide the table after an existing unconditional
10491 branch so that there is no interruption of the flow, but in the
10492 worst case the code looks like this:
10510 We fix this by performing a scan after scheduling, which notices
10511 which instructions need to have their operands fetched from the
10512 constant table and builds the table.
10514 The algorithm starts by building a table of all the constants that
10515 need fixing up and all the natural barriers in the function (places
10516 where a constant table can be dropped without breaking the flow).
10517 For each fixup we note how far the pc-relative replacement will be
10518 able to reach and the offset of the instruction into the function.
10520 Having built the table we then group the fixes together to form
10521 tables that are as large as possible (subject to addressing
10522 constraints) and emit each table of constants after the last
10523 barrier that is within range of all the instructions in the group.
10524 If a group does not contain a barrier, then we forcibly create one
10525 by inserting a jump instruction into the flow. Once the table has
10526 been inserted, the insns are then modified to reference the
10527 relevant entry in the pool.
10529 Possible enhancements to the algorithm (not implemented) are:
10531 1) For some processors and object formats, there may be benefit in
10532 aligning the pools to the start of cache lines; this alignment
10533 would need to be taken into account when calculating addressability
10536 /* These typedefs are located at the start of this file, so that
10537 they can be used in the prototypes there. This comment is to
10538 remind readers of that fact so that the following structures
10539 can be understood more easily.
10541 typedef struct minipool_node Mnode;
10542 typedef struct minipool_fixup Mfix; */
10544 struct minipool_node
10546 /* Doubly linked chain of entries. */
10549 /* The maximum offset into the code that this entry can be placed. While
10550 pushing fixes for forward references, all entries are sorted in order
10551 of increasing max_address. */
10552 HOST_WIDE_INT max_address;
10553 /* Similarly for an entry inserted for a backwards ref. */
10554 HOST_WIDE_INT min_address;
10555 /* The number of fixes referencing this entry. This can become zero
10556 if we "unpush" an entry. In this case we ignore the entry when we
10557 come to emit the code. */
10559 /* The offset from the start of the minipool. */
10560 HOST_WIDE_INT offset;
10561 /* The value in table. */
10563 /* The mode of value. */
10564 enum machine_mode mode;
10565 /* The size of the value. With iWMMXt enabled
10566 sizes > 4 also imply an alignment of 8-bytes. */
10570 struct minipool_fixup
10574 HOST_WIDE_INT address;
10576 enum machine_mode mode;
10580 HOST_WIDE_INT forwards;
10581 HOST_WIDE_INT backwards;
10584 /* Fixes less than a word need padding out to a word boundary. */
10585 #define MINIPOOL_FIX_SIZE(mode) \
10586 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
10588 static Mnode * minipool_vector_head;
10589 static Mnode * minipool_vector_tail;
10590 static rtx minipool_vector_label;
10591 static int minipool_pad;
10593 /* The linked list of all minipool fixes required for this function. */
10594 Mfix * minipool_fix_head;
10595 Mfix * minipool_fix_tail;
10596 /* The fix entry for the current minipool, once it has been placed. */
10597 Mfix * minipool_barrier;
10599 /* Determines if INSN is the start of a jump table. Returns the end
10600 of the TABLE or NULL_RTX. */
10602 is_jump_table (rtx insn)
10606 if (GET_CODE (insn) == JUMP_INSN
10607 && JUMP_LABEL (insn) != NULL
10608 && ((table = next_real_insn (JUMP_LABEL (insn)))
10609 == next_real_insn (insn))
10611 && GET_CODE (table) == JUMP_INSN
10612 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10613 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10619 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10620 #define JUMP_TABLES_IN_TEXT_SECTION 0
10623 static HOST_WIDE_INT
10624 get_jump_table_size (rtx insn)
10626 /* ADDR_VECs only take room if read-only data does into the text
10628 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10630 rtx body = PATTERN (insn);
10631 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10632 HOST_WIDE_INT size;
10633 HOST_WIDE_INT modesize;
10635 modesize = GET_MODE_SIZE (GET_MODE (body));
10636 size = modesize * XVECLEN (body, elt);
10640 /* Round up size of TBB table to a halfword boundary. */
10641 size = (size + 1) & ~(HOST_WIDE_INT)1;
10644 /* No padding necessary for TBH. */
10647 /* Add two bytes for alignment on Thumb. */
10652 gcc_unreachable ();
10660 /* Move a minipool fix MP from its current location to before MAX_MP.
10661 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10662 constraints may need updating. */
10664 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10665 HOST_WIDE_INT max_address)
10667 /* The code below assumes these are different. */
10668 gcc_assert (mp != max_mp);
10670 if (max_mp == NULL)
10672 if (max_address < mp->max_address)
10673 mp->max_address = max_address;
10677 if (max_address > max_mp->max_address - mp->fix_size)
10678 mp->max_address = max_mp->max_address - mp->fix_size;
10680 mp->max_address = max_address;
10682 /* Unlink MP from its current position. Since max_mp is non-null,
10683 mp->prev must be non-null. */
10684 mp->prev->next = mp->next;
10685 if (mp->next != NULL)
10686 mp->next->prev = mp->prev;
10688 minipool_vector_tail = mp->prev;
10690 /* Re-insert it before MAX_MP. */
10692 mp->prev = max_mp->prev;
10695 if (mp->prev != NULL)
10696 mp->prev->next = mp;
10698 minipool_vector_head = mp;
10701 /* Save the new entry. */
10704 /* Scan over the preceding entries and adjust their addresses as
10706 while (mp->prev != NULL
10707 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10709 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10716 /* Add a constant to the minipool for a forward reference. Returns the
10717 node added or NULL if the constant will not fit in this pool. */
10719 add_minipool_forward_ref (Mfix *fix)
10721 /* If set, max_mp is the first pool_entry that has a lower
10722 constraint than the one we are trying to add. */
10723 Mnode * max_mp = NULL;
10724 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10727 /* If the minipool starts before the end of FIX->INSN then this FIX
10728 can not be placed into the current pool. Furthermore, adding the
10729 new constant pool entry may cause the pool to start FIX_SIZE bytes
10731 if (minipool_vector_head &&
10732 (fix->address + get_attr_length (fix->insn)
10733 >= minipool_vector_head->max_address - fix->fix_size))
10736 /* Scan the pool to see if a constant with the same value has
10737 already been added. While we are doing this, also note the
10738 location where we must insert the constant if it doesn't already
10740 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10742 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10743 && fix->mode == mp->mode
10744 && (GET_CODE (fix->value) != CODE_LABEL
10745 || (CODE_LABEL_NUMBER (fix->value)
10746 == CODE_LABEL_NUMBER (mp->value)))
10747 && rtx_equal_p (fix->value, mp->value))
10749 /* More than one fix references this entry. */
10751 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10754 /* Note the insertion point if necessary. */
10756 && mp->max_address > max_address)
10759 /* If we are inserting an 8-bytes aligned quantity and
10760 we have not already found an insertion point, then
10761 make sure that all such 8-byte aligned quantities are
10762 placed at the start of the pool. */
10763 if (ARM_DOUBLEWORD_ALIGN
10765 && fix->fix_size >= 8
10766 && mp->fix_size < 8)
10769 max_address = mp->max_address;
10773 /* The value is not currently in the minipool, so we need to create
10774 a new entry for it. If MAX_MP is NULL, the entry will be put on
10775 the end of the list since the placement is less constrained than
10776 any existing entry. Otherwise, we insert the new fix before
10777 MAX_MP and, if necessary, adjust the constraints on the other
10780 mp->fix_size = fix->fix_size;
10781 mp->mode = fix->mode;
10782 mp->value = fix->value;
10784 /* Not yet required for a backwards ref. */
10785 mp->min_address = -65536;
10787 if (max_mp == NULL)
10789 mp->max_address = max_address;
10791 mp->prev = minipool_vector_tail;
10793 if (mp->prev == NULL)
10795 minipool_vector_head = mp;
10796 minipool_vector_label = gen_label_rtx ();
10799 mp->prev->next = mp;
10801 minipool_vector_tail = mp;
10805 if (max_address > max_mp->max_address - mp->fix_size)
10806 mp->max_address = max_mp->max_address - mp->fix_size;
10808 mp->max_address = max_address;
10811 mp->prev = max_mp->prev;
10813 if (mp->prev != NULL)
10814 mp->prev->next = mp;
10816 minipool_vector_head = mp;
10819 /* Save the new entry. */
10822 /* Scan over the preceding entries and adjust their addresses as
10824 while (mp->prev != NULL
10825 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10827 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10835 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10836 HOST_WIDE_INT min_address)
10838 HOST_WIDE_INT offset;
10840 /* The code below assumes these are different. */
10841 gcc_assert (mp != min_mp);
10843 if (min_mp == NULL)
10845 if (min_address > mp->min_address)
10846 mp->min_address = min_address;
10850 /* We will adjust this below if it is too loose. */
10851 mp->min_address = min_address;
10853 /* Unlink MP from its current position. Since min_mp is non-null,
10854 mp->next must be non-null. */
10855 mp->next->prev = mp->prev;
10856 if (mp->prev != NULL)
10857 mp->prev->next = mp->next;
10859 minipool_vector_head = mp->next;
10861 /* Reinsert it after MIN_MP. */
10863 mp->next = min_mp->next;
10865 if (mp->next != NULL)
10866 mp->next->prev = mp;
10868 minipool_vector_tail = mp;
10874 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10876 mp->offset = offset;
10877 if (mp->refcount > 0)
10878 offset += mp->fix_size;
10880 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10881 mp->next->min_address = mp->min_address + mp->fix_size;
10887 /* Add a constant to the minipool for a backward reference. Returns the
10888 node added or NULL if the constant will not fit in this pool.
10890 Note that the code for insertion for a backwards reference can be
10891 somewhat confusing because the calculated offsets for each fix do
10892 not take into account the size of the pool (which is still under
10895 add_minipool_backward_ref (Mfix *fix)
10897 /* If set, min_mp is the last pool_entry that has a lower constraint
10898 than the one we are trying to add. */
10899 Mnode *min_mp = NULL;
10900 /* This can be negative, since it is only a constraint. */
10901 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10904 /* If we can't reach the current pool from this insn, or if we can't
10905 insert this entry at the end of the pool without pushing other
10906 fixes out of range, then we don't try. This ensures that we
10907 can't fail later on. */
10908 if (min_address >= minipool_barrier->address
10909 || (minipool_vector_tail->min_address + fix->fix_size
10910 >= minipool_barrier->address))
10913 /* Scan the pool to see if a constant with the same value has
10914 already been added. While we are doing this, also note the
10915 location where we must insert the constant if it doesn't already
10917 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10919 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10920 && fix->mode == mp->mode
10921 && (GET_CODE (fix->value) != CODE_LABEL
10922 || (CODE_LABEL_NUMBER (fix->value)
10923 == CODE_LABEL_NUMBER (mp->value)))
10924 && rtx_equal_p (fix->value, mp->value)
10925 /* Check that there is enough slack to move this entry to the
10926 end of the table (this is conservative). */
10927 && (mp->max_address
10928 > (minipool_barrier->address
10929 + minipool_vector_tail->offset
10930 + minipool_vector_tail->fix_size)))
10933 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10936 if (min_mp != NULL)
10937 mp->min_address += fix->fix_size;
10940 /* Note the insertion point if necessary. */
10941 if (mp->min_address < min_address)
10943 /* For now, we do not allow the insertion of 8-byte alignment
10944 requiring nodes anywhere but at the start of the pool. */
10945 if (ARM_DOUBLEWORD_ALIGN
10946 && fix->fix_size >= 8 && mp->fix_size < 8)
10951 else if (mp->max_address
10952 < minipool_barrier->address + mp->offset + fix->fix_size)
10954 /* Inserting before this entry would push the fix beyond
10955 its maximum address (which can happen if we have
10956 re-located a forwards fix); force the new fix to come
10958 if (ARM_DOUBLEWORD_ALIGN
10959 && fix->fix_size >= 8 && mp->fix_size < 8)
10964 min_address = mp->min_address + fix->fix_size;
10967 /* Do not insert a non-8-byte aligned quantity before 8-byte
10968 aligned quantities. */
10969 else if (ARM_DOUBLEWORD_ALIGN
10970 && fix->fix_size < 8
10971 && mp->fix_size >= 8)
10974 min_address = mp->min_address + fix->fix_size;
10979 /* We need to create a new entry. */
10981 mp->fix_size = fix->fix_size;
10982 mp->mode = fix->mode;
10983 mp->value = fix->value;
10985 mp->max_address = minipool_barrier->address + 65536;
10987 mp->min_address = min_address;
10989 if (min_mp == NULL)
10992 mp->next = minipool_vector_head;
10994 if (mp->next == NULL)
10996 minipool_vector_tail = mp;
10997 minipool_vector_label = gen_label_rtx ();
11000 mp->next->prev = mp;
11002 minipool_vector_head = mp;
11006 mp->next = min_mp->next;
11010 if (mp->next != NULL)
11011 mp->next->prev = mp;
11013 minipool_vector_tail = mp;
11016 /* Save the new entry. */
11024 /* Scan over the following entries and adjust their offsets. */
11025 while (mp->next != NULL)
11027 if (mp->next->min_address < mp->min_address + mp->fix_size)
11028 mp->next->min_address = mp->min_address + mp->fix_size;
11031 mp->next->offset = mp->offset + mp->fix_size;
11033 mp->next->offset = mp->offset;
11042 assign_minipool_offsets (Mfix *barrier)
11044 HOST_WIDE_INT offset = 0;
11047 minipool_barrier = barrier;
11049 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11051 mp->offset = offset;
11053 if (mp->refcount > 0)
11054 offset += mp->fix_size;
11058 /* Output the literal table */
11060 dump_minipool (rtx scan)
11066 if (ARM_DOUBLEWORD_ALIGN)
11067 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11068 if (mp->refcount > 0 && mp->fix_size >= 8)
11075 fprintf (dump_file,
11076 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11077 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11079 scan = emit_label_after (gen_label_rtx (), scan);
11080 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11081 scan = emit_label_after (minipool_vector_label, scan);
11083 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11085 if (mp->refcount > 0)
11089 fprintf (dump_file,
11090 ";; Offset %u, min %ld, max %ld ",
11091 (unsigned) mp->offset, (unsigned long) mp->min_address,
11092 (unsigned long) mp->max_address);
11093 arm_print_value (dump_file, mp->value);
11094 fputc ('\n', dump_file);
11097 switch (mp->fix_size)
11099 #ifdef HAVE_consttable_1
11101 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11105 #ifdef HAVE_consttable_2
11107 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11111 #ifdef HAVE_consttable_4
11113 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11117 #ifdef HAVE_consttable_8
11119 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11123 #ifdef HAVE_consttable_16
11125 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11130 gcc_unreachable ();
11138 minipool_vector_head = minipool_vector_tail = NULL;
11139 scan = emit_insn_after (gen_consttable_end (), scan);
11140 scan = emit_barrier_after (scan);
11143 /* Return the cost of forcibly inserting a barrier after INSN. */
11145 arm_barrier_cost (rtx insn)
11147 /* Basing the location of the pool on the loop depth is preferable,
11148 but at the moment, the basic block information seems to be
11149 corrupt by this stage of the compilation. */
11150 int base_cost = 50;
11151 rtx next = next_nonnote_insn (insn);
11153 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11156 switch (GET_CODE (insn))
11159 /* It will always be better to place the table before the label, rather
11168 return base_cost - 10;
11171 return base_cost + 10;
11175 /* Find the best place in the insn stream in the range
11176 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11177 Create the barrier by inserting a jump and add a new fix entry for
11180 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11182 HOST_WIDE_INT count = 0;
11184 rtx from = fix->insn;
11185 /* The instruction after which we will insert the jump. */
11186 rtx selected = NULL;
11188 /* The address at which the jump instruction will be placed. */
11189 HOST_WIDE_INT selected_address;
11191 HOST_WIDE_INT max_count = max_address - fix->address;
11192 rtx label = gen_label_rtx ();
11194 selected_cost = arm_barrier_cost (from);
11195 selected_address = fix->address;
11197 while (from && count < max_count)
11202 /* This code shouldn't have been called if there was a natural barrier
11204 gcc_assert (GET_CODE (from) != BARRIER);
11206 /* Count the length of this insn. */
11207 count += get_attr_length (from);
11209 /* If there is a jump table, add its length. */
11210 tmp = is_jump_table (from);
11213 count += get_jump_table_size (tmp);
11215 /* Jump tables aren't in a basic block, so base the cost on
11216 the dispatch insn. If we select this location, we will
11217 still put the pool after the table. */
11218 new_cost = arm_barrier_cost (from);
11220 if (count < max_count
11221 && (!selected || new_cost <= selected_cost))
11224 selected_cost = new_cost;
11225 selected_address = fix->address + count;
11228 /* Continue after the dispatch table. */
11229 from = NEXT_INSN (tmp);
11233 new_cost = arm_barrier_cost (from);
11235 if (count < max_count
11236 && (!selected || new_cost <= selected_cost))
11239 selected_cost = new_cost;
11240 selected_address = fix->address + count;
11243 from = NEXT_INSN (from);
11246 /* Make sure that we found a place to insert the jump. */
11247 gcc_assert (selected);
11249 /* Create a new JUMP_INSN that branches around a barrier. */
11250 from = emit_jump_insn_after (gen_jump (label), selected);
11251 JUMP_LABEL (from) = label;
11252 barrier = emit_barrier_after (from);
11253 emit_label_after (label, barrier);
11255 /* Create a minipool barrier entry for the new barrier. */
11256 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11257 new_fix->insn = barrier;
11258 new_fix->address = selected_address;
11259 new_fix->next = fix->next;
11260 fix->next = new_fix;
11265 /* Record that there is a natural barrier in the insn stream at
11268 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11270 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11273 fix->address = address;
11276 if (minipool_fix_head != NULL)
11277 minipool_fix_tail->next = fix;
11279 minipool_fix_head = fix;
11281 minipool_fix_tail = fix;
11284 /* Record INSN, which will need fixing up to load a value from the
11285 minipool. ADDRESS is the offset of the insn since the start of the
11286 function; LOC is a pointer to the part of the insn which requires
11287 fixing; VALUE is the constant that must be loaded, which is of type
11290 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11291 enum machine_mode mode, rtx value)
11293 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11296 fix->address = address;
11299 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11300 fix->value = value;
11301 fix->forwards = get_attr_pool_range (insn);
11302 fix->backwards = get_attr_neg_pool_range (insn);
11303 fix->minipool = NULL;
11305 /* If an insn doesn't have a range defined for it, then it isn't
11306 expecting to be reworked by this code. Better to stop now than
11307 to generate duff assembly code. */
11308 gcc_assert (fix->forwards || fix->backwards);
11310 /* If an entry requires 8-byte alignment then assume all constant pools
11311 require 4 bytes of padding. Trying to do this later on a per-pool
11312 basis is awkward because existing pool entries have to be modified. */
11313 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11318 fprintf (dump_file,
11319 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11320 GET_MODE_NAME (mode),
11321 INSN_UID (insn), (unsigned long) address,
11322 -1 * (long)fix->backwards, (long)fix->forwards);
11323 arm_print_value (dump_file, fix->value);
11324 fprintf (dump_file, "\n");
11327 /* Add it to the chain of fixes. */
11330 if (minipool_fix_head != NULL)
11331 minipool_fix_tail->next = fix;
11333 minipool_fix_head = fix;
11335 minipool_fix_tail = fix;
11338 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11339 Returns the number of insns needed, or 99 if we don't know how to
11342 arm_const_double_inline_cost (rtx val)
11344 rtx lowpart, highpart;
11345 enum machine_mode mode;
11347 mode = GET_MODE (val);
11349 if (mode == VOIDmode)
11352 gcc_assert (GET_MODE_SIZE (mode) == 8);
11354 lowpart = gen_lowpart (SImode, val);
11355 highpart = gen_highpart_mode (SImode, mode, val);
11357 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11358 gcc_assert (GET_CODE (highpart) == CONST_INT);
11360 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11361 NULL_RTX, NULL_RTX, 0, 0)
11362 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11363 NULL_RTX, NULL_RTX, 0, 0));
11366 /* Return true if it is worthwhile to split a 64-bit constant into two
11367 32-bit operations. This is the case if optimizing for size, or
11368 if we have load delay slots, or if one 32-bit part can be done with
11369 a single data operation. */
11371 arm_const_double_by_parts (rtx val)
11373 enum machine_mode mode = GET_MODE (val);
11376 if (optimize_size || arm_ld_sched)
11379 if (mode == VOIDmode)
11382 part = gen_highpart_mode (SImode, mode, val);
11384 gcc_assert (GET_CODE (part) == CONST_INT);
11386 if (const_ok_for_arm (INTVAL (part))
11387 || const_ok_for_arm (~INTVAL (part)))
11390 part = gen_lowpart (SImode, val);
11392 gcc_assert (GET_CODE (part) == CONST_INT);
11394 if (const_ok_for_arm (INTVAL (part))
11395 || const_ok_for_arm (~INTVAL (part)))
11401 /* Scan INSN and note any of its operands that need fixing.
11402 If DO_PUSHES is false we do not actually push any of the fixups
11403 needed. The function returns TRUE if any fixups were needed/pushed.
11404 This is used by arm_memory_load_p() which needs to know about loads
11405 of constants that will be converted into minipool loads. */
11407 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11409 bool result = false;
11412 extract_insn (insn);
11414 if (!constrain_operands (1))
11415 fatal_insn_not_found (insn);
11417 if (recog_data.n_alternatives == 0)
11420 /* Fill in recog_op_alt with information about the constraints of
11422 preprocess_constraints ();
11424 for (opno = 0; opno < recog_data.n_operands; opno++)
11426 /* Things we need to fix can only occur in inputs. */
11427 if (recog_data.operand_type[opno] != OP_IN)
11430 /* If this alternative is a memory reference, then any mention
11431 of constants in this alternative is really to fool reload
11432 into allowing us to accept one there. We need to fix them up
11433 now so that we output the right code. */
11434 if (recog_op_alt[opno][which_alternative].memory_ok)
11436 rtx op = recog_data.operand[opno];
11438 if (CONSTANT_P (op))
11441 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11442 recog_data.operand_mode[opno], op);
11445 else if (GET_CODE (op) == MEM
11446 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11447 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11451 rtx cop = avoid_constant_pool_reference (op);
11453 /* Casting the address of something to a mode narrower
11454 than a word can cause avoid_constant_pool_reference()
11455 to return the pool reference itself. That's no good to
11456 us here. Lets just hope that we can use the
11457 constant pool value directly. */
11459 cop = get_pool_constant (XEXP (op, 0));
11461 push_minipool_fix (insn, address,
11462 recog_data.operand_loc[opno],
11463 recog_data.operand_mode[opno], cop);
11474 /* Gcc puts the pool in the wrong place for ARM, since we can only
11475 load addresses a limited distance around the pc. We do some
11476 special munging to move the constant pool values to the correct
11477 point in the code. */
11482 HOST_WIDE_INT address = 0;
11485 minipool_fix_head = minipool_fix_tail = NULL;
11487 /* The first insn must always be a note, or the code below won't
11488 scan it properly. */
11489 insn = get_insns ();
11490 gcc_assert (GET_CODE (insn) == NOTE);
11493 /* Scan all the insns and record the operands that will need fixing. */
11494 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11496 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11497 && (arm_cirrus_insn_p (insn)
11498 || GET_CODE (insn) == JUMP_INSN
11499 || arm_memory_load_p (insn)))
11500 cirrus_reorg (insn);
11502 if (GET_CODE (insn) == BARRIER)
11503 push_minipool_barrier (insn, address);
11504 else if (INSN_P (insn))
11508 note_invalid_constants (insn, address, true);
11509 address += get_attr_length (insn);
11511 /* If the insn is a vector jump, add the size of the table
11512 and skip the table. */
11513 if ((table = is_jump_table (insn)) != NULL)
11515 address += get_jump_table_size (table);
11521 fix = minipool_fix_head;
11523 /* Now scan the fixups and perform the required changes. */
11528 Mfix * last_added_fix;
11529 Mfix * last_barrier = NULL;
11532 /* Skip any further barriers before the next fix. */
11533 while (fix && GET_CODE (fix->insn) == BARRIER)
11536 /* No more fixes. */
11540 last_added_fix = NULL;
11542 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11544 if (GET_CODE (ftmp->insn) == BARRIER)
11546 if (ftmp->address >= minipool_vector_head->max_address)
11549 last_barrier = ftmp;
11551 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11554 last_added_fix = ftmp; /* Keep track of the last fix added. */
11557 /* If we found a barrier, drop back to that; any fixes that we
11558 could have reached but come after the barrier will now go in
11559 the next mini-pool. */
11560 if (last_barrier != NULL)
11562 /* Reduce the refcount for those fixes that won't go into this
11564 for (fdel = last_barrier->next;
11565 fdel && fdel != ftmp;
11568 fdel->minipool->refcount--;
11569 fdel->minipool = NULL;
11572 ftmp = last_barrier;
11576 /* ftmp is first fix that we can't fit into this pool and
11577 there no natural barriers that we could use. Insert a
11578 new barrier in the code somewhere between the previous
11579 fix and this one, and arrange to jump around it. */
11580 HOST_WIDE_INT max_address;
11582 /* The last item on the list of fixes must be a barrier, so
11583 we can never run off the end of the list of fixes without
11584 last_barrier being set. */
11587 max_address = minipool_vector_head->max_address;
11588 /* Check that there isn't another fix that is in range that
11589 we couldn't fit into this pool because the pool was
11590 already too large: we need to put the pool before such an
11591 instruction. The pool itself may come just after the
11592 fix because create_fix_barrier also allows space for a
11593 jump instruction. */
11594 if (ftmp->address < max_address)
11595 max_address = ftmp->address + 1;
11597 last_barrier = create_fix_barrier (last_added_fix, max_address);
11600 assign_minipool_offsets (last_barrier);
11604 if (GET_CODE (ftmp->insn) != BARRIER
11605 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11612 /* Scan over the fixes we have identified for this pool, fixing them
11613 up and adding the constants to the pool itself. */
11614 for (this_fix = fix; this_fix && ftmp != this_fix;
11615 this_fix = this_fix->next)
11616 if (GET_CODE (this_fix->insn) != BARRIER)
11619 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11620 minipool_vector_label),
11621 this_fix->minipool->offset);
11622 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11625 dump_minipool (last_barrier->insn);
11629 /* From now on we must synthesize any constants that we can't handle
11630 directly. This can happen if the RTL gets split during final
11631 instruction generation. */
11632 after_arm_reorg = 1;
11634 /* Free the minipool memory. */
11635 obstack_free (&minipool_obstack, minipool_startobj);
11638 /* Routines to output assembly language. */
11640 /* If the rtx is the correct value then return the string of the number.
11641 In this way we can ensure that valid double constants are generated even
11642 when cross compiling. */
11644 fp_immediate_constant (rtx x)
11649 if (!fp_consts_inited)
11652 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11653 for (i = 0; i < 8; i++)
11654 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11655 return strings_fp[i];
11657 gcc_unreachable ();
11660 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11661 static const char *
11662 fp_const_from_val (REAL_VALUE_TYPE *r)
11666 if (!fp_consts_inited)
11669 for (i = 0; i < 8; i++)
11670 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11671 return strings_fp[i];
11673 gcc_unreachable ();
11676 /* Output the operands of a LDM/STM instruction to STREAM.
11677 MASK is the ARM register set mask of which only bits 0-15 are important.
11678 REG is the base register, either the frame pointer or the stack pointer,
11679 INSTR is the possibly suffixed load or store instruction.
11680 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11683 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11684 unsigned long mask, int rfe)
11687 bool not_first = FALSE;
11689 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11690 fputc ('\t', stream);
11691 asm_fprintf (stream, instr, reg);
11692 fputc ('{', stream);
11694 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11695 if (mask & (1 << i))
11698 fprintf (stream, ", ");
11700 asm_fprintf (stream, "%r", i);
11705 fprintf (stream, "}^\n");
11707 fprintf (stream, "}\n");
11711 /* Output a FLDMD instruction to STREAM.
11712 BASE if the register containing the address.
11713 REG and COUNT specify the register range.
11714 Extra registers may be added to avoid hardware bugs.
11716 We output FLDMD even for ARMv5 VFP implementations. Although
11717 FLDMD is technically not supported until ARMv6, it is believed
11718 that all VFP implementations support its use in this context. */
11721 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11725 /* Workaround ARM10 VFPr1 bug. */
11726 if (count == 2 && !arm_arch6)
11733 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11734 load into multiple parts if we have to handle more than 16 registers. */
11737 vfp_output_fldmd (stream, base, reg, 16);
11738 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11742 fputc ('\t', stream);
11743 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11745 for (i = reg; i < reg + count; i++)
11748 fputs (", ", stream);
11749 asm_fprintf (stream, "d%d", i);
11751 fputs ("}\n", stream);
11756 /* Output the assembly for a store multiple. */
11759 vfp_output_fstmd (rtx * operands)
11766 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11767 p = strlen (pattern);
11769 gcc_assert (GET_CODE (operands[1]) == REG);
11771 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11772 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11774 p += sprintf (&pattern[p], ", d%d", base + i);
11776 strcpy (&pattern[p], "}");
11778 output_asm_insn (pattern, operands);
11783 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11784 number of bytes pushed. */
11787 vfp_emit_fstmd (int base_reg, int count)
11794 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11795 register pairs are stored by a store multiple insn. We avoid this
11796 by pushing an extra pair. */
11797 if (count == 2 && !arm_arch6)
11799 if (base_reg == LAST_VFP_REGNUM - 3)
11804 /* FSTMD may not store more than 16 doubleword registers at once. Split
11805 larger stores into multiple parts (up to a maximum of two, in
11810 /* NOTE: base_reg is an internal register number, so each D register
11812 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11813 saved += vfp_emit_fstmd (base_reg, 16);
11817 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11818 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11820 reg = gen_rtx_REG (DFmode, base_reg);
11823 XVECEXP (par, 0, 0)
11824 = gen_rtx_SET (VOIDmode,
11827 gen_rtx_PRE_MODIFY (Pmode,
11830 (stack_pointer_rtx,
11833 gen_rtx_UNSPEC (BLKmode,
11834 gen_rtvec (1, reg),
11835 UNSPEC_PUSH_MULT));
11837 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11838 plus_constant (stack_pointer_rtx, -(count * 8)));
11839 RTX_FRAME_RELATED_P (tmp) = 1;
11840 XVECEXP (dwarf, 0, 0) = tmp;
11842 tmp = gen_rtx_SET (VOIDmode,
11843 gen_frame_mem (DFmode, stack_pointer_rtx),
11845 RTX_FRAME_RELATED_P (tmp) = 1;
11846 XVECEXP (dwarf, 0, 1) = tmp;
11848 for (i = 1; i < count; i++)
11850 reg = gen_rtx_REG (DFmode, base_reg);
11852 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11854 tmp = gen_rtx_SET (VOIDmode,
11855 gen_frame_mem (DFmode,
11856 plus_constant (stack_pointer_rtx,
11859 RTX_FRAME_RELATED_P (tmp) = 1;
11860 XVECEXP (dwarf, 0, i + 1) = tmp;
11863 par = emit_insn (par);
11864 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11865 RTX_FRAME_RELATED_P (par) = 1;
11870 /* Emit a call instruction with pattern PAT. ADDR is the address of
11871 the call target. */
11874 arm_emit_call_insn (rtx pat, rtx addr)
11878 insn = emit_call_insn (pat);
11880 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11881 If the call might use such an entry, add a use of the PIC register
11882 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11883 if (TARGET_VXWORKS_RTP
11885 && GET_CODE (addr) == SYMBOL_REF
11886 && (SYMBOL_REF_DECL (addr)
11887 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11888 : !SYMBOL_REF_LOCAL_P (addr)))
11890 require_pic_register ();
11891 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11895 /* Output a 'call' insn. */
11897 output_call (rtx *operands)
11899 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11901 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11902 if (REGNO (operands[0]) == LR_REGNUM)
11904 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11905 output_asm_insn ("mov%?\t%0, %|lr", operands);
11908 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11910 if (TARGET_INTERWORK || arm_arch4t)
11911 output_asm_insn ("bx%?\t%0", operands);
11913 output_asm_insn ("mov%?\t%|pc, %0", operands);
11918 /* Output a 'call' insn that is a reference in memory. This is
11919 disabled for ARMv5 and we prefer a blx instead because otherwise
11920 there's a significant performance overhead. */
11922 output_call_mem (rtx *operands)
11924 gcc_assert (!arm_arch5);
11925 if (TARGET_INTERWORK)
11927 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11928 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11929 output_asm_insn ("bx%?\t%|ip", operands);
11931 else if (regno_use_in (LR_REGNUM, operands[0]))
11933 /* LR is used in the memory address. We load the address in the
11934 first instruction. It's safe to use IP as the target of the
11935 load since the call will kill it anyway. */
11936 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11937 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11939 output_asm_insn ("bx%?\t%|ip", operands);
11941 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11945 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11946 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11953 /* Output a move from arm registers to an fpa registers.
11954 OPERANDS[0] is an fpa register.
11955 OPERANDS[1] is the first registers of an arm register pair. */
11957 output_mov_long_double_fpa_from_arm (rtx *operands)
11959 int arm_reg0 = REGNO (operands[1]);
11962 gcc_assert (arm_reg0 != IP_REGNUM);
11964 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11965 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11966 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11968 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11969 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11974 /* Output a move from an fpa register to arm registers.
11975 OPERANDS[0] is the first registers of an arm register pair.
11976 OPERANDS[1] is an fpa register. */
11978 output_mov_long_double_arm_from_fpa (rtx *operands)
11980 int arm_reg0 = REGNO (operands[0]);
11983 gcc_assert (arm_reg0 != IP_REGNUM);
11985 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11986 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11987 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11989 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11990 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11994 /* Output a move from arm registers to arm registers of a long double
11995 OPERANDS[0] is the destination.
11996 OPERANDS[1] is the source. */
11998 output_mov_long_double_arm_from_arm (rtx *operands)
12000 /* We have to be careful here because the two might overlap. */
12001 int dest_start = REGNO (operands[0]);
12002 int src_start = REGNO (operands[1]);
12006 if (dest_start < src_start)
12008 for (i = 0; i < 3; i++)
12010 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12011 ops[1] = gen_rtx_REG (SImode, src_start + i);
12012 output_asm_insn ("mov%?\t%0, %1", ops);
12017 for (i = 2; i >= 0; i--)
12019 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12020 ops[1] = gen_rtx_REG (SImode, src_start + i);
12021 output_asm_insn ("mov%?\t%0, %1", ops);
12029 arm_emit_movpair (rtx dest, rtx src)
12031 /* If the src is an immediate, simplify it. */
12032 if (CONST_INT_P (src))
12034 HOST_WIDE_INT val = INTVAL (src);
12035 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12036 if ((val >> 16) & 0x0000ffff)
12037 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12039 GEN_INT ((val >> 16) & 0x0000ffff));
12042 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12043 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12046 /* Output a move from arm registers to an fpa registers.
12047 OPERANDS[0] is an fpa register.
12048 OPERANDS[1] is the first registers of an arm register pair. */
12050 output_mov_double_fpa_from_arm (rtx *operands)
12052 int arm_reg0 = REGNO (operands[1]);
12055 gcc_assert (arm_reg0 != IP_REGNUM);
12057 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12058 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12059 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12060 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12064 /* Output a move from an fpa register to arm registers.
12065 OPERANDS[0] is the first registers of an arm register pair.
12066 OPERANDS[1] is an fpa register. */
12068 output_mov_double_arm_from_fpa (rtx *operands)
12070 int arm_reg0 = REGNO (operands[0]);
12073 gcc_assert (arm_reg0 != IP_REGNUM);
12075 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12076 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12077 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12078 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12082 /* Output a move between double words.
12083 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12084 or MEM<-REG and all MEMs must be offsettable addresses. */
12086 output_move_double (rtx *operands)
12088 enum rtx_code code0 = GET_CODE (operands[0]);
12089 enum rtx_code code1 = GET_CODE (operands[1]);
12094 unsigned int reg0 = REGNO (operands[0]);
12096 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12098 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12100 switch (GET_CODE (XEXP (operands[1], 0)))
12104 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12105 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12107 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12111 gcc_assert (TARGET_LDRD);
12112 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12117 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12119 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12124 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12126 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12130 gcc_assert (TARGET_LDRD);
12131 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12136 /* Autoicrement addressing modes should never have overlapping
12137 base and destination registers, and overlapping index registers
12138 are already prohibited, so this doesn't need to worry about
12140 otherops[0] = operands[0];
12141 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12142 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12144 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12146 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12148 /* Registers overlap so split out the increment. */
12149 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12150 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12154 /* Use a single insn if we can.
12155 FIXME: IWMMXT allows offsets larger than ldrd can
12156 handle, fix these up with a pair of ldr. */
12158 || GET_CODE (otherops[2]) != CONST_INT
12159 || (INTVAL (otherops[2]) > -256
12160 && INTVAL (otherops[2]) < 256))
12161 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12164 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12165 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12171 /* Use a single insn if we can.
12172 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12173 fix these up with a pair of ldr. */
12175 || GET_CODE (otherops[2]) != CONST_INT
12176 || (INTVAL (otherops[2]) > -256
12177 && INTVAL (otherops[2]) < 256))
12178 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12181 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12182 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12189 /* We might be able to use ldrd %0, %1 here. However the range is
12190 different to ldr/adr, and it is broken on some ARMv7-M
12191 implementations. */
12192 /* Use the second register of the pair to avoid problematic
12194 otherops[1] = operands[1];
12195 output_asm_insn ("adr%?\t%0, %1", otherops);
12196 operands[1] = otherops[0];
12198 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12200 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12203 /* ??? This needs checking for thumb2. */
12205 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12206 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12208 otherops[0] = operands[0];
12209 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12210 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12212 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12214 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12216 switch ((int) INTVAL (otherops[2]))
12219 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12224 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12229 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12233 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12234 operands[1] = otherops[0];
12236 && (GET_CODE (otherops[2]) == REG
12238 || (GET_CODE (otherops[2]) == CONST_INT
12239 && INTVAL (otherops[2]) > -256
12240 && INTVAL (otherops[2]) < 256)))
12242 if (reg_overlap_mentioned_p (operands[0],
12246 /* Swap base and index registers over to
12247 avoid a conflict. */
12249 otherops[1] = otherops[2];
12252 /* If both registers conflict, it will usually
12253 have been fixed by a splitter. */
12254 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12255 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12257 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12258 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12262 otherops[0] = operands[0];
12263 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12268 if (GET_CODE (otherops[2]) == CONST_INT)
12270 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12271 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12273 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12276 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12279 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12282 return "ldr%(d%)\t%0, [%1]";
12284 return "ldm%(ia%)\t%1, %M0";
12288 otherops[1] = adjust_address (operands[1], SImode, 4);
12289 /* Take care of overlapping base/data reg. */
12290 if (reg_mentioned_p (operands[0], operands[1]))
12292 output_asm_insn ("ldr%?\t%0, %1", otherops);
12293 output_asm_insn ("ldr%?\t%0, %1", operands);
12297 output_asm_insn ("ldr%?\t%0, %1", operands);
12298 output_asm_insn ("ldr%?\t%0, %1", otherops);
12305 /* Constraints should ensure this. */
12306 gcc_assert (code0 == MEM && code1 == REG);
12307 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12309 switch (GET_CODE (XEXP (operands[0], 0)))
12313 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12315 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12319 gcc_assert (TARGET_LDRD);
12320 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12325 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12327 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12332 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12334 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12338 gcc_assert (TARGET_LDRD);
12339 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12344 otherops[0] = operands[1];
12345 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12346 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12348 /* IWMMXT allows offsets larger than ldrd can handle,
12349 fix these up with a pair of ldr. */
12351 && GET_CODE (otherops[2]) == CONST_INT
12352 && (INTVAL(otherops[2]) <= -256
12353 || INTVAL(otherops[2]) >= 256))
12355 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12357 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12358 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12362 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12363 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12366 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12367 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12369 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12373 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12374 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12376 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12379 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12385 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12391 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12396 && (GET_CODE (otherops[2]) == REG
12398 || (GET_CODE (otherops[2]) == CONST_INT
12399 && INTVAL (otherops[2]) > -256
12400 && INTVAL (otherops[2]) < 256)))
12402 otherops[0] = operands[1];
12403 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12404 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12410 otherops[0] = adjust_address (operands[0], SImode, 4);
12411 otherops[1] = operands[1];
12412 output_asm_insn ("str%?\t%1, %0", operands);
12413 output_asm_insn ("str%?\t%H1, %0", otherops);
12420 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12421 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12424 output_move_quad (rtx *operands)
12426 if (REG_P (operands[0]))
12428 /* Load, or reg->reg move. */
12430 if (MEM_P (operands[1]))
12432 switch (GET_CODE (XEXP (operands[1], 0)))
12435 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12440 output_asm_insn ("adr%?\t%0, %1", operands);
12441 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12445 gcc_unreachable ();
12453 gcc_assert (REG_P (operands[1]));
12455 dest = REGNO (operands[0]);
12456 src = REGNO (operands[1]);
12458 /* This seems pretty dumb, but hopefully GCC won't try to do it
12461 for (i = 0; i < 4; i++)
12463 ops[0] = gen_rtx_REG (SImode, dest + i);
12464 ops[1] = gen_rtx_REG (SImode, src + i);
12465 output_asm_insn ("mov%?\t%0, %1", ops);
12468 for (i = 3; i >= 0; i--)
12470 ops[0] = gen_rtx_REG (SImode, dest + i);
12471 ops[1] = gen_rtx_REG (SImode, src + i);
12472 output_asm_insn ("mov%?\t%0, %1", ops);
12478 gcc_assert (MEM_P (operands[0]));
12479 gcc_assert (REG_P (operands[1]));
12480 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12482 switch (GET_CODE (XEXP (operands[0], 0)))
12485 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12489 gcc_unreachable ();
12496 /* Output a VFP load or store instruction. */
12499 output_move_vfp (rtx *operands)
12501 rtx reg, mem, addr, ops[2];
12502 int load = REG_P (operands[0]);
12503 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12504 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12507 enum machine_mode mode;
12509 reg = operands[!load];
12510 mem = operands[load];
12512 mode = GET_MODE (reg);
12514 gcc_assert (REG_P (reg));
12515 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12516 gcc_assert (mode == SFmode
12520 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12521 gcc_assert (MEM_P (mem));
12523 addr = XEXP (mem, 0);
12525 switch (GET_CODE (addr))
12528 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12529 ops[0] = XEXP (addr, 0);
12534 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12535 ops[0] = XEXP (addr, 0);
12540 templ = "f%s%c%%?\t%%%s0, %%1%s";
12546 sprintf (buff, templ,
12547 load ? "ld" : "st",
12550 integer_p ? "\t%@ int" : "");
12551 output_asm_insn (buff, ops);
12556 /* Output a Neon quad-word load or store, or a load or store for
12557 larger structure modes.
12559 WARNING: The ordering of elements is weird in big-endian mode,
12560 because we use VSTM, as required by the EABI. GCC RTL defines
12561 element ordering based on in-memory order. This can be differ
12562 from the architectural ordering of elements within a NEON register.
12563 The intrinsics defined in arm_neon.h use the NEON register element
12564 ordering, not the GCC RTL element ordering.
12566 For example, the in-memory ordering of a big-endian a quadword
12567 vector with 16-bit elements when stored from register pair {d0,d1}
12568 will be (lowest address first, d0[N] is NEON register element N):
12570 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12572 When necessary, quadword registers (dN, dN+1) are moved to ARM
12573 registers from rN in the order:
12575 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12577 So that STM/LDM can be used on vectors in ARM registers, and the
12578 same memory layout will result as if VSTM/VLDM were used. */
12581 output_move_neon (rtx *operands)
12583 rtx reg, mem, addr, ops[2];
12584 int regno, load = REG_P (operands[0]);
12587 enum machine_mode mode;
12589 reg = operands[!load];
12590 mem = operands[load];
12592 mode = GET_MODE (reg);
12594 gcc_assert (REG_P (reg));
12595 regno = REGNO (reg);
12596 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12597 || NEON_REGNO_OK_FOR_QUAD (regno));
12598 gcc_assert (VALID_NEON_DREG_MODE (mode)
12599 || VALID_NEON_QREG_MODE (mode)
12600 || VALID_NEON_STRUCT_MODE (mode));
12601 gcc_assert (MEM_P (mem));
12603 addr = XEXP (mem, 0);
12605 /* Strip off const from addresses like (const (plus (...))). */
12606 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12607 addr = XEXP (addr, 0);
12609 switch (GET_CODE (addr))
12612 templ = "v%smia%%?\t%%0!, %%h1";
12613 ops[0] = XEXP (addr, 0);
12618 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12619 templ = "v%smdb%%?\t%%0!, %%h1";
12620 ops[0] = XEXP (addr, 0);
12625 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12626 gcc_unreachable ();
12631 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12634 for (i = 0; i < nregs; i++)
12636 /* We're only using DImode here because it's a convenient size. */
12637 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12638 ops[1] = adjust_address (mem, DImode, 8 * i);
12639 if (reg_overlap_mentioned_p (ops[0], mem))
12641 gcc_assert (overlap == -1);
12646 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12647 output_asm_insn (buff, ops);
12652 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12653 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12654 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12655 output_asm_insn (buff, ops);
12662 templ = "v%smia%%?\t%%m0, %%h1";
12667 sprintf (buff, templ, load ? "ld" : "st");
12668 output_asm_insn (buff, ops);
12673 /* Output an ADD r, s, #n where n may be too big for one instruction.
12674 If adding zero to one register, output nothing. */
12676 output_add_immediate (rtx *operands)
12678 HOST_WIDE_INT n = INTVAL (operands[2]);
12680 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12683 output_multi_immediate (operands,
12684 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12687 output_multi_immediate (operands,
12688 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12695 /* Output a multiple immediate operation.
12696 OPERANDS is the vector of operands referred to in the output patterns.
12697 INSTR1 is the output pattern to use for the first constant.
12698 INSTR2 is the output pattern to use for subsequent constants.
12699 IMMED_OP is the index of the constant slot in OPERANDS.
12700 N is the constant value. */
12701 static const char *
12702 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12703 int immed_op, HOST_WIDE_INT n)
12705 #if HOST_BITS_PER_WIDE_INT > 32
12711 /* Quick and easy output. */
12712 operands[immed_op] = const0_rtx;
12713 output_asm_insn (instr1, operands);
12718 const char * instr = instr1;
12720 /* Note that n is never zero here (which would give no output). */
12721 for (i = 0; i < 32; i += 2)
12725 operands[immed_op] = GEN_INT (n & (255 << i));
12726 output_asm_insn (instr, operands);
12736 /* Return the name of a shifter operation. */
12737 static const char *
12738 arm_shift_nmem(enum rtx_code code)
12743 return ARM_LSL_NAME;
12759 /* Return the appropriate ARM instruction for the operation code.
12760 The returned result should not be overwritten. OP is the rtx of the
12761 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12764 arithmetic_instr (rtx op, int shift_first_arg)
12766 switch (GET_CODE (op))
12772 return shift_first_arg ? "rsb" : "sub";
12787 return arm_shift_nmem(GET_CODE(op));
12790 gcc_unreachable ();
12794 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12795 for the operation code. The returned result should not be overwritten.
12796 OP is the rtx code of the shift.
12797 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
12799 static const char *
12800 shift_op (rtx op, HOST_WIDE_INT *amountp)
12803 enum rtx_code code = GET_CODE (op);
12805 switch (GET_CODE (XEXP (op, 1)))
12813 *amountp = INTVAL (XEXP (op, 1));
12817 gcc_unreachable ();
12823 gcc_assert (*amountp != -1);
12824 *amountp = 32 - *amountp;
12827 /* Fall through. */
12833 mnem = arm_shift_nmem(code);
12837 /* We never have to worry about the amount being other than a
12838 power of 2, since this case can never be reloaded from a reg. */
12839 gcc_assert (*amountp != -1);
12840 *amountp = int_log2 (*amountp);
12841 return ARM_LSL_NAME;
12844 gcc_unreachable ();
12847 if (*amountp != -1)
12849 /* This is not 100% correct, but follows from the desire to merge
12850 multiplication by a power of 2 with the recognizer for a
12851 shift. >=32 is not a valid shift for "lsl", so we must try and
12852 output a shift that produces the correct arithmetical result.
12853 Using lsr #32 is identical except for the fact that the carry bit
12854 is not set correctly if we set the flags; but we never use the
12855 carry bit from such an operation, so we can ignore that. */
12856 if (code == ROTATERT)
12857 /* Rotate is just modulo 32. */
12859 else if (*amountp != (*amountp & 31))
12861 if (code == ASHIFT)
12866 /* Shifts of 0 are no-ops. */
12874 /* Obtain the shift from the POWER of two. */
12876 static HOST_WIDE_INT
12877 int_log2 (HOST_WIDE_INT power)
12879 HOST_WIDE_INT shift = 0;
12881 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12883 gcc_assert (shift <= 31);
12890 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12891 because /bin/as is horribly restrictive. The judgement about
12892 whether or not each character is 'printable' (and can be output as
12893 is) or not (and must be printed with an octal escape) must be made
12894 with reference to the *host* character set -- the situation is
12895 similar to that discussed in the comments above pp_c_char in
12896 c-pretty-print.c. */
12898 #define MAX_ASCII_LEN 51
12901 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12904 int len_so_far = 0;
12906 fputs ("\t.ascii\t\"", stream);
12908 for (i = 0; i < len; i++)
12912 if (len_so_far >= MAX_ASCII_LEN)
12914 fputs ("\"\n\t.ascii\t\"", stream);
12920 if (c == '\\' || c == '\"')
12922 putc ('\\', stream);
12930 fprintf (stream, "\\%03o", c);
12935 fputs ("\"\n", stream);
12938 /* Compute the register save mask for registers 0 through 12
12939 inclusive. This code is used by arm_compute_save_reg_mask. */
12941 static unsigned long
12942 arm_compute_save_reg0_reg12_mask (void)
12944 unsigned long func_type = arm_current_func_type ();
12945 unsigned long save_reg_mask = 0;
12948 if (IS_INTERRUPT (func_type))
12950 unsigned int max_reg;
12951 /* Interrupt functions must not corrupt any registers,
12952 even call clobbered ones. If this is a leaf function
12953 we can just examine the registers used by the RTL, but
12954 otherwise we have to assume that whatever function is
12955 called might clobber anything, and so we have to save
12956 all the call-clobbered registers as well. */
12957 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12958 /* FIQ handlers have registers r8 - r12 banked, so
12959 we only need to check r0 - r7, Normal ISRs only
12960 bank r14 and r15, so we must check up to r12.
12961 r13 is the stack pointer which is always preserved,
12962 so we do not need to consider it here. */
12967 for (reg = 0; reg <= max_reg; reg++)
12968 if (df_regs_ever_live_p (reg)
12969 || (! current_function_is_leaf && call_used_regs[reg]))
12970 save_reg_mask |= (1 << reg);
12972 /* Also save the pic base register if necessary. */
12974 && !TARGET_SINGLE_PIC_BASE
12975 && arm_pic_register != INVALID_REGNUM
12976 && crtl->uses_pic_offset_table)
12977 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12979 else if (IS_VOLATILE(func_type))
12981 /* For noreturn functions we historically omitted register saves
12982 altogether. However this really messes up debugging. As a
12983 compromise save just the frame pointers. Combined with the link
12984 register saved elsewhere this should be sufficient to get
12986 if (frame_pointer_needed)
12987 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12988 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
12989 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12990 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
12991 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
12995 /* In the normal case we only need to save those registers
12996 which are call saved and which are used by this function. */
12997 for (reg = 0; reg <= 11; reg++)
12998 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12999 save_reg_mask |= (1 << reg);
13001 /* Handle the frame pointer as a special case. */
13002 if (frame_pointer_needed)
13003 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13005 /* If we aren't loading the PIC register,
13006 don't stack it even though it may be live. */
13008 && !TARGET_SINGLE_PIC_BASE
13009 && arm_pic_register != INVALID_REGNUM
13010 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13011 || crtl->uses_pic_offset_table))
13012 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13014 /* The prologue will copy SP into R0, so save it. */
13015 if (IS_STACKALIGN (func_type))
13016 save_reg_mask |= 1;
13019 /* Save registers so the exception handler can modify them. */
13020 if (crtl->calls_eh_return)
13026 reg = EH_RETURN_DATA_REGNO (i);
13027 if (reg == INVALID_REGNUM)
13029 save_reg_mask |= 1 << reg;
13033 return save_reg_mask;
13037 /* Compute the number of bytes used to store the static chain register on the
13038 stack, above the stack frame. We need to know this accurately to get the
13039 alignment of the rest of the stack frame correct. */
13041 static int arm_compute_static_chain_stack_bytes (void)
13043 unsigned long func_type = arm_current_func_type ();
13044 int static_chain_stack_bytes = 0;
13046 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13047 IS_NESTED (func_type) &&
13048 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13049 static_chain_stack_bytes = 4;
13051 return static_chain_stack_bytes;
13055 /* Compute a bit mask of which registers need to be
13056 saved on the stack for the current function.
13057 This is used by arm_get_frame_offsets, which may add extra registers. */
13059 static unsigned long
13060 arm_compute_save_reg_mask (void)
13062 unsigned int save_reg_mask = 0;
13063 unsigned long func_type = arm_current_func_type ();
13066 if (IS_NAKED (func_type))
13067 /* This should never really happen. */
13070 /* If we are creating a stack frame, then we must save the frame pointer,
13071 IP (which will hold the old stack pointer), LR and the PC. */
13072 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13074 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13077 | (1 << PC_REGNUM);
13079 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13081 /* Decide if we need to save the link register.
13082 Interrupt routines have their own banked link register,
13083 so they never need to save it.
13084 Otherwise if we do not use the link register we do not need to save
13085 it. If we are pushing other registers onto the stack however, we
13086 can save an instruction in the epilogue by pushing the link register
13087 now and then popping it back into the PC. This incurs extra memory
13088 accesses though, so we only do it when optimizing for size, and only
13089 if we know that we will not need a fancy return sequence. */
13090 if (df_regs_ever_live_p (LR_REGNUM)
13093 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13094 && !crtl->calls_eh_return))
13095 save_reg_mask |= 1 << LR_REGNUM;
13097 if (cfun->machine->lr_save_eliminated)
13098 save_reg_mask &= ~ (1 << LR_REGNUM);
13100 if (TARGET_REALLY_IWMMXT
13101 && ((bit_count (save_reg_mask)
13102 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13103 arm_compute_static_chain_stack_bytes())
13106 /* The total number of registers that are going to be pushed
13107 onto the stack is odd. We need to ensure that the stack
13108 is 64-bit aligned before we start to save iWMMXt registers,
13109 and also before we start to create locals. (A local variable
13110 might be a double or long long which we will load/store using
13111 an iWMMXt instruction). Therefore we need to push another
13112 ARM register, so that the stack will be 64-bit aligned. We
13113 try to avoid using the arg registers (r0 -r3) as they might be
13114 used to pass values in a tail call. */
13115 for (reg = 4; reg <= 12; reg++)
13116 if ((save_reg_mask & (1 << reg)) == 0)
13120 save_reg_mask |= (1 << reg);
13123 cfun->machine->sibcall_blocked = 1;
13124 save_reg_mask |= (1 << 3);
13128 /* We may need to push an additional register for use initializing the
13129 PIC base register. */
13130 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13131 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13133 reg = thumb_find_work_register (1 << 4);
13134 if (!call_used_regs[reg])
13135 save_reg_mask |= (1 << reg);
13138 return save_reg_mask;
13142 /* Compute a bit mask of which registers need to be
13143 saved on the stack for the current function. */
13144 static unsigned long
13145 thumb1_compute_save_reg_mask (void)
13147 unsigned long mask;
13151 for (reg = 0; reg < 12; reg ++)
13152 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13156 && !TARGET_SINGLE_PIC_BASE
13157 && arm_pic_register != INVALID_REGNUM
13158 && crtl->uses_pic_offset_table)
13159 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13161 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13162 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13163 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13165 /* LR will also be pushed if any lo regs are pushed. */
13166 if (mask & 0xff || thumb_force_lr_save ())
13167 mask |= (1 << LR_REGNUM);
13169 /* Make sure we have a low work register if we need one.
13170 We will need one if we are going to push a high register,
13171 but we are not currently intending to push a low register. */
13172 if ((mask & 0xff) == 0
13173 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13175 /* Use thumb_find_work_register to choose which register
13176 we will use. If the register is live then we will
13177 have to push it. Use LAST_LO_REGNUM as our fallback
13178 choice for the register to select. */
13179 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13180 /* Make sure the register returned by thumb_find_work_register is
13181 not part of the return value. */
13182 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13183 reg = LAST_LO_REGNUM;
13185 if (! call_used_regs[reg])
13189 /* The 504 below is 8 bytes less than 512 because there are two possible
13190 alignment words. We can't tell here if they will be present or not so we
13191 have to play it safe and assume that they are. */
13192 if ((CALLER_INTERWORKING_SLOT_SIZE +
13193 ROUND_UP_WORD (get_frame_size ()) +
13194 crtl->outgoing_args_size) >= 504)
13196 /* This is the same as the code in thumb1_expand_prologue() which
13197 determines which register to use for stack decrement. */
13198 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13199 if (mask & (1 << reg))
13202 if (reg > LAST_LO_REGNUM)
13204 /* Make sure we have a register available for stack decrement. */
13205 mask |= 1 << LAST_LO_REGNUM;
13213 /* Return the number of bytes required to save VFP registers. */
13215 arm_get_vfp_saved_size (void)
13217 unsigned int regno;
13222 /* Space for saved VFP registers. */
13223 if (TARGET_HARD_FLOAT && TARGET_VFP)
13226 for (regno = FIRST_VFP_REGNUM;
13227 regno < LAST_VFP_REGNUM;
13230 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13231 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13235 /* Workaround ARM10 VFPr1 bug. */
13236 if (count == 2 && !arm_arch6)
13238 saved += count * 8;
13247 if (count == 2 && !arm_arch6)
13249 saved += count * 8;
13256 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13257 everything bar the final return instruction. */
13259 output_return_instruction (rtx operand, int really_return, int reverse)
13261 char conditional[10];
13264 unsigned long live_regs_mask;
13265 unsigned long func_type;
13266 arm_stack_offsets *offsets;
13268 func_type = arm_current_func_type ();
13270 if (IS_NAKED (func_type))
13273 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13275 /* If this function was declared non-returning, and we have
13276 found a tail call, then we have to trust that the called
13277 function won't return. */
13282 /* Otherwise, trap an attempted return by aborting. */
13284 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13286 assemble_external_libcall (ops[1]);
13287 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13293 gcc_assert (!cfun->calls_alloca || really_return);
13295 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13297 cfun->machine->return_used_this_function = 1;
13299 offsets = arm_get_frame_offsets ();
13300 live_regs_mask = offsets->saved_regs_mask;
13302 if (live_regs_mask)
13304 const char * return_reg;
13306 /* If we do not have any special requirements for function exit
13307 (e.g. interworking) then we can load the return address
13308 directly into the PC. Otherwise we must load it into LR. */
13310 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13311 return_reg = reg_names[PC_REGNUM];
13313 return_reg = reg_names[LR_REGNUM];
13315 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13317 /* There are three possible reasons for the IP register
13318 being saved. 1) a stack frame was created, in which case
13319 IP contains the old stack pointer, or 2) an ISR routine
13320 corrupted it, or 3) it was saved to align the stack on
13321 iWMMXt. In case 1, restore IP into SP, otherwise just
13323 if (frame_pointer_needed)
13325 live_regs_mask &= ~ (1 << IP_REGNUM);
13326 live_regs_mask |= (1 << SP_REGNUM);
13329 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13332 /* On some ARM architectures it is faster to use LDR rather than
13333 LDM to load a single register. On other architectures, the
13334 cost is the same. In 26 bit mode, or for exception handlers,
13335 we have to use LDM to load the PC so that the CPSR is also
13337 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13338 if (live_regs_mask == (1U << reg))
13341 if (reg <= LAST_ARM_REGNUM
13342 && (reg != LR_REGNUM
13344 || ! IS_INTERRUPT (func_type)))
13346 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13347 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13354 /* Generate the load multiple instruction to restore the
13355 registers. Note we can get here, even if
13356 frame_pointer_needed is true, but only if sp already
13357 points to the base of the saved core registers. */
13358 if (live_regs_mask & (1 << SP_REGNUM))
13360 unsigned HOST_WIDE_INT stack_adjust;
13362 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13363 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13365 if (stack_adjust && arm_arch5 && TARGET_ARM)
13366 if (TARGET_UNIFIED_ASM)
13367 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13369 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13372 /* If we can't use ldmib (SA110 bug),
13373 then try to pop r3 instead. */
13375 live_regs_mask |= 1 << 3;
13377 if (TARGET_UNIFIED_ASM)
13378 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13380 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13384 if (TARGET_UNIFIED_ASM)
13385 sprintf (instr, "pop%s\t{", conditional);
13387 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13389 p = instr + strlen (instr);
13391 for (reg = 0; reg <= SP_REGNUM; reg++)
13392 if (live_regs_mask & (1 << reg))
13394 int l = strlen (reg_names[reg]);
13400 memcpy (p, ", ", 2);
13404 memcpy (p, "%|", 2);
13405 memcpy (p + 2, reg_names[reg], l);
13409 if (live_regs_mask & (1 << LR_REGNUM))
13411 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13412 /* If returning from an interrupt, restore the CPSR. */
13413 if (IS_INTERRUPT (func_type))
13420 output_asm_insn (instr, & operand);
13422 /* See if we need to generate an extra instruction to
13423 perform the actual function return. */
13425 && func_type != ARM_FT_INTERWORKED
13426 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13428 /* The return has already been handled
13429 by loading the LR into the PC. */
13436 switch ((int) ARM_FUNC_TYPE (func_type))
13440 /* ??? This is wrong for unified assembly syntax. */
13441 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13444 case ARM_FT_INTERWORKED:
13445 sprintf (instr, "bx%s\t%%|lr", conditional);
13448 case ARM_FT_EXCEPTION:
13449 /* ??? This is wrong for unified assembly syntax. */
13450 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13454 /* Use bx if it's available. */
13455 if (arm_arch5 || arm_arch4t)
13456 sprintf (instr, "bx%s\t%%|lr", conditional);
13458 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13462 output_asm_insn (instr, & operand);
13468 /* Write the function name into the code section, directly preceding
13469 the function prologue.
13471 Code will be output similar to this:
13473 .ascii "arm_poke_function_name", 0
13476 .word 0xff000000 + (t1 - t0)
13477 arm_poke_function_name
13479 stmfd sp!, {fp, ip, lr, pc}
13482 When performing a stack backtrace, code can inspect the value
13483 of 'pc' stored at 'fp' + 0. If the trace function then looks
13484 at location pc - 12 and the top 8 bits are set, then we know
13485 that there is a function name embedded immediately preceding this
13486 location and has length ((pc[-3]) & 0xff000000).
13488 We assume that pc is declared as a pointer to an unsigned long.
13490 It is of no benefit to output the function name if we are assembling
13491 a leaf function. These function types will not contain a stack
13492 backtrace structure, therefore it is not possible to determine the
13495 arm_poke_function_name (FILE *stream, const char *name)
13497 unsigned long alignlength;
13498 unsigned long length;
13501 length = strlen (name) + 1;
13502 alignlength = ROUND_UP_WORD (length);
13504 ASM_OUTPUT_ASCII (stream, name, length);
13505 ASM_OUTPUT_ALIGN (stream, 2);
13506 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13507 assemble_aligned_integer (UNITS_PER_WORD, x);
13510 /* Place some comments into the assembler stream
13511 describing the current function. */
13513 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13515 unsigned long func_type;
13519 thumb1_output_function_prologue (f, frame_size);
13523 /* Sanity check. */
13524 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13526 func_type = arm_current_func_type ();
13528 switch ((int) ARM_FUNC_TYPE (func_type))
13531 case ARM_FT_NORMAL:
13533 case ARM_FT_INTERWORKED:
13534 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13537 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13540 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13542 case ARM_FT_EXCEPTION:
13543 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13547 if (IS_NAKED (func_type))
13548 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13550 if (IS_VOLATILE (func_type))
13551 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13553 if (IS_NESTED (func_type))
13554 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13555 if (IS_STACKALIGN (func_type))
13556 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13558 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13560 crtl->args.pretend_args_size, frame_size);
13562 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13563 frame_pointer_needed,
13564 cfun->machine->uses_anonymous_args);
13566 if (cfun->machine->lr_save_eliminated)
13567 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13569 if (crtl->calls_eh_return)
13570 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13575 arm_output_epilogue (rtx sibling)
13578 unsigned long saved_regs_mask;
13579 unsigned long func_type;
13580 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13581 frame that is $fp + 4 for a non-variadic function. */
13582 int floats_offset = 0;
13584 FILE * f = asm_out_file;
13585 unsigned int lrm_count = 0;
13586 int really_return = (sibling == NULL);
13588 arm_stack_offsets *offsets;
13590 /* If we have already generated the return instruction
13591 then it is futile to generate anything else. */
13592 if (use_return_insn (FALSE, sibling) &&
13593 (cfun->machine->return_used_this_function != 0))
13596 func_type = arm_current_func_type ();
13598 if (IS_NAKED (func_type))
13599 /* Naked functions don't have epilogues. */
13602 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13606 /* A volatile function should never return. Call abort. */
13607 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13608 assemble_external_libcall (op);
13609 output_asm_insn ("bl\t%a0", &op);
13614 /* If we are throwing an exception, then we really must be doing a
13615 return, so we can't tail-call. */
13616 gcc_assert (!crtl->calls_eh_return || really_return);
13618 offsets = arm_get_frame_offsets ();
13619 saved_regs_mask = offsets->saved_regs_mask;
13622 lrm_count = bit_count (saved_regs_mask);
13624 floats_offset = offsets->saved_args;
13625 /* Compute how far away the floats will be. */
13626 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13627 if (saved_regs_mask & (1 << reg))
13628 floats_offset += 4;
13630 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13632 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13633 int vfp_offset = offsets->frame;
13635 if (TARGET_FPA_EMU2)
13637 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13638 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13640 floats_offset += 12;
13641 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13642 reg, FP_REGNUM, floats_offset - vfp_offset);
13647 start_reg = LAST_FPA_REGNUM;
13649 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13651 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13653 floats_offset += 12;
13655 /* We can't unstack more than four registers at once. */
13656 if (start_reg - reg == 3)
13658 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13659 reg, FP_REGNUM, floats_offset - vfp_offset);
13660 start_reg = reg - 1;
13665 if (reg != start_reg)
13666 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13667 reg + 1, start_reg - reg,
13668 FP_REGNUM, floats_offset - vfp_offset);
13669 start_reg = reg - 1;
13673 /* Just in case the last register checked also needs unstacking. */
13674 if (reg != start_reg)
13675 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13676 reg + 1, start_reg - reg,
13677 FP_REGNUM, floats_offset - vfp_offset);
13680 if (TARGET_HARD_FLOAT && TARGET_VFP)
13684 /* The fldmd insns do not have base+offset addressing
13685 modes, so we use IP to hold the address. */
13686 saved_size = arm_get_vfp_saved_size ();
13688 if (saved_size > 0)
13690 floats_offset += saved_size;
13691 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13692 FP_REGNUM, floats_offset - vfp_offset);
13694 start_reg = FIRST_VFP_REGNUM;
13695 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13697 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13698 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13700 if (start_reg != reg)
13701 vfp_output_fldmd (f, IP_REGNUM,
13702 (start_reg - FIRST_VFP_REGNUM) / 2,
13703 (reg - start_reg) / 2);
13704 start_reg = reg + 2;
13707 if (start_reg != reg)
13708 vfp_output_fldmd (f, IP_REGNUM,
13709 (start_reg - FIRST_VFP_REGNUM) / 2,
13710 (reg - start_reg) / 2);
13715 /* The frame pointer is guaranteed to be non-double-word aligned.
13716 This is because it is set to (old_stack_pointer - 4) and the
13717 old_stack_pointer was double word aligned. Thus the offset to
13718 the iWMMXt registers to be loaded must also be non-double-word
13719 sized, so that the resultant address *is* double-word aligned.
13720 We can ignore floats_offset since that was already included in
13721 the live_regs_mask. */
13722 lrm_count += (lrm_count % 2 ? 2 : 1);
13724 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13725 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13727 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13728 reg, FP_REGNUM, lrm_count * 4);
13733 /* saved_regs_mask should contain the IP, which at the time of stack
13734 frame generation actually contains the old stack pointer. So a
13735 quick way to unwind the stack is just pop the IP register directly
13736 into the stack pointer. */
13737 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13738 saved_regs_mask &= ~ (1 << IP_REGNUM);
13739 saved_regs_mask |= (1 << SP_REGNUM);
13741 /* There are two registers left in saved_regs_mask - LR and PC. We
13742 only need to restore the LR register (the return address), but to
13743 save time we can load it directly into the PC, unless we need a
13744 special function exit sequence, or we are not really returning. */
13746 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13747 && !crtl->calls_eh_return)
13748 /* Delete the LR from the register mask, so that the LR on
13749 the stack is loaded into the PC in the register mask. */
13750 saved_regs_mask &= ~ (1 << LR_REGNUM);
13752 saved_regs_mask &= ~ (1 << PC_REGNUM);
13754 /* We must use SP as the base register, because SP is one of the
13755 registers being restored. If an interrupt or page fault
13756 happens in the ldm instruction, the SP might or might not
13757 have been restored. That would be bad, as then SP will no
13758 longer indicate the safe area of stack, and we can get stack
13759 corruption. Using SP as the base register means that it will
13760 be reset correctly to the original value, should an interrupt
13761 occur. If the stack pointer already points at the right
13762 place, then omit the subtraction. */
13763 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13764 || cfun->calls_alloca)
13765 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13766 4 * bit_count (saved_regs_mask));
13767 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13769 if (IS_INTERRUPT (func_type))
13770 /* Interrupt handlers will have pushed the
13771 IP onto the stack, so restore it now. */
13772 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13776 /* This branch is executed for ARM mode (non-apcs frames) and
13777 Thumb-2 mode. Frame layout is essentially the same for those
13778 cases, except that in ARM mode frame pointer points to the
13779 first saved register, while in Thumb-2 mode the frame pointer points
13780 to the last saved register.
13782 It is possible to make frame pointer point to last saved
13783 register in both cases, and remove some conditionals below.
13784 That means that fp setup in prologue would be just "mov fp, sp"
13785 and sp restore in epilogue would be just "mov sp, fp", whereas
13786 now we have to use add/sub in those cases. However, the value
13787 of that would be marginal, as both mov and add/sub are 32-bit
13788 in ARM mode, and it would require extra conditionals
13789 in arm_expand_prologue to distingish ARM-apcs-frame case
13790 (where frame pointer is required to point at first register)
13791 and ARM-non-apcs-frame. Therefore, such change is postponed
13792 until real need arise. */
13793 unsigned HOST_WIDE_INT amount;
13795 /* Restore stack pointer if necessary. */
13796 if (TARGET_ARM && frame_pointer_needed)
13798 operands[0] = stack_pointer_rtx;
13799 operands[1] = hard_frame_pointer_rtx;
13801 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13802 output_add_immediate (operands);
13806 if (frame_pointer_needed)
13808 /* For Thumb-2 restore sp from the frame pointer.
13809 Operand restrictions mean we have to incrememnt FP, then copy
13811 amount = offsets->locals_base - offsets->saved_regs;
13812 operands[0] = hard_frame_pointer_rtx;
13816 unsigned long count;
13817 operands[0] = stack_pointer_rtx;
13818 amount = offsets->outgoing_args - offsets->saved_regs;
13819 /* pop call clobbered registers if it avoids a
13820 separate stack adjustment. */
13821 count = offsets->saved_regs - offsets->saved_args;
13824 && !crtl->calls_eh_return
13825 && bit_count(saved_regs_mask) * 4 == count
13826 && !IS_INTERRUPT (func_type)
13827 && !crtl->tail_call_emit)
13829 unsigned long mask;
13830 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13832 mask &= ~saved_regs_mask;
13834 while (bit_count (mask) * 4 > amount)
13836 while ((mask & (1 << reg)) == 0)
13838 mask &= ~(1 << reg);
13840 if (bit_count (mask) * 4 == amount) {
13842 saved_regs_mask |= mask;
13849 operands[1] = operands[0];
13850 operands[2] = GEN_INT (amount);
13851 output_add_immediate (operands);
13853 if (frame_pointer_needed)
13854 asm_fprintf (f, "\tmov\t%r, %r\n",
13855 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13858 if (TARGET_FPA_EMU2)
13860 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13861 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13862 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13867 start_reg = FIRST_FPA_REGNUM;
13869 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13871 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13873 if (reg - start_reg == 3)
13875 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13876 start_reg, SP_REGNUM);
13877 start_reg = reg + 1;
13882 if (reg != start_reg)
13883 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13884 start_reg, reg - start_reg,
13887 start_reg = reg + 1;
13891 /* Just in case the last register checked also needs unstacking. */
13892 if (reg != start_reg)
13893 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13894 start_reg, reg - start_reg, SP_REGNUM);
13897 if (TARGET_HARD_FLOAT && TARGET_VFP)
13899 int end_reg = LAST_VFP_REGNUM + 1;
13901 /* Scan the registers in reverse order. We need to match
13902 any groupings made in the prologue and generate matching
13904 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
13906 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13907 && (!df_regs_ever_live_p (reg + 1)
13908 || call_used_regs[reg + 1]))
13910 if (end_reg > reg + 2)
13911 vfp_output_fldmd (f, SP_REGNUM,
13912 (reg + 2 - FIRST_VFP_REGNUM) / 2,
13913 (end_reg - (reg + 2)) / 2);
13917 if (end_reg > reg + 2)
13918 vfp_output_fldmd (f, SP_REGNUM, 0,
13919 (end_reg - (reg + 2)) / 2);
13923 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13924 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13925 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13927 /* If we can, restore the LR into the PC. */
13928 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13929 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13930 && !IS_STACKALIGN (func_type)
13932 && crtl->args.pretend_args_size == 0
13933 && saved_regs_mask & (1 << LR_REGNUM)
13934 && !crtl->calls_eh_return)
13936 saved_regs_mask &= ~ (1 << LR_REGNUM);
13937 saved_regs_mask |= (1 << PC_REGNUM);
13938 rfe = IS_INTERRUPT (func_type);
13943 /* Load the registers off the stack. If we only have one register
13944 to load use the LDR instruction - it is faster. For Thumb-2
13945 always use pop and the assembler will pick the best instruction.*/
13946 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13947 && !IS_INTERRUPT(func_type))
13949 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13951 else if (saved_regs_mask)
13953 if (saved_regs_mask & (1 << SP_REGNUM))
13954 /* Note - write back to the stack register is not enabled
13955 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13956 in the list of registers and if we add writeback the
13957 instruction becomes UNPREDICTABLE. */
13958 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13960 else if (TARGET_ARM)
13961 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13964 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13967 if (crtl->args.pretend_args_size)
13969 /* Unwind the pre-pushed regs. */
13970 operands[0] = operands[1] = stack_pointer_rtx;
13971 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13972 output_add_immediate (operands);
13976 /* We may have already restored PC directly from the stack. */
13977 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13980 /* Stack adjustment for exception handler. */
13981 if (crtl->calls_eh_return)
13982 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13983 ARM_EH_STACKADJ_REGNUM);
13985 /* Generate the return instruction. */
13986 switch ((int) ARM_FUNC_TYPE (func_type))
13990 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13993 case ARM_FT_EXCEPTION:
13994 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13997 case ARM_FT_INTERWORKED:
13998 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14002 if (IS_STACKALIGN (func_type))
14004 /* See comment in arm_expand_prologue. */
14005 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14007 if (arm_arch5 || arm_arch4t)
14008 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14010 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14018 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14019 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14021 arm_stack_offsets *offsets;
14027 /* Emit any call-via-reg trampolines that are needed for v4t support
14028 of call_reg and call_value_reg type insns. */
14029 for (regno = 0; regno < LR_REGNUM; regno++)
14031 rtx label = cfun->machine->call_via[regno];
14035 switch_to_section (function_section (current_function_decl));
14036 targetm.asm_out.internal_label (asm_out_file, "L",
14037 CODE_LABEL_NUMBER (label));
14038 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14042 /* ??? Probably not safe to set this here, since it assumes that a
14043 function will be emitted as assembly immediately after we generate
14044 RTL for it. This does not happen for inline functions. */
14045 cfun->machine->return_used_this_function = 0;
14047 else /* TARGET_32BIT */
14049 /* We need to take into account any stack-frame rounding. */
14050 offsets = arm_get_frame_offsets ();
14052 gcc_assert (!use_return_insn (FALSE, NULL)
14053 || (cfun->machine->return_used_this_function != 0)
14054 || offsets->saved_regs == offsets->outgoing_args
14055 || frame_pointer_needed);
14057 /* Reset the ARM-specific per-function variables. */
14058 after_arm_reorg = 0;
14062 /* Generate and emit an insn that we will recognize as a push_multi.
14063 Unfortunately, since this insn does not reflect very well the actual
14064 semantics of the operation, we need to annotate the insn for the benefit
14065 of DWARF2 frame unwind information. */
14067 emit_multi_reg_push (unsigned long mask)
14070 int num_dwarf_regs;
14074 int dwarf_par_index;
14077 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14078 if (mask & (1 << i))
14081 gcc_assert (num_regs && num_regs <= 16);
14083 /* We don't record the PC in the dwarf frame information. */
14084 num_dwarf_regs = num_regs;
14085 if (mask & (1 << PC_REGNUM))
14088 /* For the body of the insn we are going to generate an UNSPEC in
14089 parallel with several USEs. This allows the insn to be recognized
14090 by the push_multi pattern in the arm.md file.
14092 The body of the insn looks something like this:
14095 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14096 (const_int:SI <num>)))
14097 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14103 For the frame note however, we try to be more explicit and actually
14104 show each register being stored into the stack frame, plus a (single)
14105 decrement of the stack pointer. We do it this way in order to be
14106 friendly to the stack unwinding code, which only wants to see a single
14107 stack decrement per instruction. The RTL we generate for the note looks
14108 something like this:
14111 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14112 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14113 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14114 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14118 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14119 instead we'd have a parallel expression detailing all
14120 the stores to the various memory addresses so that debug
14121 information is more up-to-date. Remember however while writing
14122 this to take care of the constraints with the push instruction.
14124 Note also that this has to be taken care of for the VFP registers.
14126 For more see PR43399. */
14128 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14129 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14130 dwarf_par_index = 1;
14132 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14134 if (mask & (1 << i))
14136 reg = gen_rtx_REG (SImode, i);
14138 XVECEXP (par, 0, 0)
14139 = gen_rtx_SET (VOIDmode,
14142 gen_rtx_PRE_MODIFY (Pmode,
14145 (stack_pointer_rtx,
14148 gen_rtx_UNSPEC (BLKmode,
14149 gen_rtvec (1, reg),
14150 UNSPEC_PUSH_MULT));
14152 if (i != PC_REGNUM)
14154 tmp = gen_rtx_SET (VOIDmode,
14155 gen_frame_mem (SImode, stack_pointer_rtx),
14157 RTX_FRAME_RELATED_P (tmp) = 1;
14158 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14166 for (j = 1, i++; j < num_regs; i++)
14168 if (mask & (1 << i))
14170 reg = gen_rtx_REG (SImode, i);
14172 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14174 if (i != PC_REGNUM)
14177 = gen_rtx_SET (VOIDmode,
14180 plus_constant (stack_pointer_rtx,
14183 RTX_FRAME_RELATED_P (tmp) = 1;
14184 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14191 par = emit_insn (par);
14193 tmp = gen_rtx_SET (VOIDmode,
14195 plus_constant (stack_pointer_rtx, -4 * num_regs));
14196 RTX_FRAME_RELATED_P (tmp) = 1;
14197 XVECEXP (dwarf, 0, 0) = tmp;
14199 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14204 /* Calculate the size of the return value that is passed in registers. */
14206 arm_size_return_regs (void)
14208 enum machine_mode mode;
14210 if (crtl->return_rtx != 0)
14211 mode = GET_MODE (crtl->return_rtx);
14213 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14215 return GET_MODE_SIZE (mode);
14219 emit_sfm (int base_reg, int count)
14226 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14227 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14229 reg = gen_rtx_REG (XFmode, base_reg++);
14231 XVECEXP (par, 0, 0)
14232 = gen_rtx_SET (VOIDmode,
14235 gen_rtx_PRE_MODIFY (Pmode,
14238 (stack_pointer_rtx,
14241 gen_rtx_UNSPEC (BLKmode,
14242 gen_rtvec (1, reg),
14243 UNSPEC_PUSH_MULT));
14244 tmp = gen_rtx_SET (VOIDmode,
14245 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14246 RTX_FRAME_RELATED_P (tmp) = 1;
14247 XVECEXP (dwarf, 0, 1) = tmp;
14249 for (i = 1; i < count; i++)
14251 reg = gen_rtx_REG (XFmode, base_reg++);
14252 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14254 tmp = gen_rtx_SET (VOIDmode,
14255 gen_frame_mem (XFmode,
14256 plus_constant (stack_pointer_rtx,
14259 RTX_FRAME_RELATED_P (tmp) = 1;
14260 XVECEXP (dwarf, 0, i + 1) = tmp;
14263 tmp = gen_rtx_SET (VOIDmode,
14265 plus_constant (stack_pointer_rtx, -12 * count));
14267 RTX_FRAME_RELATED_P (tmp) = 1;
14268 XVECEXP (dwarf, 0, 0) = tmp;
14270 par = emit_insn (par);
14271 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14277 /* Return true if the current function needs to save/restore LR. */
14280 thumb_force_lr_save (void)
14282 return !cfun->machine->lr_save_eliminated
14283 && (!leaf_function_p ()
14284 || thumb_far_jump_used_p ()
14285 || df_regs_ever_live_p (LR_REGNUM));
14289 /* Compute the distance from register FROM to register TO.
14290 These can be the arg pointer (26), the soft frame pointer (25),
14291 the stack pointer (13) or the hard frame pointer (11).
14292 In thumb mode r7 is used as the soft frame pointer, if needed.
14293 Typical stack layout looks like this:
14295 old stack pointer -> | |
14298 | | saved arguments for
14299 | | vararg functions
14302 hard FP & arg pointer -> | | \
14310 soft frame pointer -> | | /
14315 locals base pointer -> | | /
14320 current stack pointer -> | | /
14323 For a given function some or all of these stack components
14324 may not be needed, giving rise to the possibility of
14325 eliminating some of the registers.
14327 The values returned by this function must reflect the behavior
14328 of arm_expand_prologue() and arm_compute_save_reg_mask().
14330 The sign of the number returned reflects the direction of stack
14331 growth, so the values are positive for all eliminations except
14332 from the soft frame pointer to the hard frame pointer.
14334 SFP may point just inside the local variables block to ensure correct
14338 /* Calculate stack offsets. These are used to calculate register elimination
14339 offsets and in prologue/epilogue code. Also calculates which registers
14340 should be saved. */
14342 static arm_stack_offsets *
14343 arm_get_frame_offsets (void)
14345 struct arm_stack_offsets *offsets;
14346 unsigned long func_type;
14350 HOST_WIDE_INT frame_size;
14353 offsets = &cfun->machine->stack_offsets;
14355 /* We need to know if we are a leaf function. Unfortunately, it
14356 is possible to be called after start_sequence has been called,
14357 which causes get_insns to return the insns for the sequence,
14358 not the function, which will cause leaf_function_p to return
14359 the incorrect result.
14361 to know about leaf functions once reload has completed, and the
14362 frame size cannot be changed after that time, so we can safely
14363 use the cached value. */
14365 if (reload_completed)
14368 /* Initially this is the size of the local variables. It will translated
14369 into an offset once we have determined the size of preceding data. */
14370 frame_size = ROUND_UP_WORD (get_frame_size ());
14372 leaf = leaf_function_p ();
14374 /* Space for variadic functions. */
14375 offsets->saved_args = crtl->args.pretend_args_size;
14377 /* In Thumb mode this is incorrect, but never used. */
14378 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14379 arm_compute_static_chain_stack_bytes();
14383 unsigned int regno;
14385 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14386 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14387 saved = core_saved;
14389 /* We know that SP will be doubleword aligned on entry, and we must
14390 preserve that condition at any subroutine call. We also require the
14391 soft frame pointer to be doubleword aligned. */
14393 if (TARGET_REALLY_IWMMXT)
14395 /* Check for the call-saved iWMMXt registers. */
14396 for (regno = FIRST_IWMMXT_REGNUM;
14397 regno <= LAST_IWMMXT_REGNUM;
14399 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14403 func_type = arm_current_func_type ();
14404 if (! IS_VOLATILE (func_type))
14406 /* Space for saved FPA registers. */
14407 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14408 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14411 /* Space for saved VFP registers. */
14412 if (TARGET_HARD_FLOAT && TARGET_VFP)
14413 saved += arm_get_vfp_saved_size ();
14416 else /* TARGET_THUMB1 */
14418 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14419 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14420 saved = core_saved;
14421 if (TARGET_BACKTRACE)
14425 /* Saved registers include the stack frame. */
14426 offsets->saved_regs = offsets->saved_args + saved +
14427 arm_compute_static_chain_stack_bytes();
14428 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14429 /* A leaf function does not need any stack alignment if it has nothing
14431 if (leaf && frame_size == 0)
14433 offsets->outgoing_args = offsets->soft_frame;
14434 offsets->locals_base = offsets->soft_frame;
14438 /* Ensure SFP has the correct alignment. */
14439 if (ARM_DOUBLEWORD_ALIGN
14440 && (offsets->soft_frame & 7))
14442 offsets->soft_frame += 4;
14443 /* Try to align stack by pushing an extra reg. Don't bother doing this
14444 when there is a stack frame as the alignment will be rolled into
14445 the normal stack adjustment. */
14446 if (frame_size + crtl->outgoing_args_size == 0)
14450 /* If it is safe to use r3, then do so. This sometimes
14451 generates better code on Thumb-2 by avoiding the need to
14452 use 32-bit push/pop instructions. */
14453 if (!crtl->tail_call_emit
14454 && arm_size_return_regs () <= 12)
14459 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14461 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14470 offsets->saved_regs += 4;
14471 offsets->saved_regs_mask |= (1 << reg);
14476 offsets->locals_base = offsets->soft_frame + frame_size;
14477 offsets->outgoing_args = (offsets->locals_base
14478 + crtl->outgoing_args_size);
14480 if (ARM_DOUBLEWORD_ALIGN)
14482 /* Ensure SP remains doubleword aligned. */
14483 if (offsets->outgoing_args & 7)
14484 offsets->outgoing_args += 4;
14485 gcc_assert (!(offsets->outgoing_args & 7));
14492 /* Calculate the relative offsets for the different stack pointers. Positive
14493 offsets are in the direction of stack growth. */
14496 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14498 arm_stack_offsets *offsets;
14500 offsets = arm_get_frame_offsets ();
14502 /* OK, now we have enough information to compute the distances.
14503 There must be an entry in these switch tables for each pair
14504 of registers in ELIMINABLE_REGS, even if some of the entries
14505 seem to be redundant or useless. */
14508 case ARG_POINTER_REGNUM:
14511 case THUMB_HARD_FRAME_POINTER_REGNUM:
14514 case FRAME_POINTER_REGNUM:
14515 /* This is the reverse of the soft frame pointer
14516 to hard frame pointer elimination below. */
14517 return offsets->soft_frame - offsets->saved_args;
14519 case ARM_HARD_FRAME_POINTER_REGNUM:
14520 /* This is only non-zero in the case where the static chain register
14521 is stored above the frame. */
14522 return offsets->frame - offsets->saved_args - 4;
14524 case STACK_POINTER_REGNUM:
14525 /* If nothing has been pushed on the stack at all
14526 then this will return -4. This *is* correct! */
14527 return offsets->outgoing_args - (offsets->saved_args + 4);
14530 gcc_unreachable ();
14532 gcc_unreachable ();
14534 case FRAME_POINTER_REGNUM:
14537 case THUMB_HARD_FRAME_POINTER_REGNUM:
14540 case ARM_HARD_FRAME_POINTER_REGNUM:
14541 /* The hard frame pointer points to the top entry in the
14542 stack frame. The soft frame pointer to the bottom entry
14543 in the stack frame. If there is no stack frame at all,
14544 then they are identical. */
14546 return offsets->frame - offsets->soft_frame;
14548 case STACK_POINTER_REGNUM:
14549 return offsets->outgoing_args - offsets->soft_frame;
14552 gcc_unreachable ();
14554 gcc_unreachable ();
14557 /* You cannot eliminate from the stack pointer.
14558 In theory you could eliminate from the hard frame
14559 pointer to the stack pointer, but this will never
14560 happen, since if a stack frame is not needed the
14561 hard frame pointer will never be used. */
14562 gcc_unreachable ();
14566 /* Given FROM and TO register numbers, say whether this elimination is
14567 allowed. Frame pointer elimination is automatically handled.
14569 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14570 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14571 pointer, we must eliminate FRAME_POINTER_REGNUM into
14572 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14573 ARG_POINTER_REGNUM. */
14576 arm_can_eliminate (const int from, const int to)
14578 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14579 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14580 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14581 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14585 /* Emit RTL to save coprocessor registers on function entry. Returns the
14586 number of bytes pushed. */
14589 arm_save_coproc_regs(void)
14591 int saved_size = 0;
14593 unsigned start_reg;
14596 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14597 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14599 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14600 insn = gen_rtx_MEM (V2SImode, insn);
14601 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14602 RTX_FRAME_RELATED_P (insn) = 1;
14606 /* Save any floating point call-saved registers used by this
14608 if (TARGET_FPA_EMU2)
14610 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14611 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14613 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14614 insn = gen_rtx_MEM (XFmode, insn);
14615 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14616 RTX_FRAME_RELATED_P (insn) = 1;
14622 start_reg = LAST_FPA_REGNUM;
14624 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14626 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14628 if (start_reg - reg == 3)
14630 insn = emit_sfm (reg, 4);
14631 RTX_FRAME_RELATED_P (insn) = 1;
14633 start_reg = reg - 1;
14638 if (start_reg != reg)
14640 insn = emit_sfm (reg + 1, start_reg - reg);
14641 RTX_FRAME_RELATED_P (insn) = 1;
14642 saved_size += (start_reg - reg) * 12;
14644 start_reg = reg - 1;
14648 if (start_reg != reg)
14650 insn = emit_sfm (reg + 1, start_reg - reg);
14651 saved_size += (start_reg - reg) * 12;
14652 RTX_FRAME_RELATED_P (insn) = 1;
14655 if (TARGET_HARD_FLOAT && TARGET_VFP)
14657 start_reg = FIRST_VFP_REGNUM;
14659 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14661 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14662 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14664 if (start_reg != reg)
14665 saved_size += vfp_emit_fstmd (start_reg,
14666 (reg - start_reg) / 2);
14667 start_reg = reg + 2;
14670 if (start_reg != reg)
14671 saved_size += vfp_emit_fstmd (start_reg,
14672 (reg - start_reg) / 2);
14678 /* Set the Thumb frame pointer from the stack pointer. */
14681 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14683 HOST_WIDE_INT amount;
14686 amount = offsets->outgoing_args - offsets->locals_base;
14688 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14689 stack_pointer_rtx, GEN_INT (amount)));
14692 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14693 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14694 expects the first two operands to be the same. */
14697 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14699 hard_frame_pointer_rtx));
14703 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14704 hard_frame_pointer_rtx,
14705 stack_pointer_rtx));
14707 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14708 plus_constant (stack_pointer_rtx, amount));
14709 RTX_FRAME_RELATED_P (dwarf) = 1;
14710 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14713 RTX_FRAME_RELATED_P (insn) = 1;
14716 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14719 arm_expand_prologue (void)
14724 unsigned long live_regs_mask;
14725 unsigned long func_type;
14727 int saved_pretend_args = 0;
14728 int saved_regs = 0;
14729 unsigned HOST_WIDE_INT args_to_push;
14730 arm_stack_offsets *offsets;
14732 func_type = arm_current_func_type ();
14734 /* Naked functions don't have prologues. */
14735 if (IS_NAKED (func_type))
14738 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14739 args_to_push = crtl->args.pretend_args_size;
14741 /* Compute which register we will have to save onto the stack. */
14742 offsets = arm_get_frame_offsets ();
14743 live_regs_mask = offsets->saved_regs_mask;
14745 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14747 if (IS_STACKALIGN (func_type))
14752 /* Handle a word-aligned stack pointer. We generate the following:
14757 <save and restore r0 in normal prologue/epilogue>
14761 The unwinder doesn't need to know about the stack realignment.
14762 Just tell it we saved SP in r0. */
14763 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14765 r0 = gen_rtx_REG (SImode, 0);
14766 r1 = gen_rtx_REG (SImode, 1);
14767 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14768 compiler won't choke. */
14769 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14770 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14771 insn = gen_movsi (r0, stack_pointer_rtx);
14772 RTX_FRAME_RELATED_P (insn) = 1;
14773 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14775 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14776 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14779 /* For APCS frames, if IP register is clobbered
14780 when creating frame, save that register in a special
14782 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14784 if (IS_INTERRUPT (func_type))
14786 /* Interrupt functions must not corrupt any registers.
14787 Creating a frame pointer however, corrupts the IP
14788 register, so we must push it first. */
14789 insn = emit_multi_reg_push (1 << IP_REGNUM);
14791 /* Do not set RTX_FRAME_RELATED_P on this insn.
14792 The dwarf stack unwinding code only wants to see one
14793 stack decrement per function, and this is not it. If
14794 this instruction is labeled as being part of the frame
14795 creation sequence then dwarf2out_frame_debug_expr will
14796 die when it encounters the assignment of IP to FP
14797 later on, since the use of SP here establishes SP as
14798 the CFA register and not IP.
14800 Anyway this instruction is not really part of the stack
14801 frame creation although it is part of the prologue. */
14803 else if (IS_NESTED (func_type))
14805 /* The Static chain register is the same as the IP register
14806 used as a scratch register during stack frame creation.
14807 To get around this need to find somewhere to store IP
14808 whilst the frame is being created. We try the following
14811 1. The last argument register.
14812 2. A slot on the stack above the frame. (This only
14813 works if the function is not a varargs function).
14814 3. Register r3, after pushing the argument registers
14817 Note - we only need to tell the dwarf2 backend about the SP
14818 adjustment in the second variant; the static chain register
14819 doesn't need to be unwound, as it doesn't contain a value
14820 inherited from the caller. */
14822 if (df_regs_ever_live_p (3) == false)
14823 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14824 else if (args_to_push == 0)
14828 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14831 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14832 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14835 /* Just tell the dwarf backend that we adjusted SP. */
14836 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14837 plus_constant (stack_pointer_rtx,
14839 RTX_FRAME_RELATED_P (insn) = 1;
14840 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14844 /* Store the args on the stack. */
14845 if (cfun->machine->uses_anonymous_args)
14846 insn = emit_multi_reg_push
14847 ((0xf0 >> (args_to_push / 4)) & 0xf);
14850 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14851 GEN_INT (- args_to_push)));
14853 RTX_FRAME_RELATED_P (insn) = 1;
14855 saved_pretend_args = 1;
14856 fp_offset = args_to_push;
14859 /* Now reuse r3 to preserve IP. */
14860 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14864 insn = emit_set_insn (ip_rtx,
14865 plus_constant (stack_pointer_rtx, fp_offset));
14866 RTX_FRAME_RELATED_P (insn) = 1;
14871 /* Push the argument registers, or reserve space for them. */
14872 if (cfun->machine->uses_anonymous_args)
14873 insn = emit_multi_reg_push
14874 ((0xf0 >> (args_to_push / 4)) & 0xf);
14877 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14878 GEN_INT (- args_to_push)));
14879 RTX_FRAME_RELATED_P (insn) = 1;
14882 /* If this is an interrupt service routine, and the link register
14883 is going to be pushed, and we're not generating extra
14884 push of IP (needed when frame is needed and frame layout if apcs),
14885 subtracting four from LR now will mean that the function return
14886 can be done with a single instruction. */
14887 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14888 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14889 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14892 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14894 emit_set_insn (lr, plus_constant (lr, -4));
14897 if (live_regs_mask)
14899 saved_regs += bit_count (live_regs_mask) * 4;
14900 if (optimize_size && !frame_pointer_needed
14901 && saved_regs == offsets->saved_regs - offsets->saved_args)
14903 /* If no coprocessor registers are being pushed and we don't have
14904 to worry about a frame pointer then push extra registers to
14905 create the stack frame. This is done is a way that does not
14906 alter the frame layout, so is independent of the epilogue. */
14910 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14912 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14913 if (frame && n * 4 >= frame)
14916 live_regs_mask |= (1 << n) - 1;
14917 saved_regs += frame;
14920 insn = emit_multi_reg_push (live_regs_mask);
14921 RTX_FRAME_RELATED_P (insn) = 1;
14924 if (! IS_VOLATILE (func_type))
14925 saved_regs += arm_save_coproc_regs ();
14927 if (frame_pointer_needed && TARGET_ARM)
14929 /* Create the new frame pointer. */
14930 if (TARGET_APCS_FRAME)
14932 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14933 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14934 RTX_FRAME_RELATED_P (insn) = 1;
14936 if (IS_NESTED (func_type))
14938 /* Recover the static chain register. */
14939 if (!df_regs_ever_live_p (3)
14940 || saved_pretend_args)
14941 insn = gen_rtx_REG (SImode, 3);
14942 else /* if (crtl->args.pretend_args_size == 0) */
14944 insn = plus_constant (hard_frame_pointer_rtx, 4);
14945 insn = gen_frame_mem (SImode, insn);
14947 emit_set_insn (ip_rtx, insn);
14948 /* Add a USE to stop propagate_one_insn() from barfing. */
14949 emit_insn (gen_prologue_use (ip_rtx));
14954 insn = GEN_INT (saved_regs - 4);
14955 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14956 stack_pointer_rtx, insn));
14957 RTX_FRAME_RELATED_P (insn) = 1;
14961 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14963 /* This add can produce multiple insns for a large constant, so we
14964 need to get tricky. */
14965 rtx last = get_last_insn ();
14967 amount = GEN_INT (offsets->saved_args + saved_regs
14968 - offsets->outgoing_args);
14970 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14974 last = last ? NEXT_INSN (last) : get_insns ();
14975 RTX_FRAME_RELATED_P (last) = 1;
14977 while (last != insn);
14979 /* If the frame pointer is needed, emit a special barrier that
14980 will prevent the scheduler from moving stores to the frame
14981 before the stack adjustment. */
14982 if (frame_pointer_needed)
14983 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14984 hard_frame_pointer_rtx));
14988 if (frame_pointer_needed && TARGET_THUMB2)
14989 thumb_set_frame_pointer (offsets);
14991 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14993 unsigned long mask;
14995 mask = live_regs_mask;
14996 mask &= THUMB2_WORK_REGS;
14997 if (!IS_NESTED (func_type))
14998 mask |= (1 << IP_REGNUM);
14999 arm_load_pic_register (mask);
15002 /* If we are profiling, make sure no instructions are scheduled before
15003 the call to mcount. Similarly if the user has requested no
15004 scheduling in the prolog. Similarly if we want non-call exceptions
15005 using the EABI unwinder, to prevent faulting instructions from being
15006 swapped with a stack adjustment. */
15007 if (crtl->profile || !TARGET_SCHED_PROLOG
15008 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
15009 emit_insn (gen_blockage ());
15011 /* If the link register is being kept alive, with the return address in it,
15012 then make sure that it does not get reused by the ce2 pass. */
15013 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15014 cfun->machine->lr_save_eliminated = 1;
15017 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15019 arm_print_condition (FILE *stream)
15021 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15023 /* Branch conversion is not implemented for Thumb-2. */
15026 output_operand_lossage ("predicated Thumb instruction");
15029 if (current_insn_predicate != NULL)
15031 output_operand_lossage
15032 ("predicated instruction in conditional sequence");
15036 fputs (arm_condition_codes[arm_current_cc], stream);
15038 else if (current_insn_predicate)
15040 enum arm_cond_code code;
15044 output_operand_lossage ("predicated Thumb instruction");
15048 code = get_arm_condition_code (current_insn_predicate);
15049 fputs (arm_condition_codes[code], stream);
15054 /* If CODE is 'd', then the X is a condition operand and the instruction
15055 should only be executed if the condition is true.
15056 if CODE is 'D', then the X is a condition operand and the instruction
15057 should only be executed if the condition is false: however, if the mode
15058 of the comparison is CCFPEmode, then always execute the instruction -- we
15059 do this because in these circumstances !GE does not necessarily imply LT;
15060 in these cases the instruction pattern will take care to make sure that
15061 an instruction containing %d will follow, thereby undoing the effects of
15062 doing this instruction unconditionally.
15063 If CODE is 'N' then X is a floating point operand that must be negated
15065 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15066 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15068 arm_print_operand (FILE *stream, rtx x, int code)
15073 fputs (ASM_COMMENT_START, stream);
15077 fputs (user_label_prefix, stream);
15081 fputs (REGISTER_PREFIX, stream);
15085 arm_print_condition (stream);
15089 /* Nothing in unified syntax, otherwise the current condition code. */
15090 if (!TARGET_UNIFIED_ASM)
15091 arm_print_condition (stream);
15095 /* The current condition code in unified syntax, otherwise nothing. */
15096 if (TARGET_UNIFIED_ASM)
15097 arm_print_condition (stream);
15101 /* The current condition code for a condition code setting instruction.
15102 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15103 if (TARGET_UNIFIED_ASM)
15105 fputc('s', stream);
15106 arm_print_condition (stream);
15110 arm_print_condition (stream);
15111 fputc('s', stream);
15116 /* If the instruction is conditionally executed then print
15117 the current condition code, otherwise print 's'. */
15118 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15119 if (current_insn_predicate)
15120 arm_print_condition (stream);
15122 fputc('s', stream);
15125 /* %# is a "break" sequence. It doesn't output anything, but is used to
15126 separate e.g. operand numbers from following text, if that text consists
15127 of further digits which we don't want to be part of the operand
15135 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15136 r = real_value_negate (&r);
15137 fprintf (stream, "%s", fp_const_from_val (&r));
15141 /* An integer or symbol address without a preceding # sign. */
15143 switch (GET_CODE (x))
15146 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15150 output_addr_const (stream, x);
15154 gcc_unreachable ();
15159 if (GET_CODE (x) == CONST_INT)
15162 val = ARM_SIGN_EXTEND (~INTVAL (x));
15163 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15167 putc ('~', stream);
15168 output_addr_const (stream, x);
15173 /* The low 16 bits of an immediate constant. */
15174 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15178 fprintf (stream, "%s", arithmetic_instr (x, 1));
15181 /* Truncate Cirrus shift counts. */
15183 if (GET_CODE (x) == CONST_INT)
15185 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15188 arm_print_operand (stream, x, 0);
15192 fprintf (stream, "%s", arithmetic_instr (x, 0));
15200 if (!shift_operator (x, SImode))
15202 output_operand_lossage ("invalid shift operand");
15206 shift = shift_op (x, &val);
15210 fprintf (stream, ", %s ", shift);
15212 arm_print_operand (stream, XEXP (x, 1), 0);
15214 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15219 /* An explanation of the 'Q', 'R' and 'H' register operands:
15221 In a pair of registers containing a DI or DF value the 'Q'
15222 operand returns the register number of the register containing
15223 the least significant part of the value. The 'R' operand returns
15224 the register number of the register containing the most
15225 significant part of the value.
15227 The 'H' operand returns the higher of the two register numbers.
15228 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15229 same as the 'Q' operand, since the most significant part of the
15230 value is held in the lower number register. The reverse is true
15231 on systems where WORDS_BIG_ENDIAN is false.
15233 The purpose of these operands is to distinguish between cases
15234 where the endian-ness of the values is important (for example
15235 when they are added together), and cases where the endian-ness
15236 is irrelevant, but the order of register operations is important.
15237 For example when loading a value from memory into a register
15238 pair, the endian-ness does not matter. Provided that the value
15239 from the lower memory address is put into the lower numbered
15240 register, and the value from the higher address is put into the
15241 higher numbered register, the load will work regardless of whether
15242 the value being loaded is big-wordian or little-wordian. The
15243 order of the two register loads can matter however, if the address
15244 of the memory location is actually held in one of the registers
15245 being overwritten by the load. */
15247 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15249 output_operand_lossage ("invalid operand for code '%c'", code);
15253 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15257 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15259 output_operand_lossage ("invalid operand for code '%c'", code);
15263 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15267 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15269 output_operand_lossage ("invalid operand for code '%c'", code);
15273 asm_fprintf (stream, "%r", REGNO (x) + 1);
15277 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15279 output_operand_lossage ("invalid operand for code '%c'", code);
15283 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15287 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15289 output_operand_lossage ("invalid operand for code '%c'", code);
15293 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15297 asm_fprintf (stream, "%r",
15298 GET_CODE (XEXP (x, 0)) == REG
15299 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15303 asm_fprintf (stream, "{%r-%r}",
15305 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15308 /* Like 'M', but writing doubleword vector registers, for use by Neon
15312 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15313 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15315 asm_fprintf (stream, "{d%d}", regno);
15317 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15322 /* CONST_TRUE_RTX means always -- that's the default. */
15323 if (x == const_true_rtx)
15326 if (!COMPARISON_P (x))
15328 output_operand_lossage ("invalid operand for code '%c'", code);
15332 fputs (arm_condition_codes[get_arm_condition_code (x)],
15337 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15338 want to do that. */
15339 if (x == const_true_rtx)
15341 output_operand_lossage ("instruction never executed");
15344 if (!COMPARISON_P (x))
15346 output_operand_lossage ("invalid operand for code '%c'", code);
15350 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15351 (get_arm_condition_code (x))],
15355 /* Cirrus registers can be accessed in a variety of ways:
15356 single floating point (f)
15357 double floating point (d)
15359 64bit integer (dx). */
15360 case 'W': /* Cirrus register in F mode. */
15361 case 'X': /* Cirrus register in D mode. */
15362 case 'Y': /* Cirrus register in FX mode. */
15363 case 'Z': /* Cirrus register in DX mode. */
15364 gcc_assert (GET_CODE (x) == REG
15365 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15367 fprintf (stream, "mv%s%s",
15369 : code == 'X' ? "d"
15370 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15374 /* Print cirrus register in the mode specified by the register's mode. */
15377 int mode = GET_MODE (x);
15379 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15381 output_operand_lossage ("invalid operand for code '%c'", code);
15385 fprintf (stream, "mv%s%s",
15386 mode == DFmode ? "d"
15387 : mode == SImode ? "fx"
15388 : mode == DImode ? "dx"
15389 : "f", reg_names[REGNO (x)] + 2);
15395 if (GET_CODE (x) != REG
15396 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15397 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15398 /* Bad value for wCG register number. */
15400 output_operand_lossage ("invalid operand for code '%c'", code);
15405 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15408 /* Print an iWMMXt control register name. */
15410 if (GET_CODE (x) != CONST_INT
15412 || INTVAL (x) >= 16)
15413 /* Bad value for wC register number. */
15415 output_operand_lossage ("invalid operand for code '%c'", code);
15421 static const char * wc_reg_names [16] =
15423 "wCID", "wCon", "wCSSF", "wCASF",
15424 "wC4", "wC5", "wC6", "wC7",
15425 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15426 "wC12", "wC13", "wC14", "wC15"
15429 fprintf (stream, wc_reg_names [INTVAL (x)]);
15433 /* Print the high single-precision register of a VFP double-precision
15437 int mode = GET_MODE (x);
15440 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15442 output_operand_lossage ("invalid operand for code '%c'", code);
15447 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15449 output_operand_lossage ("invalid operand for code '%c'", code);
15453 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15457 /* Print a VFP/Neon double precision or quad precision register name. */
15461 int mode = GET_MODE (x);
15462 int is_quad = (code == 'q');
15465 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15467 output_operand_lossage ("invalid operand for code '%c'", code);
15471 if (GET_CODE (x) != REG
15472 || !IS_VFP_REGNUM (REGNO (x)))
15474 output_operand_lossage ("invalid operand for code '%c'", code);
15479 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15480 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15482 output_operand_lossage ("invalid operand for code '%c'", code);
15486 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15487 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15491 /* These two codes print the low/high doubleword register of a Neon quad
15492 register, respectively. For pair-structure types, can also print
15493 low/high quadword registers. */
15497 int mode = GET_MODE (x);
15500 if ((GET_MODE_SIZE (mode) != 16
15501 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15503 output_operand_lossage ("invalid operand for code '%c'", code);
15508 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15510 output_operand_lossage ("invalid operand for code '%c'", code);
15514 if (GET_MODE_SIZE (mode) == 16)
15515 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15516 + (code == 'f' ? 1 : 0));
15518 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15519 + (code == 'f' ? 1 : 0));
15523 /* Print a VFPv3 floating-point constant, represented as an integer
15527 int index = vfp3_const_double_index (x);
15528 gcc_assert (index != -1);
15529 fprintf (stream, "%d", index);
15533 /* Print bits representing opcode features for Neon.
15535 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15536 and polynomials as unsigned.
15538 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15540 Bit 2 is 1 for rounding functions, 0 otherwise. */
15542 /* Identify the type as 's', 'u', 'p' or 'f'. */
15545 HOST_WIDE_INT bits = INTVAL (x);
15546 fputc ("uspf"[bits & 3], stream);
15550 /* Likewise, but signed and unsigned integers are both 'i'. */
15553 HOST_WIDE_INT bits = INTVAL (x);
15554 fputc ("iipf"[bits & 3], stream);
15558 /* As for 'T', but emit 'u' instead of 'p'. */
15561 HOST_WIDE_INT bits = INTVAL (x);
15562 fputc ("usuf"[bits & 3], stream);
15566 /* Bit 2: rounding (vs none). */
15569 HOST_WIDE_INT bits = INTVAL (x);
15570 fputs ((bits & 4) != 0 ? "r" : "", stream);
15574 /* Memory operand for vld1/vst1 instruction. */
15578 bool postinc = FALSE;
15579 gcc_assert (GET_CODE (x) == MEM);
15580 addr = XEXP (x, 0);
15581 if (GET_CODE (addr) == POST_INC)
15584 addr = XEXP (addr, 0);
15586 asm_fprintf (stream, "[%r]", REGNO (addr));
15588 fputs("!", stream);
15592 /* Translate an S register number into a D register number and element index. */
15595 int mode = GET_MODE (x);
15598 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15600 output_operand_lossage ("invalid operand for code '%c'", code);
15605 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15607 output_operand_lossage ("invalid operand for code '%c'", code);
15611 regno = regno - FIRST_VFP_REGNUM;
15612 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15616 /* Register specifier for vld1.16/vst1.16. Translate the S register
15617 number into a D register number and element index. */
15620 int mode = GET_MODE (x);
15623 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15625 output_operand_lossage ("invalid operand for code '%c'", code);
15630 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15632 output_operand_lossage ("invalid operand for code '%c'", code);
15636 regno = regno - FIRST_VFP_REGNUM;
15637 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15644 output_operand_lossage ("missing operand");
15648 switch (GET_CODE (x))
15651 asm_fprintf (stream, "%r", REGNO (x));
15655 output_memory_reference_mode = GET_MODE (x);
15656 output_address (XEXP (x, 0));
15663 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15664 sizeof (fpstr), 0, 1);
15665 fprintf (stream, "#%s", fpstr);
15668 fprintf (stream, "#%s", fp_immediate_constant (x));
15672 gcc_assert (GET_CODE (x) != NEG);
15673 fputc ('#', stream);
15674 if (GET_CODE (x) == HIGH)
15676 fputs (":lower16:", stream);
15680 output_addr_const (stream, x);
15686 /* Target hook for assembling integer objects. The ARM version needs to
15687 handle word-sized values specially. */
15689 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15691 enum machine_mode mode;
15693 if (size == UNITS_PER_WORD && aligned_p)
15695 fputs ("\t.word\t", asm_out_file);
15696 output_addr_const (asm_out_file, x);
15698 /* Mark symbols as position independent. We only do this in the
15699 .text segment, not in the .data segment. */
15700 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15701 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15703 /* See legitimize_pic_address for an explanation of the
15704 TARGET_VXWORKS_RTP check. */
15705 if (TARGET_VXWORKS_RTP
15706 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15707 fputs ("(GOT)", asm_out_file);
15709 fputs ("(GOTOFF)", asm_out_file);
15711 fputc ('\n', asm_out_file);
15715 mode = GET_MODE (x);
15717 if (arm_vector_mode_supported_p (mode))
15721 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15723 units = CONST_VECTOR_NUNITS (x);
15724 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15726 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15727 for (i = 0; i < units; i++)
15729 rtx elt = CONST_VECTOR_ELT (x, i);
15731 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15734 for (i = 0; i < units; i++)
15736 rtx elt = CONST_VECTOR_ELT (x, i);
15737 REAL_VALUE_TYPE rval;
15739 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15742 (rval, GET_MODE_INNER (mode),
15743 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15749 return default_assemble_integer (x, size, aligned_p);
15753 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15757 if (!TARGET_AAPCS_BASED)
15760 default_named_section_asm_out_constructor
15761 : default_named_section_asm_out_destructor) (symbol, priority);
15765 /* Put these in the .init_array section, using a special relocation. */
15766 if (priority != DEFAULT_INIT_PRIORITY)
15769 sprintf (buf, "%s.%.5u",
15770 is_ctor ? ".init_array" : ".fini_array",
15772 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15779 switch_to_section (s);
15780 assemble_align (POINTER_SIZE);
15781 fputs ("\t.word\t", asm_out_file);
15782 output_addr_const (asm_out_file, symbol);
15783 fputs ("(target1)\n", asm_out_file);
15786 /* Add a function to the list of static constructors. */
15789 arm_elf_asm_constructor (rtx symbol, int priority)
15791 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15794 /* Add a function to the list of static destructors. */
15797 arm_elf_asm_destructor (rtx symbol, int priority)
15799 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15802 /* A finite state machine takes care of noticing whether or not instructions
15803 can be conditionally executed, and thus decrease execution time and code
15804 size by deleting branch instructions. The fsm is controlled by
15805 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15807 /* The state of the fsm controlling condition codes are:
15808 0: normal, do nothing special
15809 1: make ASM_OUTPUT_OPCODE not output this instruction
15810 2: make ASM_OUTPUT_OPCODE not output this instruction
15811 3: make instructions conditional
15812 4: make instructions conditional
15814 State transitions (state->state by whom under condition):
15815 0 -> 1 final_prescan_insn if the `target' is a label
15816 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15817 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15818 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15819 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15820 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15821 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15822 (the target insn is arm_target_insn).
15824 If the jump clobbers the conditions then we use states 2 and 4.
15826 A similar thing can be done with conditional return insns.
15828 XXX In case the `target' is an unconditional branch, this conditionalising
15829 of the instructions always reduces code size, but not always execution
15830 time. But then, I want to reduce the code size to somewhere near what
15831 /bin/cc produces. */
15833 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15834 instructions. When a COND_EXEC instruction is seen the subsequent
15835 instructions are scanned so that multiple conditional instructions can be
15836 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15837 specify the length and true/false mask for the IT block. These will be
15838 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
15840 /* Returns the index of the ARM condition code string in
15841 `arm_condition_codes'. COMPARISON should be an rtx like
15842 `(eq (...) (...))'. */
15843 static enum arm_cond_code
15844 get_arm_condition_code (rtx comparison)
15846 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15847 enum arm_cond_code code;
15848 enum rtx_code comp_code = GET_CODE (comparison);
15850 if (GET_MODE_CLASS (mode) != MODE_CC)
15851 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15852 XEXP (comparison, 1));
15856 case CC_DNEmode: code = ARM_NE; goto dominance;
15857 case CC_DEQmode: code = ARM_EQ; goto dominance;
15858 case CC_DGEmode: code = ARM_GE; goto dominance;
15859 case CC_DGTmode: code = ARM_GT; goto dominance;
15860 case CC_DLEmode: code = ARM_LE; goto dominance;
15861 case CC_DLTmode: code = ARM_LT; goto dominance;
15862 case CC_DGEUmode: code = ARM_CS; goto dominance;
15863 case CC_DGTUmode: code = ARM_HI; goto dominance;
15864 case CC_DLEUmode: code = ARM_LS; goto dominance;
15865 case CC_DLTUmode: code = ARM_CC;
15868 gcc_assert (comp_code == EQ || comp_code == NE);
15870 if (comp_code == EQ)
15871 return ARM_INVERSE_CONDITION_CODE (code);
15877 case NE: return ARM_NE;
15878 case EQ: return ARM_EQ;
15879 case GE: return ARM_PL;
15880 case LT: return ARM_MI;
15881 default: gcc_unreachable ();
15887 case NE: return ARM_NE;
15888 case EQ: return ARM_EQ;
15889 default: gcc_unreachable ();
15895 case NE: return ARM_MI;
15896 case EQ: return ARM_PL;
15897 default: gcc_unreachable ();
15902 /* These encodings assume that AC=1 in the FPA system control
15903 byte. This allows us to handle all cases except UNEQ and
15907 case GE: return ARM_GE;
15908 case GT: return ARM_GT;
15909 case LE: return ARM_LS;
15910 case LT: return ARM_MI;
15911 case NE: return ARM_NE;
15912 case EQ: return ARM_EQ;
15913 case ORDERED: return ARM_VC;
15914 case UNORDERED: return ARM_VS;
15915 case UNLT: return ARM_LT;
15916 case UNLE: return ARM_LE;
15917 case UNGT: return ARM_HI;
15918 case UNGE: return ARM_PL;
15919 /* UNEQ and LTGT do not have a representation. */
15920 case UNEQ: /* Fall through. */
15921 case LTGT: /* Fall through. */
15922 default: gcc_unreachable ();
15928 case NE: return ARM_NE;
15929 case EQ: return ARM_EQ;
15930 case GE: return ARM_LE;
15931 case GT: return ARM_LT;
15932 case LE: return ARM_GE;
15933 case LT: return ARM_GT;
15934 case GEU: return ARM_LS;
15935 case GTU: return ARM_CC;
15936 case LEU: return ARM_CS;
15937 case LTU: return ARM_HI;
15938 default: gcc_unreachable ();
15944 case LTU: return ARM_CS;
15945 case GEU: return ARM_CC;
15946 default: gcc_unreachable ();
15952 case NE: return ARM_NE;
15953 case EQ: return ARM_EQ;
15954 case GE: return ARM_GE;
15955 case GT: return ARM_GT;
15956 case LE: return ARM_LE;
15957 case LT: return ARM_LT;
15958 case GEU: return ARM_CS;
15959 case GTU: return ARM_HI;
15960 case LEU: return ARM_LS;
15961 case LTU: return ARM_CC;
15962 default: gcc_unreachable ();
15965 default: gcc_unreachable ();
15969 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15972 thumb2_final_prescan_insn (rtx insn)
15974 rtx first_insn = insn;
15975 rtx body = PATTERN (insn);
15977 enum arm_cond_code code;
15981 /* Remove the previous insn from the count of insns to be output. */
15982 if (arm_condexec_count)
15983 arm_condexec_count--;
15985 /* Nothing to do if we are already inside a conditional block. */
15986 if (arm_condexec_count)
15989 if (GET_CODE (body) != COND_EXEC)
15992 /* Conditional jumps are implemented directly. */
15993 if (GET_CODE (insn) == JUMP_INSN)
15996 predicate = COND_EXEC_TEST (body);
15997 arm_current_cc = get_arm_condition_code (predicate);
15999 n = get_attr_ce_count (insn);
16000 arm_condexec_count = 1;
16001 arm_condexec_mask = (1 << n) - 1;
16002 arm_condexec_masklen = n;
16003 /* See if subsequent instructions can be combined into the same block. */
16006 insn = next_nonnote_insn (insn);
16008 /* Jumping into the middle of an IT block is illegal, so a label or
16009 barrier terminates the block. */
16010 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
16013 body = PATTERN (insn);
16014 /* USE and CLOBBER aren't really insns, so just skip them. */
16015 if (GET_CODE (body) == USE
16016 || GET_CODE (body) == CLOBBER)
16019 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16020 if (GET_CODE (body) != COND_EXEC)
16022 /* Allow up to 4 conditionally executed instructions in a block. */
16023 n = get_attr_ce_count (insn);
16024 if (arm_condexec_masklen + n > 4)
16027 predicate = COND_EXEC_TEST (body);
16028 code = get_arm_condition_code (predicate);
16029 mask = (1 << n) - 1;
16030 if (arm_current_cc == code)
16031 arm_condexec_mask |= (mask << arm_condexec_masklen);
16032 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
16035 arm_condexec_count++;
16036 arm_condexec_masklen += n;
16038 /* A jump must be the last instruction in a conditional block. */
16039 if (GET_CODE(insn) == JUMP_INSN)
16042 /* Restore recog_data (getting the attributes of other insns can
16043 destroy this array, but final.c assumes that it remains intact
16044 across this call). */
16045 extract_constrain_insn_cached (first_insn);
16049 arm_final_prescan_insn (rtx insn)
16051 /* BODY will hold the body of INSN. */
16052 rtx body = PATTERN (insn);
16054 /* This will be 1 if trying to repeat the trick, and things need to be
16055 reversed if it appears to fail. */
16058 /* If we start with a return insn, we only succeed if we find another one. */
16059 int seeking_return = 0;
16061 /* START_INSN will hold the insn from where we start looking. This is the
16062 first insn after the following code_label if REVERSE is true. */
16063 rtx start_insn = insn;
16065 /* If in state 4, check if the target branch is reached, in order to
16066 change back to state 0. */
16067 if (arm_ccfsm_state == 4)
16069 if (insn == arm_target_insn)
16071 arm_target_insn = NULL;
16072 arm_ccfsm_state = 0;
16077 /* If in state 3, it is possible to repeat the trick, if this insn is an
16078 unconditional branch to a label, and immediately following this branch
16079 is the previous target label which is only used once, and the label this
16080 branch jumps to is not too far off. */
16081 if (arm_ccfsm_state == 3)
16083 if (simplejump_p (insn))
16085 start_insn = next_nonnote_insn (start_insn);
16086 if (GET_CODE (start_insn) == BARRIER)
16088 /* XXX Isn't this always a barrier? */
16089 start_insn = next_nonnote_insn (start_insn);
16091 if (GET_CODE (start_insn) == CODE_LABEL
16092 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16093 && LABEL_NUSES (start_insn) == 1)
16098 else if (GET_CODE (body) == RETURN)
16100 start_insn = next_nonnote_insn (start_insn);
16101 if (GET_CODE (start_insn) == BARRIER)
16102 start_insn = next_nonnote_insn (start_insn);
16103 if (GET_CODE (start_insn) == CODE_LABEL
16104 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16105 && LABEL_NUSES (start_insn) == 1)
16108 seeking_return = 1;
16117 gcc_assert (!arm_ccfsm_state || reverse);
16118 if (GET_CODE (insn) != JUMP_INSN)
16121 /* This jump might be paralleled with a clobber of the condition codes
16122 the jump should always come first */
16123 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16124 body = XVECEXP (body, 0, 0);
16127 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16128 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16131 int fail = FALSE, succeed = FALSE;
16132 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16133 int then_not_else = TRUE;
16134 rtx this_insn = start_insn, label = 0;
16136 /* Register the insn jumped to. */
16139 if (!seeking_return)
16140 label = XEXP (SET_SRC (body), 0);
16142 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16143 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16144 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16146 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16147 then_not_else = FALSE;
16149 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16150 seeking_return = 1;
16151 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16153 seeking_return = 1;
16154 then_not_else = FALSE;
16157 gcc_unreachable ();
16159 /* See how many insns this branch skips, and what kind of insns. If all
16160 insns are okay, and the label or unconditional branch to the same
16161 label is not too far away, succeed. */
16162 for (insns_skipped = 0;
16163 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16167 this_insn = next_nonnote_insn (this_insn);
16171 switch (GET_CODE (this_insn))
16174 /* Succeed if it is the target label, otherwise fail since
16175 control falls in from somewhere else. */
16176 if (this_insn == label)
16178 arm_ccfsm_state = 1;
16186 /* Succeed if the following insn is the target label.
16188 If return insns are used then the last insn in a function
16189 will be a barrier. */
16190 this_insn = next_nonnote_insn (this_insn);
16191 if (this_insn && this_insn == label)
16193 arm_ccfsm_state = 1;
16201 /* The AAPCS says that conditional calls should not be
16202 used since they make interworking inefficient (the
16203 linker can't transform BL<cond> into BLX). That's
16204 only a problem if the machine has BLX. */
16211 /* Succeed if the following insn is the target label, or
16212 if the following two insns are a barrier and the
16214 this_insn = next_nonnote_insn (this_insn);
16215 if (this_insn && GET_CODE (this_insn) == BARRIER)
16216 this_insn = next_nonnote_insn (this_insn);
16218 if (this_insn && this_insn == label
16219 && insns_skipped < max_insns_skipped)
16221 arm_ccfsm_state = 1;
16229 /* If this is an unconditional branch to the same label, succeed.
16230 If it is to another label, do nothing. If it is conditional,
16232 /* XXX Probably, the tests for SET and the PC are
16235 scanbody = PATTERN (this_insn);
16236 if (GET_CODE (scanbody) == SET
16237 && GET_CODE (SET_DEST (scanbody)) == PC)
16239 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16240 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16242 arm_ccfsm_state = 2;
16245 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16248 /* Fail if a conditional return is undesirable (e.g. on a
16249 StrongARM), but still allow this if optimizing for size. */
16250 else if (GET_CODE (scanbody) == RETURN
16251 && !use_return_insn (TRUE, NULL)
16254 else if (GET_CODE (scanbody) == RETURN
16257 arm_ccfsm_state = 2;
16260 else if (GET_CODE (scanbody) == PARALLEL)
16262 switch (get_attr_conds (this_insn))
16272 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16277 /* Instructions using or affecting the condition codes make it
16279 scanbody = PATTERN (this_insn);
16280 if (!(GET_CODE (scanbody) == SET
16281 || GET_CODE (scanbody) == PARALLEL)
16282 || get_attr_conds (this_insn) != CONDS_NOCOND)
16285 /* A conditional cirrus instruction must be followed by
16286 a non Cirrus instruction. However, since we
16287 conditionalize instructions in this function and by
16288 the time we get here we can't add instructions
16289 (nops), because shorten_branches() has already been
16290 called, we will disable conditionalizing Cirrus
16291 instructions to be safe. */
16292 if (GET_CODE (scanbody) != USE
16293 && GET_CODE (scanbody) != CLOBBER
16294 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16304 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16305 arm_target_label = CODE_LABEL_NUMBER (label);
16308 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16310 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16312 this_insn = next_nonnote_insn (this_insn);
16313 gcc_assert (!this_insn
16314 || (GET_CODE (this_insn) != BARRIER
16315 && GET_CODE (this_insn) != CODE_LABEL));
16319 /* Oh, dear! we ran off the end.. give up. */
16320 extract_constrain_insn_cached (insn);
16321 arm_ccfsm_state = 0;
16322 arm_target_insn = NULL;
16325 arm_target_insn = this_insn;
16328 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16331 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16333 if (reverse || then_not_else)
16334 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16337 /* Restore recog_data (getting the attributes of other insns can
16338 destroy this array, but final.c assumes that it remains intact
16339 across this call. */
16340 extract_constrain_insn_cached (insn);
16344 /* Output IT instructions. */
16346 thumb2_asm_output_opcode (FILE * stream)
16351 if (arm_condexec_mask)
16353 for (n = 0; n < arm_condexec_masklen; n++)
16354 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16356 asm_fprintf(stream, "i%s\t%s\n\t", buff,
16357 arm_condition_codes[arm_current_cc]);
16358 arm_condexec_mask = 0;
16362 /* Returns true if REGNO is a valid register
16363 for holding a quantity of type MODE. */
16365 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16367 if (GET_MODE_CLASS (mode) == MODE_CC)
16368 return (regno == CC_REGNUM
16369 || (TARGET_HARD_FLOAT && TARGET_VFP
16370 && regno == VFPCC_REGNUM));
16373 /* For the Thumb we only allow values bigger than SImode in
16374 registers 0 - 6, so that there is always a second low
16375 register available to hold the upper part of the value.
16376 We probably we ought to ensure that the register is the
16377 start of an even numbered register pair. */
16378 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16380 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16381 && IS_CIRRUS_REGNUM (regno))
16382 /* We have outlawed SI values in Cirrus registers because they
16383 reside in the lower 32 bits, but SF values reside in the
16384 upper 32 bits. This causes gcc all sorts of grief. We can't
16385 even split the registers into pairs because Cirrus SI values
16386 get sign extended to 64bits-- aldyh. */
16387 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16389 if (TARGET_HARD_FLOAT && TARGET_VFP
16390 && IS_VFP_REGNUM (regno))
16392 if (mode == SFmode || mode == SImode)
16393 return VFP_REGNO_OK_FOR_SINGLE (regno);
16395 if (mode == DFmode)
16396 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16398 /* VFP registers can hold HFmode values, but there is no point in
16399 putting them there unless we have hardware conversion insns. */
16400 if (mode == HFmode)
16401 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16404 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16405 || (VALID_NEON_QREG_MODE (mode)
16406 && NEON_REGNO_OK_FOR_QUAD (regno))
16407 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16408 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16409 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16410 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16411 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16416 if (TARGET_REALLY_IWMMXT)
16418 if (IS_IWMMXT_GR_REGNUM (regno))
16419 return mode == SImode;
16421 if (IS_IWMMXT_REGNUM (regno))
16422 return VALID_IWMMXT_REG_MODE (mode);
16425 /* We allow almost any value to be stored in the general registers.
16426 Restrict doubleword quantities to even register pairs so that we can
16427 use ldrd. Do not allow very large Neon structure opaque modes in
16428 general registers; they would use too many. */
16429 if (regno <= LAST_ARM_REGNUM)
16430 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16431 && ARM_NUM_REGS (mode) <= 4;
16433 if (regno == FRAME_POINTER_REGNUM
16434 || regno == ARG_POINTER_REGNUM)
16435 /* We only allow integers in the fake hard registers. */
16436 return GET_MODE_CLASS (mode) == MODE_INT;
16438 /* The only registers left are the FPA registers
16439 which we only allow to hold FP values. */
16440 return (TARGET_HARD_FLOAT && TARGET_FPA
16441 && GET_MODE_CLASS (mode) == MODE_FLOAT
16442 && regno >= FIRST_FPA_REGNUM
16443 && regno <= LAST_FPA_REGNUM);
16446 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16447 not used in arm mode. */
16450 arm_regno_class (int regno)
16454 if (regno == STACK_POINTER_REGNUM)
16456 if (regno == CC_REGNUM)
16463 if (TARGET_THUMB2 && regno < 8)
16466 if ( regno <= LAST_ARM_REGNUM
16467 || regno == FRAME_POINTER_REGNUM
16468 || regno == ARG_POINTER_REGNUM)
16469 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16471 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16472 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16474 if (IS_CIRRUS_REGNUM (regno))
16475 return CIRRUS_REGS;
16477 if (IS_VFP_REGNUM (regno))
16479 if (regno <= D7_VFP_REGNUM)
16480 return VFP_D0_D7_REGS;
16481 else if (regno <= LAST_LO_VFP_REGNUM)
16482 return VFP_LO_REGS;
16484 return VFP_HI_REGS;
16487 if (IS_IWMMXT_REGNUM (regno))
16488 return IWMMXT_REGS;
16490 if (IS_IWMMXT_GR_REGNUM (regno))
16491 return IWMMXT_GR_REGS;
16496 /* Handle a special case when computing the offset
16497 of an argument from the frame pointer. */
16499 arm_debugger_arg_offset (int value, rtx addr)
16503 /* We are only interested if dbxout_parms() failed to compute the offset. */
16507 /* We can only cope with the case where the address is held in a register. */
16508 if (GET_CODE (addr) != REG)
16511 /* If we are using the frame pointer to point at the argument, then
16512 an offset of 0 is correct. */
16513 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16516 /* If we are using the stack pointer to point at the
16517 argument, then an offset of 0 is correct. */
16518 /* ??? Check this is consistent with thumb2 frame layout. */
16519 if ((TARGET_THUMB || !frame_pointer_needed)
16520 && REGNO (addr) == SP_REGNUM)
16523 /* Oh dear. The argument is pointed to by a register rather
16524 than being held in a register, or being stored at a known
16525 offset from the frame pointer. Since GDB only understands
16526 those two kinds of argument we must translate the address
16527 held in the register into an offset from the frame pointer.
16528 We do this by searching through the insns for the function
16529 looking to see where this register gets its value. If the
16530 register is initialized from the frame pointer plus an offset
16531 then we are in luck and we can continue, otherwise we give up.
16533 This code is exercised by producing debugging information
16534 for a function with arguments like this:
16536 double func (double a, double b, int c, double d) {return d;}
16538 Without this code the stab for parameter 'd' will be set to
16539 an offset of 0 from the frame pointer, rather than 8. */
16541 /* The if() statement says:
16543 If the insn is a normal instruction
16544 and if the insn is setting the value in a register
16545 and if the register being set is the register holding the address of the argument
16546 and if the address is computing by an addition
16547 that involves adding to a register
16548 which is the frame pointer
16553 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16555 if ( GET_CODE (insn) == INSN
16556 && GET_CODE (PATTERN (insn)) == SET
16557 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16558 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16559 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16560 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16561 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16564 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16573 warning (0, "unable to compute real location of stacked parameter");
16574 value = 8; /* XXX magic hack */
16580 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16583 if ((MASK) & insn_flags) \
16584 add_builtin_function ((NAME), (TYPE), (CODE), \
16585 BUILT_IN_MD, NULL, NULL_TREE); \
16589 struct builtin_description
16591 const unsigned int mask;
16592 const enum insn_code icode;
16593 const char * const name;
16594 const enum arm_builtins code;
16595 const enum rtx_code comparison;
16596 const unsigned int flag;
16599 static const struct builtin_description bdesc_2arg[] =
16601 #define IWMMXT_BUILTIN(code, string, builtin) \
16602 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16603 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16605 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16606 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16607 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16608 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16609 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16610 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16611 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16612 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16613 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16614 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16615 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16616 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16617 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16618 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16619 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16620 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16621 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16622 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16623 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16624 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16625 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16626 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16627 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16628 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16629 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16630 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16631 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16632 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16633 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16634 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16635 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16636 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16637 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16638 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16639 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16640 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16641 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16642 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16643 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16644 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16645 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16646 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16647 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16648 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16649 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16650 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16651 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16652 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16653 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16654 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16655 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16656 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16657 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16658 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16659 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16660 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16661 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16662 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16664 #define IWMMXT_BUILTIN2(code, builtin) \
16665 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16667 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16668 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16669 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16670 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16671 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16672 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16673 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16674 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16675 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16676 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16677 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16678 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16679 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16680 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16681 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16682 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16683 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16684 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16685 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16686 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16687 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16688 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16689 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16690 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16691 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16692 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16693 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16694 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16695 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16696 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16697 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16698 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16701 static const struct builtin_description bdesc_1arg[] =
16703 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16704 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16705 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16706 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16707 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16708 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16709 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16710 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16711 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16712 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16713 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16714 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16715 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16716 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16717 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16718 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16719 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16720 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16723 /* Set up all the iWMMXt builtins. This is
16724 not called if TARGET_IWMMXT is zero. */
16727 arm_init_iwmmxt_builtins (void)
16729 const struct builtin_description * d;
16731 tree endlink = void_list_node;
16733 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16734 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16735 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16738 = build_function_type (integer_type_node,
16739 tree_cons (NULL_TREE, integer_type_node, endlink));
16740 tree v8qi_ftype_v8qi_v8qi_int
16741 = build_function_type (V8QI_type_node,
16742 tree_cons (NULL_TREE, V8QI_type_node,
16743 tree_cons (NULL_TREE, V8QI_type_node,
16744 tree_cons (NULL_TREE,
16747 tree v4hi_ftype_v4hi_int
16748 = build_function_type (V4HI_type_node,
16749 tree_cons (NULL_TREE, V4HI_type_node,
16750 tree_cons (NULL_TREE, integer_type_node,
16752 tree v2si_ftype_v2si_int
16753 = build_function_type (V2SI_type_node,
16754 tree_cons (NULL_TREE, V2SI_type_node,
16755 tree_cons (NULL_TREE, integer_type_node,
16757 tree v2si_ftype_di_di
16758 = build_function_type (V2SI_type_node,
16759 tree_cons (NULL_TREE, long_long_integer_type_node,
16760 tree_cons (NULL_TREE, long_long_integer_type_node,
16762 tree di_ftype_di_int
16763 = build_function_type (long_long_integer_type_node,
16764 tree_cons (NULL_TREE, long_long_integer_type_node,
16765 tree_cons (NULL_TREE, integer_type_node,
16767 tree di_ftype_di_int_int
16768 = build_function_type (long_long_integer_type_node,
16769 tree_cons (NULL_TREE, long_long_integer_type_node,
16770 tree_cons (NULL_TREE, integer_type_node,
16771 tree_cons (NULL_TREE,
16774 tree int_ftype_v8qi
16775 = build_function_type (integer_type_node,
16776 tree_cons (NULL_TREE, V8QI_type_node,
16778 tree int_ftype_v4hi
16779 = build_function_type (integer_type_node,
16780 tree_cons (NULL_TREE, V4HI_type_node,
16782 tree int_ftype_v2si
16783 = build_function_type (integer_type_node,
16784 tree_cons (NULL_TREE, V2SI_type_node,
16786 tree int_ftype_v8qi_int
16787 = build_function_type (integer_type_node,
16788 tree_cons (NULL_TREE, V8QI_type_node,
16789 tree_cons (NULL_TREE, integer_type_node,
16791 tree int_ftype_v4hi_int
16792 = build_function_type (integer_type_node,
16793 tree_cons (NULL_TREE, V4HI_type_node,
16794 tree_cons (NULL_TREE, integer_type_node,
16796 tree int_ftype_v2si_int
16797 = build_function_type (integer_type_node,
16798 tree_cons (NULL_TREE, V2SI_type_node,
16799 tree_cons (NULL_TREE, integer_type_node,
16801 tree v8qi_ftype_v8qi_int_int
16802 = build_function_type (V8QI_type_node,
16803 tree_cons (NULL_TREE, V8QI_type_node,
16804 tree_cons (NULL_TREE, integer_type_node,
16805 tree_cons (NULL_TREE,
16808 tree v4hi_ftype_v4hi_int_int
16809 = build_function_type (V4HI_type_node,
16810 tree_cons (NULL_TREE, V4HI_type_node,
16811 tree_cons (NULL_TREE, integer_type_node,
16812 tree_cons (NULL_TREE,
16815 tree v2si_ftype_v2si_int_int
16816 = build_function_type (V2SI_type_node,
16817 tree_cons (NULL_TREE, V2SI_type_node,
16818 tree_cons (NULL_TREE, integer_type_node,
16819 tree_cons (NULL_TREE,
16822 /* Miscellaneous. */
16823 tree v8qi_ftype_v4hi_v4hi
16824 = build_function_type (V8QI_type_node,
16825 tree_cons (NULL_TREE, V4HI_type_node,
16826 tree_cons (NULL_TREE, V4HI_type_node,
16828 tree v4hi_ftype_v2si_v2si
16829 = build_function_type (V4HI_type_node,
16830 tree_cons (NULL_TREE, V2SI_type_node,
16831 tree_cons (NULL_TREE, V2SI_type_node,
16833 tree v2si_ftype_v4hi_v4hi
16834 = build_function_type (V2SI_type_node,
16835 tree_cons (NULL_TREE, V4HI_type_node,
16836 tree_cons (NULL_TREE, V4HI_type_node,
16838 tree v2si_ftype_v8qi_v8qi
16839 = build_function_type (V2SI_type_node,
16840 tree_cons (NULL_TREE, V8QI_type_node,
16841 tree_cons (NULL_TREE, V8QI_type_node,
16843 tree v4hi_ftype_v4hi_di
16844 = build_function_type (V4HI_type_node,
16845 tree_cons (NULL_TREE, V4HI_type_node,
16846 tree_cons (NULL_TREE,
16847 long_long_integer_type_node,
16849 tree v2si_ftype_v2si_di
16850 = build_function_type (V2SI_type_node,
16851 tree_cons (NULL_TREE, V2SI_type_node,
16852 tree_cons (NULL_TREE,
16853 long_long_integer_type_node,
16855 tree void_ftype_int_int
16856 = build_function_type (void_type_node,
16857 tree_cons (NULL_TREE, integer_type_node,
16858 tree_cons (NULL_TREE, integer_type_node,
16861 = build_function_type (long_long_unsigned_type_node, endlink);
16863 = build_function_type (long_long_integer_type_node,
16864 tree_cons (NULL_TREE, V8QI_type_node,
16867 = build_function_type (long_long_integer_type_node,
16868 tree_cons (NULL_TREE, V4HI_type_node,
16871 = build_function_type (long_long_integer_type_node,
16872 tree_cons (NULL_TREE, V2SI_type_node,
16874 tree v2si_ftype_v4hi
16875 = build_function_type (V2SI_type_node,
16876 tree_cons (NULL_TREE, V4HI_type_node,
16878 tree v4hi_ftype_v8qi
16879 = build_function_type (V4HI_type_node,
16880 tree_cons (NULL_TREE, V8QI_type_node,
16883 tree di_ftype_di_v4hi_v4hi
16884 = build_function_type (long_long_unsigned_type_node,
16885 tree_cons (NULL_TREE,
16886 long_long_unsigned_type_node,
16887 tree_cons (NULL_TREE, V4HI_type_node,
16888 tree_cons (NULL_TREE,
16892 tree di_ftype_v4hi_v4hi
16893 = build_function_type (long_long_unsigned_type_node,
16894 tree_cons (NULL_TREE, V4HI_type_node,
16895 tree_cons (NULL_TREE, V4HI_type_node,
16898 /* Normal vector binops. */
16899 tree v8qi_ftype_v8qi_v8qi
16900 = build_function_type (V8QI_type_node,
16901 tree_cons (NULL_TREE, V8QI_type_node,
16902 tree_cons (NULL_TREE, V8QI_type_node,
16904 tree v4hi_ftype_v4hi_v4hi
16905 = build_function_type (V4HI_type_node,
16906 tree_cons (NULL_TREE, V4HI_type_node,
16907 tree_cons (NULL_TREE, V4HI_type_node,
16909 tree v2si_ftype_v2si_v2si
16910 = build_function_type (V2SI_type_node,
16911 tree_cons (NULL_TREE, V2SI_type_node,
16912 tree_cons (NULL_TREE, V2SI_type_node,
16914 tree di_ftype_di_di
16915 = build_function_type (long_long_unsigned_type_node,
16916 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16917 tree_cons (NULL_TREE,
16918 long_long_unsigned_type_node,
16921 /* Add all builtins that are more or less simple operations on two
16923 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16925 /* Use one of the operands; the target can have a different mode for
16926 mask-generating compares. */
16927 enum machine_mode mode;
16933 mode = insn_data[d->icode].operand[1].mode;
16938 type = v8qi_ftype_v8qi_v8qi;
16941 type = v4hi_ftype_v4hi_v4hi;
16944 type = v2si_ftype_v2si_v2si;
16947 type = di_ftype_di_di;
16951 gcc_unreachable ();
16954 def_mbuiltin (d->mask, d->name, type, d->code);
16957 /* Add the remaining MMX insns with somewhat more complicated types. */
16958 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16959 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16960 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16962 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16963 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16964 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16965 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16966 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16967 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16969 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16970 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16971 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16972 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16973 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16974 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16976 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16977 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16978 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16979 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16980 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16981 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16983 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16984 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16985 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16986 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16987 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16988 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16990 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16992 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16993 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16994 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16995 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16997 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16998 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16999 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17000 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17001 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17002 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17004 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17005 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17008 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17009 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17011 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17012 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17013 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17015 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17017 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17018 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17019 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17020 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17022 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17023 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17024 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17025 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17026 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17027 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17028 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17029 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17030 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17031 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17032 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17033 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17035 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17036 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17040 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17041 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17045 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17046 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17050 arm_init_tls_builtins (void)
17054 ftype = build_function_type (ptr_type_node, void_list_node);
17055 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17056 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17058 TREE_NOTHROW (decl) = 1;
17059 TREE_READONLY (decl) = 1;
17062 enum neon_builtin_type_bits {
17078 #define v8qi_UP T_V8QI
17079 #define v4hi_UP T_V4HI
17080 #define v2si_UP T_V2SI
17081 #define v2sf_UP T_V2SF
17083 #define v16qi_UP T_V16QI
17084 #define v8hi_UP T_V8HI
17085 #define v4si_UP T_V4SI
17086 #define v4sf_UP T_V4SF
17087 #define v2di_UP T_V2DI
17092 #define UP(X) X##_UP
17127 NEON_LOADSTRUCTLANE,
17129 NEON_STORESTRUCTLANE,
17138 const neon_itype itype;
17140 const enum insn_code codes[T_MAX];
17141 const unsigned int num_vars;
17142 unsigned int base_fcode;
17143 } neon_builtin_datum;
17145 #define CF(N,X) CODE_FOR_neon_##N##X
17147 #define VAR1(T, N, A) \
17148 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17149 #define VAR2(T, N, A, B) \
17150 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17151 #define VAR3(T, N, A, B, C) \
17152 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17153 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17154 #define VAR4(T, N, A, B, C, D) \
17155 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17156 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17157 #define VAR5(T, N, A, B, C, D, E) \
17158 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17159 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17160 #define VAR6(T, N, A, B, C, D, E, F) \
17161 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17162 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17163 #define VAR7(T, N, A, B, C, D, E, F, G) \
17164 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17165 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17167 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17168 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17170 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17171 CF (N, G), CF (N, H) }, 8, 0
17172 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17173 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17174 | UP (H) | UP (I), \
17175 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17176 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17177 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17178 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17179 | UP (H) | UP (I) | UP (J), \
17180 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17181 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
17183 /* The mode entries in the following table correspond to the "key" type of the
17184 instruction variant, i.e. equivalent to that which would be specified after
17185 the assembler mnemonic, which usually refers to the last vector operand.
17186 (Signed/unsigned/polynomial types are not differentiated between though, and
17187 are all mapped onto the same mode for a given element size.) The modes
17188 listed per instruction should be the same as those defined for that
17189 instruction's pattern in neon.md.
17190 WARNING: Variants should be listed in the same increasing order as
17191 neon_builtin_type_bits. */
17193 static neon_builtin_datum neon_builtin_data[] =
17195 { VAR10 (BINOP, vadd,
17196 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17197 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17198 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17199 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17200 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17201 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17202 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17203 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17204 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17205 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17206 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17207 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17208 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17209 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17210 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17211 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17212 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17213 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17214 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17215 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17216 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17217 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17218 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17219 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17220 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17221 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17222 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17223 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17224 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17225 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17226 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17227 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17228 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17229 { VAR10 (BINOP, vsub,
17230 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17231 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17232 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17233 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17234 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17235 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17236 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17237 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17238 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17239 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17240 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17241 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17242 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17243 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17244 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17245 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17246 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17247 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17248 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17249 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17250 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17251 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17252 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17253 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17254 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17255 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17256 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17257 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17258 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17259 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17260 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17261 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17262 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17263 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17264 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17265 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17266 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17267 /* FIXME: vget_lane supports more variants than this! */
17268 { VAR10 (GETLANE, vget_lane,
17269 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17270 { VAR10 (SETLANE, vset_lane,
17271 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17272 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17273 { VAR10 (DUP, vdup_n,
17274 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17275 { VAR10 (DUPLANE, vdup_lane,
17276 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17277 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17278 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17279 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17280 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17281 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17282 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17283 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17284 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17285 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17286 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17287 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17288 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17289 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17290 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17291 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17292 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17293 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17294 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17295 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17296 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17297 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17298 { VAR10 (BINOP, vext,
17299 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17300 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17301 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17302 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17303 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17304 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17305 { VAR10 (SELECT, vbsl,
17306 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17307 { VAR1 (VTBL, vtbl1, v8qi) },
17308 { VAR1 (VTBL, vtbl2, v8qi) },
17309 { VAR1 (VTBL, vtbl3, v8qi) },
17310 { VAR1 (VTBL, vtbl4, v8qi) },
17311 { VAR1 (VTBX, vtbx1, v8qi) },
17312 { VAR1 (VTBX, vtbx2, v8qi) },
17313 { VAR1 (VTBX, vtbx3, v8qi) },
17314 { VAR1 (VTBX, vtbx4, v8qi) },
17315 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17316 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17317 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17318 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17319 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17320 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17321 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17322 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17323 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17324 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17325 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17326 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17327 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17328 { VAR10 (LOAD1, vld1,
17329 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17330 { VAR10 (LOAD1LANE, vld1_lane,
17331 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17332 { VAR10 (LOAD1, vld1_dup,
17333 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17334 { VAR10 (STORE1, vst1,
17335 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17336 { VAR10 (STORE1LANE, vst1_lane,
17337 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17338 { VAR9 (LOADSTRUCT,
17339 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17340 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17341 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17342 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17343 { VAR9 (STORESTRUCT, vst2,
17344 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17345 { VAR7 (STORESTRUCTLANE, vst2_lane,
17346 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17347 { VAR9 (LOADSTRUCT,
17348 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17349 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17350 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17351 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17352 { VAR9 (STORESTRUCT, vst3,
17353 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17354 { VAR7 (STORESTRUCTLANE, vst3_lane,
17355 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17356 { VAR9 (LOADSTRUCT, vld4,
17357 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17358 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17359 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17360 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17361 { VAR9 (STORESTRUCT, vst4,
17362 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17363 { VAR7 (STORESTRUCTLANE, vst4_lane,
17364 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17365 { VAR10 (LOGICBINOP, vand,
17366 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17367 { VAR10 (LOGICBINOP, vorr,
17368 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17369 { VAR10 (BINOP, veor,
17370 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17371 { VAR10 (LOGICBINOP, vbic,
17372 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17373 { VAR10 (LOGICBINOP, vorn,
17374 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17390 arm_init_neon_builtins (void)
17392 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17394 tree neon_intQI_type_node;
17395 tree neon_intHI_type_node;
17396 tree neon_polyQI_type_node;
17397 tree neon_polyHI_type_node;
17398 tree neon_intSI_type_node;
17399 tree neon_intDI_type_node;
17400 tree neon_float_type_node;
17402 tree intQI_pointer_node;
17403 tree intHI_pointer_node;
17404 tree intSI_pointer_node;
17405 tree intDI_pointer_node;
17406 tree float_pointer_node;
17408 tree const_intQI_node;
17409 tree const_intHI_node;
17410 tree const_intSI_node;
17411 tree const_intDI_node;
17412 tree const_float_node;
17414 tree const_intQI_pointer_node;
17415 tree const_intHI_pointer_node;
17416 tree const_intSI_pointer_node;
17417 tree const_intDI_pointer_node;
17418 tree const_float_pointer_node;
17420 tree V8QI_type_node;
17421 tree V4HI_type_node;
17422 tree V2SI_type_node;
17423 tree V2SF_type_node;
17424 tree V16QI_type_node;
17425 tree V8HI_type_node;
17426 tree V4SI_type_node;
17427 tree V4SF_type_node;
17428 tree V2DI_type_node;
17430 tree intUQI_type_node;
17431 tree intUHI_type_node;
17432 tree intUSI_type_node;
17433 tree intUDI_type_node;
17435 tree intEI_type_node;
17436 tree intOI_type_node;
17437 tree intCI_type_node;
17438 tree intXI_type_node;
17440 tree V8QI_pointer_node;
17441 tree V4HI_pointer_node;
17442 tree V2SI_pointer_node;
17443 tree V2SF_pointer_node;
17444 tree V16QI_pointer_node;
17445 tree V8HI_pointer_node;
17446 tree V4SI_pointer_node;
17447 tree V4SF_pointer_node;
17448 tree V2DI_pointer_node;
17450 tree void_ftype_pv8qi_v8qi_v8qi;
17451 tree void_ftype_pv4hi_v4hi_v4hi;
17452 tree void_ftype_pv2si_v2si_v2si;
17453 tree void_ftype_pv2sf_v2sf_v2sf;
17454 tree void_ftype_pdi_di_di;
17455 tree void_ftype_pv16qi_v16qi_v16qi;
17456 tree void_ftype_pv8hi_v8hi_v8hi;
17457 tree void_ftype_pv4si_v4si_v4si;
17458 tree void_ftype_pv4sf_v4sf_v4sf;
17459 tree void_ftype_pv2di_v2di_v2di;
17461 tree reinterp_ftype_dreg[5][5];
17462 tree reinterp_ftype_qreg[5][5];
17463 tree dreg_types[5], qreg_types[5];
17465 /* Create distinguished type nodes for NEON vector element types,
17466 and pointers to values of such types, so we can detect them later. */
17467 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17468 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17469 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17470 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17471 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17472 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17473 neon_float_type_node = make_node (REAL_TYPE);
17474 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17475 layout_type (neon_float_type_node);
17477 /* Define typedefs which exactly correspond to the modes we are basing vector
17478 types on. If you change these names you'll need to change
17479 the table used by arm_mangle_type too. */
17480 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17481 "__builtin_neon_qi");
17482 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17483 "__builtin_neon_hi");
17484 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17485 "__builtin_neon_si");
17486 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17487 "__builtin_neon_sf");
17488 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17489 "__builtin_neon_di");
17490 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17491 "__builtin_neon_poly8");
17492 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17493 "__builtin_neon_poly16");
17495 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17496 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17497 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17498 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17499 float_pointer_node = build_pointer_type (neon_float_type_node);
17501 /* Next create constant-qualified versions of the above types. */
17502 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17504 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17506 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17508 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17510 const_float_node = build_qualified_type (neon_float_type_node,
17513 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17514 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17515 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17516 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17517 const_float_pointer_node = build_pointer_type (const_float_node);
17519 /* Now create vector types based on our NEON element types. */
17520 /* 64-bit vectors. */
17522 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17524 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17526 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17528 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17529 /* 128-bit vectors. */
17531 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17533 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17535 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17537 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17539 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17541 /* Unsigned integer types for various mode sizes. */
17542 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17543 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17544 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17545 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17547 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17548 "__builtin_neon_uqi");
17549 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17550 "__builtin_neon_uhi");
17551 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17552 "__builtin_neon_usi");
17553 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17554 "__builtin_neon_udi");
17556 /* Opaque integer types for structures of vectors. */
17557 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17558 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17559 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17560 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17562 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17563 "__builtin_neon_ti");
17564 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17565 "__builtin_neon_ei");
17566 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17567 "__builtin_neon_oi");
17568 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17569 "__builtin_neon_ci");
17570 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17571 "__builtin_neon_xi");
17573 /* Pointers to vector types. */
17574 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17575 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17576 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17577 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17578 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17579 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17580 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17581 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17582 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17584 /* Operations which return results as pairs. */
17585 void_ftype_pv8qi_v8qi_v8qi =
17586 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17587 V8QI_type_node, NULL);
17588 void_ftype_pv4hi_v4hi_v4hi =
17589 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17590 V4HI_type_node, NULL);
17591 void_ftype_pv2si_v2si_v2si =
17592 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17593 V2SI_type_node, NULL);
17594 void_ftype_pv2sf_v2sf_v2sf =
17595 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17596 V2SF_type_node, NULL);
17597 void_ftype_pdi_di_di =
17598 build_function_type_list (void_type_node, intDI_pointer_node,
17599 neon_intDI_type_node, neon_intDI_type_node, NULL);
17600 void_ftype_pv16qi_v16qi_v16qi =
17601 build_function_type_list (void_type_node, V16QI_pointer_node,
17602 V16QI_type_node, V16QI_type_node, NULL);
17603 void_ftype_pv8hi_v8hi_v8hi =
17604 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17605 V8HI_type_node, NULL);
17606 void_ftype_pv4si_v4si_v4si =
17607 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17608 V4SI_type_node, NULL);
17609 void_ftype_pv4sf_v4sf_v4sf =
17610 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17611 V4SF_type_node, NULL);
17612 void_ftype_pv2di_v2di_v2di =
17613 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17614 V2DI_type_node, NULL);
17616 dreg_types[0] = V8QI_type_node;
17617 dreg_types[1] = V4HI_type_node;
17618 dreg_types[2] = V2SI_type_node;
17619 dreg_types[3] = V2SF_type_node;
17620 dreg_types[4] = neon_intDI_type_node;
17622 qreg_types[0] = V16QI_type_node;
17623 qreg_types[1] = V8HI_type_node;
17624 qreg_types[2] = V4SI_type_node;
17625 qreg_types[3] = V4SF_type_node;
17626 qreg_types[4] = V2DI_type_node;
17628 for (i = 0; i < 5; i++)
17631 for (j = 0; j < 5; j++)
17633 reinterp_ftype_dreg[i][j]
17634 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17635 reinterp_ftype_qreg[i][j]
17636 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17640 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17642 neon_builtin_datum *d = &neon_builtin_data[i];
17643 unsigned int j, codeidx = 0;
17645 d->base_fcode = fcode;
17647 for (j = 0; j < T_MAX; j++)
17649 const char* const modenames[] = {
17650 "v8qi", "v4hi", "v2si", "v2sf", "di",
17651 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17655 enum insn_code icode;
17656 int is_load = 0, is_store = 0;
17658 if ((d->bits & (1 << j)) == 0)
17661 icode = d->codes[codeidx++];
17666 case NEON_LOAD1LANE:
17667 case NEON_LOADSTRUCT:
17668 case NEON_LOADSTRUCTLANE:
17670 /* Fall through. */
17672 case NEON_STORE1LANE:
17673 case NEON_STORESTRUCT:
17674 case NEON_STORESTRUCTLANE:
17677 /* Fall through. */
17680 case NEON_LOGICBINOP:
17681 case NEON_SHIFTINSERT:
17688 case NEON_SHIFTIMM:
17689 case NEON_SHIFTACC:
17695 case NEON_LANEMULL:
17696 case NEON_LANEMULH:
17698 case NEON_SCALARMUL:
17699 case NEON_SCALARMULL:
17700 case NEON_SCALARMULH:
17701 case NEON_SCALARMAC:
17707 tree return_type = void_type_node, args = void_list_node;
17709 /* Build a function type directly from the insn_data for this
17710 builtin. The build_function_type() function takes care of
17711 removing duplicates for us. */
17712 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17716 if (is_load && k == 1)
17718 /* Neon load patterns always have the memory operand
17719 (a SImode pointer) in the operand 1 position. We
17720 want a const pointer to the element type in that
17722 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17728 eltype = const_intQI_pointer_node;
17733 eltype = const_intHI_pointer_node;
17738 eltype = const_intSI_pointer_node;
17743 eltype = const_float_pointer_node;
17748 eltype = const_intDI_pointer_node;
17751 default: gcc_unreachable ();
17754 else if (is_store && k == 0)
17756 /* Similarly, Neon store patterns use operand 0 as
17757 the memory location to store to (a SImode pointer).
17758 Use a pointer to the element type of the store in
17760 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17766 eltype = intQI_pointer_node;
17771 eltype = intHI_pointer_node;
17776 eltype = intSI_pointer_node;
17781 eltype = float_pointer_node;
17786 eltype = intDI_pointer_node;
17789 default: gcc_unreachable ();
17794 switch (insn_data[icode].operand[k].mode)
17796 case VOIDmode: eltype = void_type_node; break;
17798 case QImode: eltype = neon_intQI_type_node; break;
17799 case HImode: eltype = neon_intHI_type_node; break;
17800 case SImode: eltype = neon_intSI_type_node; break;
17801 case SFmode: eltype = neon_float_type_node; break;
17802 case DImode: eltype = neon_intDI_type_node; break;
17803 case TImode: eltype = intTI_type_node; break;
17804 case EImode: eltype = intEI_type_node; break;
17805 case OImode: eltype = intOI_type_node; break;
17806 case CImode: eltype = intCI_type_node; break;
17807 case XImode: eltype = intXI_type_node; break;
17808 /* 64-bit vectors. */
17809 case V8QImode: eltype = V8QI_type_node; break;
17810 case V4HImode: eltype = V4HI_type_node; break;
17811 case V2SImode: eltype = V2SI_type_node; break;
17812 case V2SFmode: eltype = V2SF_type_node; break;
17813 /* 128-bit vectors. */
17814 case V16QImode: eltype = V16QI_type_node; break;
17815 case V8HImode: eltype = V8HI_type_node; break;
17816 case V4SImode: eltype = V4SI_type_node; break;
17817 case V4SFmode: eltype = V4SF_type_node; break;
17818 case V2DImode: eltype = V2DI_type_node; break;
17819 default: gcc_unreachable ();
17823 if (k == 0 && !is_store)
17824 return_type = eltype;
17826 args = tree_cons (NULL_TREE, eltype, args);
17829 ftype = build_function_type (return_type, args);
17833 case NEON_RESULTPAIR:
17835 switch (insn_data[icode].operand[1].mode)
17837 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17838 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17839 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17840 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17841 case DImode: ftype = void_ftype_pdi_di_di; break;
17842 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17843 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17844 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17845 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17846 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17847 default: gcc_unreachable ();
17852 case NEON_REINTERP:
17854 /* We iterate over 5 doubleword types, then 5 quadword
17857 switch (insn_data[icode].operand[0].mode)
17859 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17860 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17861 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17862 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17863 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17864 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17865 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17866 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17867 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17868 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17869 default: gcc_unreachable ();
17875 gcc_unreachable ();
17878 gcc_assert (ftype != NULL);
17880 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17882 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17889 arm_init_fp16_builtins (void)
17891 tree fp16_type = make_node (REAL_TYPE);
17892 TYPE_PRECISION (fp16_type) = 16;
17893 layout_type (fp16_type);
17894 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
17898 arm_init_builtins (void)
17900 arm_init_tls_builtins ();
17902 if (TARGET_REALLY_IWMMXT)
17903 arm_init_iwmmxt_builtins ();
17906 arm_init_neon_builtins ();
17908 if (arm_fp16_format)
17909 arm_init_fp16_builtins ();
17912 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17914 static const char *
17915 arm_invalid_parameter_type (const_tree t)
17917 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17918 return N_("function parameters cannot have __fp16 type");
17922 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17924 static const char *
17925 arm_invalid_return_type (const_tree t)
17927 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17928 return N_("functions cannot return __fp16 type");
17932 /* Implement TARGET_PROMOTED_TYPE. */
17935 arm_promoted_type (const_tree t)
17937 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17938 return float_type_node;
17942 /* Implement TARGET_CONVERT_TO_TYPE.
17943 Specifically, this hook implements the peculiarity of the ARM
17944 half-precision floating-point C semantics that requires conversions between
17945 __fp16 to or from double to do an intermediate conversion to float. */
17948 arm_convert_to_type (tree type, tree expr)
17950 tree fromtype = TREE_TYPE (expr);
17951 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17953 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17954 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17955 return convert (type, convert (float_type_node, expr));
17959 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17960 This simply adds HFmode as a supported mode; even though we don't
17961 implement arithmetic on this type directly, it's supported by
17962 optabs conversions, much the way the double-word arithmetic is
17963 special-cased in the default hook. */
17966 arm_scalar_mode_supported_p (enum machine_mode mode)
17968 if (mode == HFmode)
17969 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17971 return default_scalar_mode_supported_p (mode);
17974 /* Errors in the source file can cause expand_expr to return const0_rtx
17975 where we expect a vector. To avoid crashing, use one of the vector
17976 clear instructions. */
17979 safe_vector_operand (rtx x, enum machine_mode mode)
17981 if (x != const0_rtx)
17983 x = gen_reg_rtx (mode);
17985 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17986 : gen_rtx_SUBREG (DImode, x, 0)));
17990 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17993 arm_expand_binop_builtin (enum insn_code icode,
17994 tree exp, rtx target)
17997 tree arg0 = CALL_EXPR_ARG (exp, 0);
17998 tree arg1 = CALL_EXPR_ARG (exp, 1);
17999 rtx op0 = expand_normal (arg0);
18000 rtx op1 = expand_normal (arg1);
18001 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18002 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18003 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18005 if (VECTOR_MODE_P (mode0))
18006 op0 = safe_vector_operand (op0, mode0);
18007 if (VECTOR_MODE_P (mode1))
18008 op1 = safe_vector_operand (op1, mode1);
18011 || GET_MODE (target) != tmode
18012 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18013 target = gen_reg_rtx (tmode);
18015 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18017 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18018 op0 = copy_to_mode_reg (mode0, op0);
18019 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18020 op1 = copy_to_mode_reg (mode1, op1);
18022 pat = GEN_FCN (icode) (target, op0, op1);
18029 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18032 arm_expand_unop_builtin (enum insn_code icode,
18033 tree exp, rtx target, int do_load)
18036 tree arg0 = CALL_EXPR_ARG (exp, 0);
18037 rtx op0 = expand_normal (arg0);
18038 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18039 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18042 || GET_MODE (target) != tmode
18043 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18044 target = gen_reg_rtx (tmode);
18046 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18049 if (VECTOR_MODE_P (mode0))
18050 op0 = safe_vector_operand (op0, mode0);
18052 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18053 op0 = copy_to_mode_reg (mode0, op0);
18056 pat = GEN_FCN (icode) (target, op0);
18064 neon_builtin_compare (const void *a, const void *b)
18066 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18067 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18068 unsigned int soughtcode = key->base_fcode;
18070 if (soughtcode >= memb->base_fcode
18071 && soughtcode < memb->base_fcode + memb->num_vars)
18073 else if (soughtcode < memb->base_fcode)
18079 static enum insn_code
18080 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18082 neon_builtin_datum key, *found;
18085 key.base_fcode = fcode;
18086 found = (neon_builtin_datum *)
18087 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18088 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18089 gcc_assert (found);
18090 idx = fcode - (int) found->base_fcode;
18091 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18094 *itype = found->itype;
18096 return found->codes[idx];
18100 NEON_ARG_COPY_TO_REG,
18105 #define NEON_MAX_BUILTIN_ARGS 5
18107 /* Expand a Neon builtin. */
18109 arm_expand_neon_args (rtx target, int icode, int have_retval,
18114 tree arg[NEON_MAX_BUILTIN_ARGS];
18115 rtx op[NEON_MAX_BUILTIN_ARGS];
18116 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18117 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18122 || GET_MODE (target) != tmode
18123 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18124 target = gen_reg_rtx (tmode);
18126 va_start (ap, exp);
18130 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18132 if (thisarg == NEON_ARG_STOP)
18136 arg[argc] = CALL_EXPR_ARG (exp, argc);
18137 op[argc] = expand_normal (arg[argc]);
18138 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18142 case NEON_ARG_COPY_TO_REG:
18143 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18144 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18145 (op[argc], mode[argc]))
18146 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18149 case NEON_ARG_CONSTANT:
18150 /* FIXME: This error message is somewhat unhelpful. */
18151 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18152 (op[argc], mode[argc]))
18153 error ("argument must be a constant");
18156 case NEON_ARG_STOP:
18157 gcc_unreachable ();
18170 pat = GEN_FCN (icode) (target, op[0]);
18174 pat = GEN_FCN (icode) (target, op[0], op[1]);
18178 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18182 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18186 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18190 gcc_unreachable ();
18196 pat = GEN_FCN (icode) (op[0]);
18200 pat = GEN_FCN (icode) (op[0], op[1]);
18204 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18208 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18212 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18216 gcc_unreachable ();
18227 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18228 constants defined per-instruction or per instruction-variant. Instead, the
18229 required info is looked up in the table neon_builtin_data. */
18231 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18234 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18241 return arm_expand_neon_args (target, icode, 1, exp,
18242 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18246 case NEON_SCALARMUL:
18247 case NEON_SCALARMULL:
18248 case NEON_SCALARMULH:
18249 case NEON_SHIFTINSERT:
18250 case NEON_LOGICBINOP:
18251 return arm_expand_neon_args (target, icode, 1, exp,
18252 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18256 return arm_expand_neon_args (target, icode, 1, exp,
18257 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18258 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18262 case NEON_SHIFTIMM:
18263 return arm_expand_neon_args (target, icode, 1, exp,
18264 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18268 return arm_expand_neon_args (target, icode, 1, exp,
18269 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18273 case NEON_REINTERP:
18274 return arm_expand_neon_args (target, icode, 1, exp,
18275 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18279 return arm_expand_neon_args (target, icode, 1, exp,
18280 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18282 case NEON_RESULTPAIR:
18283 return arm_expand_neon_args (target, icode, 0, exp,
18284 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18288 case NEON_LANEMULL:
18289 case NEON_LANEMULH:
18290 return arm_expand_neon_args (target, icode, 1, exp,
18291 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18292 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18295 return arm_expand_neon_args (target, icode, 1, exp,
18296 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18297 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18299 case NEON_SHIFTACC:
18300 return arm_expand_neon_args (target, icode, 1, exp,
18301 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18302 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18304 case NEON_SCALARMAC:
18305 return arm_expand_neon_args (target, icode, 1, exp,
18306 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18307 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18311 return arm_expand_neon_args (target, icode, 1, exp,
18312 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18316 case NEON_LOADSTRUCT:
18317 return arm_expand_neon_args (target, icode, 1, exp,
18318 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18320 case NEON_LOAD1LANE:
18321 case NEON_LOADSTRUCTLANE:
18322 return arm_expand_neon_args (target, icode, 1, exp,
18323 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18327 case NEON_STORESTRUCT:
18328 return arm_expand_neon_args (target, icode, 0, exp,
18329 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18331 case NEON_STORE1LANE:
18332 case NEON_STORESTRUCTLANE:
18333 return arm_expand_neon_args (target, icode, 0, exp,
18334 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18338 gcc_unreachable ();
18341 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18343 neon_reinterpret (rtx dest, rtx src)
18345 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18348 /* Emit code to place a Neon pair result in memory locations (with equal
18351 neon_emit_pair_result_insn (enum machine_mode mode,
18352 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18355 rtx mem = gen_rtx_MEM (mode, destaddr);
18356 rtx tmp1 = gen_reg_rtx (mode);
18357 rtx tmp2 = gen_reg_rtx (mode);
18359 emit_insn (intfn (tmp1, op1, tmp2, op2));
18361 emit_move_insn (mem, tmp1);
18362 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18363 emit_move_insn (mem, tmp2);
18366 /* Set up operands for a register copy from src to dest, taking care not to
18367 clobber registers in the process.
18368 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18369 be called with a large N, so that should be OK. */
18372 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18374 unsigned int copied = 0, opctr = 0;
18375 unsigned int done = (1 << count) - 1;
18378 while (copied != done)
18380 for (i = 0; i < count; i++)
18384 for (j = 0; good && j < count; j++)
18385 if (i != j && (copied & (1 << j)) == 0
18386 && reg_overlap_mentioned_p (src[j], dest[i]))
18391 operands[opctr++] = dest[i];
18392 operands[opctr++] = src[i];
18398 gcc_assert (opctr == count * 2);
18401 /* Expand an expression EXP that calls a built-in function,
18402 with result going to TARGET if that's convenient
18403 (and in mode MODE if that's convenient).
18404 SUBTARGET may be used as the target for computing one of EXP's operands.
18405 IGNORE is nonzero if the value is to be ignored. */
18408 arm_expand_builtin (tree exp,
18410 rtx subtarget ATTRIBUTE_UNUSED,
18411 enum machine_mode mode ATTRIBUTE_UNUSED,
18412 int ignore ATTRIBUTE_UNUSED)
18414 const struct builtin_description * d;
18415 enum insn_code icode;
18416 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18424 int fcode = DECL_FUNCTION_CODE (fndecl);
18426 enum machine_mode tmode;
18427 enum machine_mode mode0;
18428 enum machine_mode mode1;
18429 enum machine_mode mode2;
18431 if (fcode >= ARM_BUILTIN_NEON_BASE)
18432 return arm_expand_neon_builtin (fcode, exp, target);
18436 case ARM_BUILTIN_TEXTRMSB:
18437 case ARM_BUILTIN_TEXTRMUB:
18438 case ARM_BUILTIN_TEXTRMSH:
18439 case ARM_BUILTIN_TEXTRMUH:
18440 case ARM_BUILTIN_TEXTRMSW:
18441 case ARM_BUILTIN_TEXTRMUW:
18442 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18443 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18444 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18445 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18446 : CODE_FOR_iwmmxt_textrmw);
18448 arg0 = CALL_EXPR_ARG (exp, 0);
18449 arg1 = CALL_EXPR_ARG (exp, 1);
18450 op0 = expand_normal (arg0);
18451 op1 = expand_normal (arg1);
18452 tmode = insn_data[icode].operand[0].mode;
18453 mode0 = insn_data[icode].operand[1].mode;
18454 mode1 = insn_data[icode].operand[2].mode;
18456 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18457 op0 = copy_to_mode_reg (mode0, op0);
18458 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18460 /* @@@ better error message */
18461 error ("selector must be an immediate");
18462 return gen_reg_rtx (tmode);
18465 || GET_MODE (target) != tmode
18466 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18467 target = gen_reg_rtx (tmode);
18468 pat = GEN_FCN (icode) (target, op0, op1);
18474 case ARM_BUILTIN_TINSRB:
18475 case ARM_BUILTIN_TINSRH:
18476 case ARM_BUILTIN_TINSRW:
18477 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18478 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18479 : CODE_FOR_iwmmxt_tinsrw);
18480 arg0 = CALL_EXPR_ARG (exp, 0);
18481 arg1 = CALL_EXPR_ARG (exp, 1);
18482 arg2 = CALL_EXPR_ARG (exp, 2);
18483 op0 = expand_normal (arg0);
18484 op1 = expand_normal (arg1);
18485 op2 = expand_normal (arg2);
18486 tmode = insn_data[icode].operand[0].mode;
18487 mode0 = insn_data[icode].operand[1].mode;
18488 mode1 = insn_data[icode].operand[2].mode;
18489 mode2 = insn_data[icode].operand[3].mode;
18491 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18492 op0 = copy_to_mode_reg (mode0, op0);
18493 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18494 op1 = copy_to_mode_reg (mode1, op1);
18495 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18497 /* @@@ better error message */
18498 error ("selector must be an immediate");
18502 || GET_MODE (target) != tmode
18503 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18504 target = gen_reg_rtx (tmode);
18505 pat = GEN_FCN (icode) (target, op0, op1, op2);
18511 case ARM_BUILTIN_SETWCX:
18512 arg0 = CALL_EXPR_ARG (exp, 0);
18513 arg1 = CALL_EXPR_ARG (exp, 1);
18514 op0 = force_reg (SImode, expand_normal (arg0));
18515 op1 = expand_normal (arg1);
18516 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18519 case ARM_BUILTIN_GETWCX:
18520 arg0 = CALL_EXPR_ARG (exp, 0);
18521 op0 = expand_normal (arg0);
18522 target = gen_reg_rtx (SImode);
18523 emit_insn (gen_iwmmxt_tmrc (target, op0));
18526 case ARM_BUILTIN_WSHUFH:
18527 icode = CODE_FOR_iwmmxt_wshufh;
18528 arg0 = CALL_EXPR_ARG (exp, 0);
18529 arg1 = CALL_EXPR_ARG (exp, 1);
18530 op0 = expand_normal (arg0);
18531 op1 = expand_normal (arg1);
18532 tmode = insn_data[icode].operand[0].mode;
18533 mode1 = insn_data[icode].operand[1].mode;
18534 mode2 = insn_data[icode].operand[2].mode;
18536 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18537 op0 = copy_to_mode_reg (mode1, op0);
18538 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18540 /* @@@ better error message */
18541 error ("mask must be an immediate");
18545 || GET_MODE (target) != tmode
18546 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18547 target = gen_reg_rtx (tmode);
18548 pat = GEN_FCN (icode) (target, op0, op1);
18554 case ARM_BUILTIN_WSADB:
18555 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18556 case ARM_BUILTIN_WSADH:
18557 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18558 case ARM_BUILTIN_WSADBZ:
18559 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18560 case ARM_BUILTIN_WSADHZ:
18561 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18563 /* Several three-argument builtins. */
18564 case ARM_BUILTIN_WMACS:
18565 case ARM_BUILTIN_WMACU:
18566 case ARM_BUILTIN_WALIGN:
18567 case ARM_BUILTIN_TMIA:
18568 case ARM_BUILTIN_TMIAPH:
18569 case ARM_BUILTIN_TMIATT:
18570 case ARM_BUILTIN_TMIATB:
18571 case ARM_BUILTIN_TMIABT:
18572 case ARM_BUILTIN_TMIABB:
18573 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18574 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18575 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18576 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18577 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18578 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18579 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18580 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18581 : CODE_FOR_iwmmxt_walign);
18582 arg0 = CALL_EXPR_ARG (exp, 0);
18583 arg1 = CALL_EXPR_ARG (exp, 1);
18584 arg2 = CALL_EXPR_ARG (exp, 2);
18585 op0 = expand_normal (arg0);
18586 op1 = expand_normal (arg1);
18587 op2 = expand_normal (arg2);
18588 tmode = insn_data[icode].operand[0].mode;
18589 mode0 = insn_data[icode].operand[1].mode;
18590 mode1 = insn_data[icode].operand[2].mode;
18591 mode2 = insn_data[icode].operand[3].mode;
18593 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18594 op0 = copy_to_mode_reg (mode0, op0);
18595 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18596 op1 = copy_to_mode_reg (mode1, op1);
18597 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18598 op2 = copy_to_mode_reg (mode2, op2);
18600 || GET_MODE (target) != tmode
18601 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18602 target = gen_reg_rtx (tmode);
18603 pat = GEN_FCN (icode) (target, op0, op1, op2);
18609 case ARM_BUILTIN_WZERO:
18610 target = gen_reg_rtx (DImode);
18611 emit_insn (gen_iwmmxt_clrdi (target));
18614 case ARM_BUILTIN_THREAD_POINTER:
18615 return arm_load_tp (target);
18621 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18622 if (d->code == (const enum arm_builtins) fcode)
18623 return arm_expand_binop_builtin (d->icode, exp, target);
18625 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18626 if (d->code == (const enum arm_builtins) fcode)
18627 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18629 /* @@@ Should really do something sensible here. */
18633 /* Return the number (counting from 0) of
18634 the least significant set bit in MASK. */
18637 number_of_first_bit_set (unsigned mask)
18642 (mask & (1 << bit)) == 0;
18649 /* Emit code to push or pop registers to or from the stack. F is the
18650 assembly file. MASK is the registers to push or pop. PUSH is
18651 nonzero if we should push, and zero if we should pop. For debugging
18652 output, if pushing, adjust CFA_OFFSET by the amount of space added
18653 to the stack. REAL_REGS should have the same number of bits set as
18654 MASK, and will be used instead (in the same order) to describe which
18655 registers were saved - this is used to mark the save slots when we
18656 push high registers after moving them to low registers. */
18658 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18659 unsigned long real_regs)
18662 int lo_mask = mask & 0xFF;
18663 int pushed_words = 0;
18667 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18669 /* Special case. Do not generate a POP PC statement here, do it in
18671 thumb_exit (f, -1);
18675 if (ARM_EABI_UNWIND_TABLES && push)
18677 fprintf (f, "\t.save\t{");
18678 for (regno = 0; regno < 15; regno++)
18680 if (real_regs & (1 << regno))
18682 if (real_regs & ((1 << regno) -1))
18684 asm_fprintf (f, "%r", regno);
18687 fprintf (f, "}\n");
18690 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18692 /* Look at the low registers first. */
18693 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18697 asm_fprintf (f, "%r", regno);
18699 if ((lo_mask & ~1) != 0)
18706 if (push && (mask & (1 << LR_REGNUM)))
18708 /* Catch pushing the LR. */
18712 asm_fprintf (f, "%r", LR_REGNUM);
18716 else if (!push && (mask & (1 << PC_REGNUM)))
18718 /* Catch popping the PC. */
18719 if (TARGET_INTERWORK || TARGET_BACKTRACE
18720 || crtl->calls_eh_return)
18722 /* The PC is never poped directly, instead
18723 it is popped into r3 and then BX is used. */
18724 fprintf (f, "}\n");
18726 thumb_exit (f, -1);
18735 asm_fprintf (f, "%r", PC_REGNUM);
18739 fprintf (f, "}\n");
18741 if (push && pushed_words && dwarf2out_do_frame ())
18743 char *l = dwarf2out_cfi_label (false);
18744 int pushed_mask = real_regs;
18746 *cfa_offset += pushed_words * 4;
18747 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18750 pushed_mask = real_regs;
18751 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18753 if (pushed_mask & 1)
18754 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18759 /* Generate code to return from a thumb function.
18760 If 'reg_containing_return_addr' is -1, then the return address is
18761 actually on the stack, at the stack pointer. */
18763 thumb_exit (FILE *f, int reg_containing_return_addr)
18765 unsigned regs_available_for_popping;
18766 unsigned regs_to_pop;
18768 unsigned available;
18772 int restore_a4 = FALSE;
18774 /* Compute the registers we need to pop. */
18778 if (reg_containing_return_addr == -1)
18780 regs_to_pop |= 1 << LR_REGNUM;
18784 if (TARGET_BACKTRACE)
18786 /* Restore the (ARM) frame pointer and stack pointer. */
18787 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18791 /* If there is nothing to pop then just emit the BX instruction and
18793 if (pops_needed == 0)
18795 if (crtl->calls_eh_return)
18796 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18798 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18801 /* Otherwise if we are not supporting interworking and we have not created
18802 a backtrace structure and the function was not entered in ARM mode then
18803 just pop the return address straight into the PC. */
18804 else if (!TARGET_INTERWORK
18805 && !TARGET_BACKTRACE
18806 && !is_called_in_ARM_mode (current_function_decl)
18807 && !crtl->calls_eh_return)
18809 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18813 /* Find out how many of the (return) argument registers we can corrupt. */
18814 regs_available_for_popping = 0;
18816 /* If returning via __builtin_eh_return, the bottom three registers
18817 all contain information needed for the return. */
18818 if (crtl->calls_eh_return)
18822 /* If we can deduce the registers used from the function's
18823 return value. This is more reliable that examining
18824 df_regs_ever_live_p () because that will be set if the register is
18825 ever used in the function, not just if the register is used
18826 to hold a return value. */
18828 if (crtl->return_rtx != 0)
18829 mode = GET_MODE (crtl->return_rtx);
18831 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18833 size = GET_MODE_SIZE (mode);
18837 /* In a void function we can use any argument register.
18838 In a function that returns a structure on the stack
18839 we can use the second and third argument registers. */
18840 if (mode == VOIDmode)
18841 regs_available_for_popping =
18842 (1 << ARG_REGISTER (1))
18843 | (1 << ARG_REGISTER (2))
18844 | (1 << ARG_REGISTER (3));
18846 regs_available_for_popping =
18847 (1 << ARG_REGISTER (2))
18848 | (1 << ARG_REGISTER (3));
18850 else if (size <= 4)
18851 regs_available_for_popping =
18852 (1 << ARG_REGISTER (2))
18853 | (1 << ARG_REGISTER (3));
18854 else if (size <= 8)
18855 regs_available_for_popping =
18856 (1 << ARG_REGISTER (3));
18859 /* Match registers to be popped with registers into which we pop them. */
18860 for (available = regs_available_for_popping,
18861 required = regs_to_pop;
18862 required != 0 && available != 0;
18863 available &= ~(available & - available),
18864 required &= ~(required & - required))
18867 /* If we have any popping registers left over, remove them. */
18869 regs_available_for_popping &= ~available;
18871 /* Otherwise if we need another popping register we can use
18872 the fourth argument register. */
18873 else if (pops_needed)
18875 /* If we have not found any free argument registers and
18876 reg a4 contains the return address, we must move it. */
18877 if (regs_available_for_popping == 0
18878 && reg_containing_return_addr == LAST_ARG_REGNUM)
18880 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18881 reg_containing_return_addr = LR_REGNUM;
18883 else if (size > 12)
18885 /* Register a4 is being used to hold part of the return value,
18886 but we have dire need of a free, low register. */
18889 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
18892 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18894 /* The fourth argument register is available. */
18895 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18901 /* Pop as many registers as we can. */
18902 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18903 regs_available_for_popping);
18905 /* Process the registers we popped. */
18906 if (reg_containing_return_addr == -1)
18908 /* The return address was popped into the lowest numbered register. */
18909 regs_to_pop &= ~(1 << LR_REGNUM);
18911 reg_containing_return_addr =
18912 number_of_first_bit_set (regs_available_for_popping);
18914 /* Remove this register for the mask of available registers, so that
18915 the return address will not be corrupted by further pops. */
18916 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18919 /* If we popped other registers then handle them here. */
18920 if (regs_available_for_popping)
18924 /* Work out which register currently contains the frame pointer. */
18925 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18927 /* Move it into the correct place. */
18928 asm_fprintf (f, "\tmov\t%r, %r\n",
18929 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18931 /* (Temporarily) remove it from the mask of popped registers. */
18932 regs_available_for_popping &= ~(1 << frame_pointer);
18933 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18935 if (regs_available_for_popping)
18939 /* We popped the stack pointer as well,
18940 find the register that contains it. */
18941 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18943 /* Move it into the stack register. */
18944 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18946 /* At this point we have popped all necessary registers, so
18947 do not worry about restoring regs_available_for_popping
18948 to its correct value:
18950 assert (pops_needed == 0)
18951 assert (regs_available_for_popping == (1 << frame_pointer))
18952 assert (regs_to_pop == (1 << STACK_POINTER)) */
18956 /* Since we have just move the popped value into the frame
18957 pointer, the popping register is available for reuse, and
18958 we know that we still have the stack pointer left to pop. */
18959 regs_available_for_popping |= (1 << frame_pointer);
18963 /* If we still have registers left on the stack, but we no longer have
18964 any registers into which we can pop them, then we must move the return
18965 address into the link register and make available the register that
18967 if (regs_available_for_popping == 0 && pops_needed > 0)
18969 regs_available_for_popping |= 1 << reg_containing_return_addr;
18971 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18972 reg_containing_return_addr);
18974 reg_containing_return_addr = LR_REGNUM;
18977 /* If we have registers left on the stack then pop some more.
18978 We know that at most we will want to pop FP and SP. */
18979 if (pops_needed > 0)
18984 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18985 regs_available_for_popping);
18987 /* We have popped either FP or SP.
18988 Move whichever one it is into the correct register. */
18989 popped_into = number_of_first_bit_set (regs_available_for_popping);
18990 move_to = number_of_first_bit_set (regs_to_pop);
18992 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18994 regs_to_pop &= ~(1 << move_to);
18999 /* If we still have not popped everything then we must have only
19000 had one register available to us and we are now popping the SP. */
19001 if (pops_needed > 0)
19005 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19006 regs_available_for_popping);
19008 popped_into = number_of_first_bit_set (regs_available_for_popping);
19010 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19012 assert (regs_to_pop == (1 << STACK_POINTER))
19013 assert (pops_needed == 1)
19017 /* If necessary restore the a4 register. */
19020 if (reg_containing_return_addr != LR_REGNUM)
19022 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19023 reg_containing_return_addr = LR_REGNUM;
19026 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19029 if (crtl->calls_eh_return)
19030 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19032 /* Return to caller. */
19033 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19038 thumb1_final_prescan_insn (rtx insn)
19040 if (flag_print_asm_name)
19041 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19042 INSN_ADDRESSES (INSN_UID (insn)));
19046 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19048 unsigned HOST_WIDE_INT mask = 0xff;
19051 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19052 if (val == 0) /* XXX */
19055 for (i = 0; i < 25; i++)
19056 if ((val & (mask << i)) == val)
19062 /* Returns nonzero if the current function contains,
19063 or might contain a far jump. */
19065 thumb_far_jump_used_p (void)
19069 /* This test is only important for leaf functions. */
19070 /* assert (!leaf_function_p ()); */
19072 /* If we have already decided that far jumps may be used,
19073 do not bother checking again, and always return true even if
19074 it turns out that they are not being used. Once we have made
19075 the decision that far jumps are present (and that hence the link
19076 register will be pushed onto the stack) we cannot go back on it. */
19077 if (cfun->machine->far_jump_used)
19080 /* If this function is not being called from the prologue/epilogue
19081 generation code then it must be being called from the
19082 INITIAL_ELIMINATION_OFFSET macro. */
19083 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19085 /* In this case we know that we are being asked about the elimination
19086 of the arg pointer register. If that register is not being used,
19087 then there are no arguments on the stack, and we do not have to
19088 worry that a far jump might force the prologue to push the link
19089 register, changing the stack offsets. In this case we can just
19090 return false, since the presence of far jumps in the function will
19091 not affect stack offsets.
19093 If the arg pointer is live (or if it was live, but has now been
19094 eliminated and so set to dead) then we do have to test to see if
19095 the function might contain a far jump. This test can lead to some
19096 false negatives, since before reload is completed, then length of
19097 branch instructions is not known, so gcc defaults to returning their
19098 longest length, which in turn sets the far jump attribute to true.
19100 A false negative will not result in bad code being generated, but it
19101 will result in a needless push and pop of the link register. We
19102 hope that this does not occur too often.
19104 If we need doubleword stack alignment this could affect the other
19105 elimination offsets so we can't risk getting it wrong. */
19106 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19107 cfun->machine->arg_pointer_live = 1;
19108 else if (!cfun->machine->arg_pointer_live)
19112 /* Check to see if the function contains a branch
19113 insn with the far jump attribute set. */
19114 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19116 if (GET_CODE (insn) == JUMP_INSN
19117 /* Ignore tablejump patterns. */
19118 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19119 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19120 && get_attr_far_jump (insn) == FAR_JUMP_YES
19123 /* Record the fact that we have decided that
19124 the function does use far jumps. */
19125 cfun->machine->far_jump_used = 1;
19133 /* Return nonzero if FUNC must be entered in ARM mode. */
19135 is_called_in_ARM_mode (tree func)
19137 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19139 /* Ignore the problem about functions whose address is taken. */
19140 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
19144 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19150 /* The bits which aren't usefully expanded as rtl. */
19152 thumb_unexpanded_epilogue (void)
19154 arm_stack_offsets *offsets;
19156 unsigned long live_regs_mask = 0;
19157 int high_regs_pushed = 0;
19158 int had_to_push_lr;
19161 if (cfun->machine->return_used_this_function != 0)
19164 if (IS_NAKED (arm_current_func_type ()))
19167 offsets = arm_get_frame_offsets ();
19168 live_regs_mask = offsets->saved_regs_mask;
19169 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19171 /* If we can deduce the registers used from the function's return value.
19172 This is more reliable that examining df_regs_ever_live_p () because that
19173 will be set if the register is ever used in the function, not just if
19174 the register is used to hold a return value. */
19175 size = arm_size_return_regs ();
19177 /* The prolog may have pushed some high registers to use as
19178 work registers. e.g. the testsuite file:
19179 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19180 compiles to produce:
19181 push {r4, r5, r6, r7, lr}
19185 as part of the prolog. We have to undo that pushing here. */
19187 if (high_regs_pushed)
19189 unsigned long mask = live_regs_mask & 0xff;
19192 /* The available low registers depend on the size of the value we are
19200 /* Oh dear! We have no low registers into which we can pop
19203 ("no low registers available for popping high registers");
19205 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19206 if (live_regs_mask & (1 << next_hi_reg))
19209 while (high_regs_pushed)
19211 /* Find lo register(s) into which the high register(s) can
19213 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19215 if (mask & (1 << regno))
19216 high_regs_pushed--;
19217 if (high_regs_pushed == 0)
19221 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19223 /* Pop the values into the low register(s). */
19224 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19226 /* Move the value(s) into the high registers. */
19227 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19229 if (mask & (1 << regno))
19231 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19234 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19235 if (live_regs_mask & (1 << next_hi_reg))
19240 live_regs_mask &= ~0x0f00;
19243 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19244 live_regs_mask &= 0xff;
19246 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19248 /* Pop the return address into the PC. */
19249 if (had_to_push_lr)
19250 live_regs_mask |= 1 << PC_REGNUM;
19252 /* Either no argument registers were pushed or a backtrace
19253 structure was created which includes an adjusted stack
19254 pointer, so just pop everything. */
19255 if (live_regs_mask)
19256 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19259 /* We have either just popped the return address into the
19260 PC or it is was kept in LR for the entire function. */
19261 if (!had_to_push_lr)
19262 thumb_exit (asm_out_file, LR_REGNUM);
19266 /* Pop everything but the return address. */
19267 if (live_regs_mask)
19268 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19271 if (had_to_push_lr)
19275 /* We have no free low regs, so save one. */
19276 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19280 /* Get the return address into a temporary register. */
19281 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19282 1 << LAST_ARG_REGNUM);
19286 /* Move the return address to lr. */
19287 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19289 /* Restore the low register. */
19290 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19295 regno = LAST_ARG_REGNUM;
19300 /* Remove the argument registers that were pushed onto the stack. */
19301 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19302 SP_REGNUM, SP_REGNUM,
19303 crtl->args.pretend_args_size);
19305 thumb_exit (asm_out_file, regno);
19311 /* Functions to save and restore machine-specific function data. */
19312 static struct machine_function *
19313 arm_init_machine_status (void)
19315 struct machine_function *machine;
19316 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
19318 #if ARM_FT_UNKNOWN != 0
19319 machine->func_type = ARM_FT_UNKNOWN;
19324 /* Return an RTX indicating where the return address to the
19325 calling function can be found. */
19327 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19332 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19335 /* Do anything needed before RTL is emitted for each function. */
19337 arm_init_expanders (void)
19339 /* Arrange to initialize and mark the machine per-function status. */
19340 init_machine_status = arm_init_machine_status;
19342 /* This is to stop the combine pass optimizing away the alignment
19343 adjustment of va_arg. */
19344 /* ??? It is claimed that this should not be necessary. */
19346 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19350 /* Like arm_compute_initial_elimination offset. Simpler because there
19351 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19352 to point at the base of the local variables after static stack
19353 space for a function has been allocated. */
19356 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19358 arm_stack_offsets *offsets;
19360 offsets = arm_get_frame_offsets ();
19364 case ARG_POINTER_REGNUM:
19367 case STACK_POINTER_REGNUM:
19368 return offsets->outgoing_args - offsets->saved_args;
19370 case FRAME_POINTER_REGNUM:
19371 return offsets->soft_frame - offsets->saved_args;
19373 case ARM_HARD_FRAME_POINTER_REGNUM:
19374 return offsets->saved_regs - offsets->saved_args;
19376 case THUMB_HARD_FRAME_POINTER_REGNUM:
19377 return offsets->locals_base - offsets->saved_args;
19380 gcc_unreachable ();
19384 case FRAME_POINTER_REGNUM:
19387 case STACK_POINTER_REGNUM:
19388 return offsets->outgoing_args - offsets->soft_frame;
19390 case ARM_HARD_FRAME_POINTER_REGNUM:
19391 return offsets->saved_regs - offsets->soft_frame;
19393 case THUMB_HARD_FRAME_POINTER_REGNUM:
19394 return offsets->locals_base - offsets->soft_frame;
19397 gcc_unreachable ();
19402 gcc_unreachable ();
19406 /* Given the stack offsets and register mask in OFFSETS, decide
19407 how many additional registers to push instead of subtracting
19408 a constant from SP. */
19410 thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
19412 HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
19413 unsigned long live_regs_mask = offsets->saved_regs_mask;
19414 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19415 unsigned long l_mask = live_regs_mask & 0x40ff;
19416 /* Then count how many other high registers will need to be pushed. */
19417 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19420 /* If the stack frame size is 512 exactly, we can save one load
19421 instruction, which should make this a win even when optimizing
19423 if (!optimize_size && amount != 512)
19426 /* Can't do this if there are high registers to push, or if we
19427 are not going to do a push at all. */
19428 if (high_regs_pushed != 0 || l_mask == 0)
19431 /* Don't do this if thumb1_expand_prologue wants to emit instructions
19432 between the push and the stack frame allocation. */
19433 if ((flag_pic && arm_pic_register != INVALID_REGNUM)
19434 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
19437 for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
19442 gcc_assert (amount / 4 * 4 == amount);
19444 if (amount >= 512 && (amount - n_free * 4) < 512)
19445 return (amount - 508) / 4;
19446 if (amount <= n_free * 4)
19451 /* Generate the rest of a function's prologue. */
19453 thumb1_expand_prologue (void)
19457 HOST_WIDE_INT amount;
19458 arm_stack_offsets *offsets;
19459 unsigned long func_type;
19461 unsigned long live_regs_mask;
19463 func_type = arm_current_func_type ();
19465 /* Naked functions don't have prologues. */
19466 if (IS_NAKED (func_type))
19469 if (IS_INTERRUPT (func_type))
19471 error ("interrupt Service Routines cannot be coded in Thumb mode");
19475 offsets = arm_get_frame_offsets ();
19476 live_regs_mask = offsets->saved_regs_mask;
19477 /* Load the pic register before setting the frame pointer,
19478 so we can use r7 as a temporary work register. */
19479 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19480 arm_load_pic_register (live_regs_mask);
19482 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19483 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19484 stack_pointer_rtx);
19486 amount = offsets->outgoing_args - offsets->saved_regs;
19487 amount -= 4 * thumb1_extra_regs_pushed (offsets);
19492 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19493 GEN_INT (- amount)));
19494 RTX_FRAME_RELATED_P (insn) = 1;
19500 /* The stack decrement is too big for an immediate value in a single
19501 insn. In theory we could issue multiple subtracts, but after
19502 three of them it becomes more space efficient to place the full
19503 value in the constant pool and load into a register. (Also the
19504 ARM debugger really likes to see only one stack decrement per
19505 function). So instead we look for a scratch register into which
19506 we can load the decrement, and then we subtract this from the
19507 stack pointer. Unfortunately on the thumb the only available
19508 scratch registers are the argument registers, and we cannot use
19509 these as they may hold arguments to the function. Instead we
19510 attempt to locate a call preserved register which is used by this
19511 function. If we can find one, then we know that it will have
19512 been pushed at the start of the prologue and so we can corrupt
19514 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19515 if (live_regs_mask & (1 << regno))
19518 gcc_assert(regno <= LAST_LO_REGNUM);
19520 reg = gen_rtx_REG (SImode, regno);
19522 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19524 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19525 stack_pointer_rtx, reg));
19526 RTX_FRAME_RELATED_P (insn) = 1;
19527 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19528 plus_constant (stack_pointer_rtx,
19530 RTX_FRAME_RELATED_P (dwarf) = 1;
19531 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19535 if (frame_pointer_needed)
19536 thumb_set_frame_pointer (offsets);
19538 /* If we are profiling, make sure no instructions are scheduled before
19539 the call to mcount. Similarly if the user has requested no
19540 scheduling in the prolog. Similarly if we want non-call exceptions
19541 using the EABI unwinder, to prevent faulting instructions from being
19542 swapped with a stack adjustment. */
19543 if (crtl->profile || !TARGET_SCHED_PROLOG
19544 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19545 emit_insn (gen_blockage ());
19547 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19548 if (live_regs_mask & 0xff)
19549 cfun->machine->lr_save_eliminated = 0;
19554 thumb1_expand_epilogue (void)
19556 HOST_WIDE_INT amount;
19557 arm_stack_offsets *offsets;
19560 /* Naked functions don't have prologues. */
19561 if (IS_NAKED (arm_current_func_type ()))
19564 offsets = arm_get_frame_offsets ();
19565 amount = offsets->outgoing_args - offsets->saved_regs;
19567 if (frame_pointer_needed)
19569 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19570 amount = offsets->locals_base - offsets->saved_regs;
19573 gcc_assert (amount >= 0);
19577 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19578 GEN_INT (amount)));
19581 /* r3 is always free in the epilogue. */
19582 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19584 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19585 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19589 /* Emit a USE (stack_pointer_rtx), so that
19590 the stack adjustment will not be deleted. */
19591 emit_insn (gen_prologue_use (stack_pointer_rtx));
19593 if (crtl->profile || !TARGET_SCHED_PROLOG)
19594 emit_insn (gen_blockage ());
19596 /* Emit a clobber for each insn that will be restored in the epilogue,
19597 so that flow2 will get register lifetimes correct. */
19598 for (regno = 0; regno < 13; regno++)
19599 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19600 emit_clobber (gen_rtx_REG (SImode, regno));
19602 if (! df_regs_ever_live_p (LR_REGNUM))
19603 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19607 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19609 arm_stack_offsets *offsets;
19610 unsigned long live_regs_mask = 0;
19611 unsigned long l_mask;
19612 unsigned high_regs_pushed = 0;
19613 int cfa_offset = 0;
19616 if (IS_NAKED (arm_current_func_type ()))
19619 if (is_called_in_ARM_mode (current_function_decl))
19623 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19624 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19626 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19628 /* Generate code sequence to switch us into Thumb mode. */
19629 /* The .code 32 directive has already been emitted by
19630 ASM_DECLARE_FUNCTION_NAME. */
19631 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19632 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19634 /* Generate a label, so that the debugger will notice the
19635 change in instruction sets. This label is also used by
19636 the assembler to bypass the ARM code when this function
19637 is called from a Thumb encoded function elsewhere in the
19638 same file. Hence the definition of STUB_NAME here must
19639 agree with the definition in gas/config/tc-arm.c. */
19641 #define STUB_NAME ".real_start_of"
19643 fprintf (f, "\t.code\t16\n");
19645 if (arm_dllexport_name_p (name))
19646 name = arm_strip_name_encoding (name);
19648 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19649 fprintf (f, "\t.thumb_func\n");
19650 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19653 if (crtl->args.pretend_args_size)
19655 /* Output unwind directive for the stack adjustment. */
19656 if (ARM_EABI_UNWIND_TABLES)
19657 fprintf (f, "\t.pad #%d\n",
19658 crtl->args.pretend_args_size);
19660 if (cfun->machine->uses_anonymous_args)
19664 fprintf (f, "\tpush\t{");
19666 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19668 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19669 regno <= LAST_ARG_REGNUM;
19671 asm_fprintf (f, "%r%s", regno,
19672 regno == LAST_ARG_REGNUM ? "" : ", ");
19674 fprintf (f, "}\n");
19677 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19678 SP_REGNUM, SP_REGNUM,
19679 crtl->args.pretend_args_size);
19681 /* We don't need to record the stores for unwinding (would it
19682 help the debugger any if we did?), but record the change in
19683 the stack pointer. */
19684 if (dwarf2out_do_frame ())
19686 char *l = dwarf2out_cfi_label (false);
19688 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19689 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19693 /* Get the registers we are going to push. */
19694 offsets = arm_get_frame_offsets ();
19695 live_regs_mask = offsets->saved_regs_mask;
19696 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19697 l_mask = live_regs_mask & 0x40ff;
19698 /* Then count how many other high registers will need to be pushed. */
19699 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19701 if (TARGET_BACKTRACE)
19704 unsigned work_register;
19706 /* We have been asked to create a stack backtrace structure.
19707 The code looks like this:
19711 0 sub SP, #16 Reserve space for 4 registers.
19712 2 push {R7} Push low registers.
19713 4 add R7, SP, #20 Get the stack pointer before the push.
19714 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19715 8 mov R7, PC Get hold of the start of this code plus 12.
19716 10 str R7, [SP, #16] Store it.
19717 12 mov R7, FP Get hold of the current frame pointer.
19718 14 str R7, [SP, #4] Store it.
19719 16 mov R7, LR Get hold of the current return address.
19720 18 str R7, [SP, #12] Store it.
19721 20 add R7, SP, #16 Point at the start of the backtrace structure.
19722 22 mov FP, R7 Put this value into the frame pointer. */
19724 work_register = thumb_find_work_register (live_regs_mask);
19726 if (ARM_EABI_UNWIND_TABLES)
19727 asm_fprintf (f, "\t.pad #16\n");
19730 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19731 SP_REGNUM, SP_REGNUM);
19733 if (dwarf2out_do_frame ())
19735 char *l = dwarf2out_cfi_label (false);
19737 cfa_offset = cfa_offset + 16;
19738 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19743 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19744 offset = bit_count (l_mask) * UNITS_PER_WORD;
19749 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19750 offset + 16 + crtl->args.pretend_args_size);
19752 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19755 /* Make sure that the instruction fetching the PC is in the right place
19756 to calculate "start of backtrace creation code + 12". */
19759 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19760 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19762 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19763 ARM_HARD_FRAME_POINTER_REGNUM);
19764 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19769 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19770 ARM_HARD_FRAME_POINTER_REGNUM);
19771 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19773 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19774 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19778 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19779 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19781 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19783 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19784 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19786 /* Optimization: If we are not pushing any low registers but we are going
19787 to push some high registers then delay our first push. This will just
19788 be a push of LR and we can combine it with the push of the first high
19790 else if ((l_mask & 0xff) != 0
19791 || (high_regs_pushed == 0 && l_mask))
19793 unsigned long mask = l_mask;
19794 mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1;
19795 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
19798 if (high_regs_pushed)
19800 unsigned pushable_regs;
19801 unsigned next_hi_reg;
19803 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19804 if (live_regs_mask & (1 << next_hi_reg))
19807 pushable_regs = l_mask & 0xff;
19809 if (pushable_regs == 0)
19810 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19812 while (high_regs_pushed > 0)
19814 unsigned long real_regs_mask = 0;
19816 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19818 if (pushable_regs & (1 << regno))
19820 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19822 high_regs_pushed --;
19823 real_regs_mask |= (1 << next_hi_reg);
19825 if (high_regs_pushed)
19827 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19829 if (live_regs_mask & (1 << next_hi_reg))
19834 pushable_regs &= ~((1 << regno) - 1);
19840 /* If we had to find a work register and we have not yet
19841 saved the LR then add it to the list of regs to push. */
19842 if (l_mask == (1 << LR_REGNUM))
19844 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19846 real_regs_mask | (1 << LR_REGNUM));
19850 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19855 /* Handle the case of a double word load into a low register from
19856 a computed memory address. The computed address may involve a
19857 register which is overwritten by the load. */
19859 thumb_load_double_from_address (rtx *operands)
19867 gcc_assert (GET_CODE (operands[0]) == REG);
19868 gcc_assert (GET_CODE (operands[1]) == MEM);
19870 /* Get the memory address. */
19871 addr = XEXP (operands[1], 0);
19873 /* Work out how the memory address is computed. */
19874 switch (GET_CODE (addr))
19877 operands[2] = adjust_address (operands[1], SImode, 4);
19879 if (REGNO (operands[0]) == REGNO (addr))
19881 output_asm_insn ("ldr\t%H0, %2", operands);
19882 output_asm_insn ("ldr\t%0, %1", operands);
19886 output_asm_insn ("ldr\t%0, %1", operands);
19887 output_asm_insn ("ldr\t%H0, %2", operands);
19892 /* Compute <address> + 4 for the high order load. */
19893 operands[2] = adjust_address (operands[1], SImode, 4);
19895 output_asm_insn ("ldr\t%0, %1", operands);
19896 output_asm_insn ("ldr\t%H0, %2", operands);
19900 arg1 = XEXP (addr, 0);
19901 arg2 = XEXP (addr, 1);
19903 if (CONSTANT_P (arg1))
19904 base = arg2, offset = arg1;
19906 base = arg1, offset = arg2;
19908 gcc_assert (GET_CODE (base) == REG);
19910 /* Catch the case of <address> = <reg> + <reg> */
19911 if (GET_CODE (offset) == REG)
19913 int reg_offset = REGNO (offset);
19914 int reg_base = REGNO (base);
19915 int reg_dest = REGNO (operands[0]);
19917 /* Add the base and offset registers together into the
19918 higher destination register. */
19919 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19920 reg_dest + 1, reg_base, reg_offset);
19922 /* Load the lower destination register from the address in
19923 the higher destination register. */
19924 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19925 reg_dest, reg_dest + 1);
19927 /* Load the higher destination register from its own address
19929 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19930 reg_dest + 1, reg_dest + 1);
19934 /* Compute <address> + 4 for the high order load. */
19935 operands[2] = adjust_address (operands[1], SImode, 4);
19937 /* If the computed address is held in the low order register
19938 then load the high order register first, otherwise always
19939 load the low order register first. */
19940 if (REGNO (operands[0]) == REGNO (base))
19942 output_asm_insn ("ldr\t%H0, %2", operands);
19943 output_asm_insn ("ldr\t%0, %1", operands);
19947 output_asm_insn ("ldr\t%0, %1", operands);
19948 output_asm_insn ("ldr\t%H0, %2", operands);
19954 /* With no registers to worry about we can just load the value
19956 operands[2] = adjust_address (operands[1], SImode, 4);
19958 output_asm_insn ("ldr\t%H0, %2", operands);
19959 output_asm_insn ("ldr\t%0, %1", operands);
19963 gcc_unreachable ();
19970 thumb_output_move_mem_multiple (int n, rtx *operands)
19977 if (REGNO (operands[4]) > REGNO (operands[5]))
19980 operands[4] = operands[5];
19983 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19984 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19988 if (REGNO (operands[4]) > REGNO (operands[5]))
19991 operands[4] = operands[5];
19994 if (REGNO (operands[5]) > REGNO (operands[6]))
19997 operands[5] = operands[6];
20000 if (REGNO (operands[4]) > REGNO (operands[5]))
20003 operands[4] = operands[5];
20007 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
20008 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
20012 gcc_unreachable ();
20018 /* Output a call-via instruction for thumb state. */
20020 thumb_call_via_reg (rtx reg)
20022 int regno = REGNO (reg);
20025 gcc_assert (regno < LR_REGNUM);
20027 /* If we are in the normal text section we can use a single instance
20028 per compilation unit. If we are doing function sections, then we need
20029 an entry per section, since we can't rely on reachability. */
20030 if (in_section == text_section)
20032 thumb_call_reg_needed = 1;
20034 if (thumb_call_via_label[regno] == NULL)
20035 thumb_call_via_label[regno] = gen_label_rtx ();
20036 labelp = thumb_call_via_label + regno;
20040 if (cfun->machine->call_via[regno] == NULL)
20041 cfun->machine->call_via[regno] = gen_label_rtx ();
20042 labelp = cfun->machine->call_via + regno;
20045 output_asm_insn ("bl\t%a0", labelp);
20049 /* Routines for generating rtl. */
20051 thumb_expand_movmemqi (rtx *operands)
20053 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20054 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20055 HOST_WIDE_INT len = INTVAL (operands[2]);
20056 HOST_WIDE_INT offset = 0;
20060 emit_insn (gen_movmem12b (out, in, out, in));
20066 emit_insn (gen_movmem8b (out, in, out, in));
20072 rtx reg = gen_reg_rtx (SImode);
20073 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20074 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20081 rtx reg = gen_reg_rtx (HImode);
20082 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20083 plus_constant (in, offset))));
20084 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
20092 rtx reg = gen_reg_rtx (QImode);
20093 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20094 plus_constant (in, offset))));
20095 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
20101 thumb_reload_out_hi (rtx *operands)
20103 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20106 /* Handle reading a half-word from memory during reload. */
20108 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20110 gcc_unreachable ();
20113 /* Return the length of a function name prefix
20114 that starts with the character 'c'. */
20116 arm_get_strip_length (int c)
20120 ARM_NAME_ENCODING_LENGTHS
20125 /* Return a pointer to a function's name with any
20126 and all prefix encodings stripped from it. */
20128 arm_strip_name_encoding (const char *name)
20132 while ((skip = arm_get_strip_length (* name)))
20138 /* If there is a '*' anywhere in the name's prefix, then
20139 emit the stripped name verbatim, otherwise prepend an
20140 underscore if leading underscores are being used. */
20142 arm_asm_output_labelref (FILE *stream, const char *name)
20147 while ((skip = arm_get_strip_length (* name)))
20149 verbatim |= (*name == '*');
20154 fputs (name, stream);
20156 asm_fprintf (stream, "%U%s", name);
20160 arm_file_start (void)
20164 if (TARGET_UNIFIED_ASM)
20165 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20169 const char *fpu_name;
20170 if (arm_select[0].string)
20171 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
20172 else if (arm_select[1].string)
20173 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
20175 asm_fprintf (asm_out_file, "\t.cpu %s\n",
20176 all_cores[arm_default_cpu].name);
20178 if (TARGET_SOFT_FLOAT)
20181 fpu_name = "softvfp";
20183 fpu_name = "softfpa";
20187 fpu_name = arm_fpu_desc->name;
20188 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
20190 if (TARGET_HARD_FLOAT)
20191 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20192 if (TARGET_HARD_FLOAT_ABI)
20193 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20196 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20198 /* Some of these attributes only apply when the corresponding features
20199 are used. However we don't have any easy way of figuring this out.
20200 Conservatively record the setting that would have been used. */
20202 /* Tag_ABI_FP_rounding. */
20203 if (flag_rounding_math)
20204 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20205 if (!flag_unsafe_math_optimizations)
20207 /* Tag_ABI_FP_denomal. */
20208 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20209 /* Tag_ABI_FP_exceptions. */
20210 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20212 /* Tag_ABI_FP_user_exceptions. */
20213 if (flag_signaling_nans)
20214 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20215 /* Tag_ABI_FP_number_model. */
20216 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20217 flag_finite_math_only ? 1 : 3);
20219 /* Tag_ABI_align8_needed. */
20220 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20221 /* Tag_ABI_align8_preserved. */
20222 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20223 /* Tag_ABI_enum_size. */
20224 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20225 flag_short_enums ? 1 : 2);
20227 /* Tag_ABI_optimization_goals. */
20230 else if (optimize >= 2)
20236 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20238 /* Tag_ABI_FP_16bit_format. */
20239 if (arm_fp16_format)
20240 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20241 (int)arm_fp16_format);
20243 if (arm_lang_output_object_attributes_hook)
20244 arm_lang_output_object_attributes_hook();
20246 default_file_start();
20250 arm_file_end (void)
20254 if (NEED_INDICATE_EXEC_STACK)
20255 /* Add .note.GNU-stack. */
20256 file_end_indicate_exec_stack ();
20258 if (! thumb_call_reg_needed)
20261 switch_to_section (text_section);
20262 asm_fprintf (asm_out_file, "\t.code 16\n");
20263 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20265 for (regno = 0; regno < LR_REGNUM; regno++)
20267 rtx label = thumb_call_via_label[regno];
20271 targetm.asm_out.internal_label (asm_out_file, "L",
20272 CODE_LABEL_NUMBER (label));
20273 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20279 /* Symbols in the text segment can be accessed without indirecting via the
20280 constant pool; it may take an extra binary operation, but this is still
20281 faster than indirecting via memory. Don't do this when not optimizing,
20282 since we won't be calculating al of the offsets necessary to do this
20286 arm_encode_section_info (tree decl, rtx rtl, int first)
20288 if (optimize > 0 && TREE_CONSTANT (decl))
20289 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20291 default_encode_section_info (decl, rtl, first);
20293 #endif /* !ARM_PE */
20296 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20298 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20299 && !strcmp (prefix, "L"))
20301 arm_ccfsm_state = 0;
20302 arm_target_insn = NULL;
20304 default_internal_label (stream, prefix, labelno);
20307 /* Output code to add DELTA to the first argument, and then jump
20308 to FUNCTION. Used for C++ multiple inheritance. */
20310 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20311 HOST_WIDE_INT delta,
20312 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20315 static int thunk_label = 0;
20318 int mi_delta = delta;
20319 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20321 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20324 mi_delta = - mi_delta;
20328 int labelno = thunk_label++;
20329 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20330 /* Thunks are entered in arm mode when avaiable. */
20331 if (TARGET_THUMB1_ONLY)
20333 /* push r3 so we can use it as a temporary. */
20334 /* TODO: Omit this save if r3 is not used. */
20335 fputs ("\tpush {r3}\n", file);
20336 fputs ("\tldr\tr3, ", file);
20340 fputs ("\tldr\tr12, ", file);
20342 assemble_name (file, label);
20343 fputc ('\n', file);
20346 /* If we are generating PIC, the ldr instruction below loads
20347 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20348 the address of the add + 8, so we have:
20350 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20353 Note that we have "+ 1" because some versions of GNU ld
20354 don't set the low bit of the result for R_ARM_REL32
20355 relocations against thumb function symbols.
20356 On ARMv6M this is +4, not +8. */
20357 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20358 assemble_name (file, labelpc);
20359 fputs (":\n", file);
20360 if (TARGET_THUMB1_ONLY)
20362 /* This is 2 insns after the start of the thunk, so we know it
20363 is 4-byte aligned. */
20364 fputs ("\tadd\tr3, pc, r3\n", file);
20365 fputs ("\tmov r12, r3\n", file);
20368 fputs ("\tadd\tr12, pc, r12\n", file);
20370 else if (TARGET_THUMB1_ONLY)
20371 fputs ("\tmov r12, r3\n", file);
20373 if (TARGET_THUMB1_ONLY)
20375 if (mi_delta > 255)
20377 fputs ("\tldr\tr3, ", file);
20378 assemble_name (file, label);
20379 fputs ("+4\n", file);
20380 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20381 mi_op, this_regno, this_regno);
20383 else if (mi_delta != 0)
20385 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20386 mi_op, this_regno, this_regno,
20392 /* TODO: Use movw/movt for large constants when available. */
20393 while (mi_delta != 0)
20395 if ((mi_delta & (3 << shift)) == 0)
20399 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20400 mi_op, this_regno, this_regno,
20401 mi_delta & (0xff << shift));
20402 mi_delta &= ~(0xff << shift);
20409 if (TARGET_THUMB1_ONLY)
20410 fputs ("\tpop\t{r3}\n", file);
20412 fprintf (file, "\tbx\tr12\n");
20413 ASM_OUTPUT_ALIGN (file, 2);
20414 assemble_name (file, label);
20415 fputs (":\n", file);
20418 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20419 rtx tem = XEXP (DECL_RTL (function), 0);
20420 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20421 tem = gen_rtx_MINUS (GET_MODE (tem),
20423 gen_rtx_SYMBOL_REF (Pmode,
20424 ggc_strdup (labelpc)));
20425 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20428 /* Output ".word .LTHUNKn". */
20429 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20431 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20432 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20436 fputs ("\tb\t", file);
20437 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20438 if (NEED_PLT_RELOC)
20439 fputs ("(PLT)", file);
20440 fputc ('\n', file);
20445 arm_emit_vector_const (FILE *file, rtx x)
20448 const char * pattern;
20450 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20452 switch (GET_MODE (x))
20454 case V2SImode: pattern = "%08x"; break;
20455 case V4HImode: pattern = "%04x"; break;
20456 case V8QImode: pattern = "%02x"; break;
20457 default: gcc_unreachable ();
20460 fprintf (file, "0x");
20461 for (i = CONST_VECTOR_NUNITS (x); i--;)
20465 element = CONST_VECTOR_ELT (x, i);
20466 fprintf (file, pattern, INTVAL (element));
20472 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20473 HFmode constant pool entries are actually loaded with ldr. */
20475 arm_emit_fp16_const (rtx c)
20480 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20481 bits = real_to_target (NULL, &r, HFmode);
20482 if (WORDS_BIG_ENDIAN)
20483 assemble_zeros (2);
20484 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20485 if (!WORDS_BIG_ENDIAN)
20486 assemble_zeros (2);
20490 arm_output_load_gr (rtx *operands)
20497 if (GET_CODE (operands [1]) != MEM
20498 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20499 || GET_CODE (reg = XEXP (sum, 0)) != REG
20500 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20501 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20502 return "wldrw%?\t%0, %1";
20504 /* Fix up an out-of-range load of a GR register. */
20505 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20506 wcgr = operands[0];
20508 output_asm_insn ("ldr%?\t%0, %1", operands);
20510 operands[0] = wcgr;
20512 output_asm_insn ("tmcr%?\t%0, %1", operands);
20513 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20518 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20520 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20521 named arg and all anonymous args onto the stack.
20522 XXX I know the prologue shouldn't be pushing registers, but it is faster
20526 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20527 enum machine_mode mode,
20530 int second_time ATTRIBUTE_UNUSED)
20534 cfun->machine->uses_anonymous_args = 1;
20535 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20537 nregs = pcum->aapcs_ncrn;
20538 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20542 nregs = pcum->nregs;
20544 if (nregs < NUM_ARG_REGS)
20545 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
20548 /* Return nonzero if the CONSUMER instruction (a store) does not need
20549 PRODUCER's value to calculate the address. */
20552 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20554 rtx value = PATTERN (producer);
20555 rtx addr = PATTERN (consumer);
20557 if (GET_CODE (value) == COND_EXEC)
20558 value = COND_EXEC_CODE (value);
20559 if (GET_CODE (value) == PARALLEL)
20560 value = XVECEXP (value, 0, 0);
20561 value = XEXP (value, 0);
20562 if (GET_CODE (addr) == COND_EXEC)
20563 addr = COND_EXEC_CODE (addr);
20564 if (GET_CODE (addr) == PARALLEL)
20565 addr = XVECEXP (addr, 0, 0);
20566 addr = XEXP (addr, 0);
20568 return !reg_overlap_mentioned_p (value, addr);
20571 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20572 have an early register shift value or amount dependency on the
20573 result of PRODUCER. */
20576 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20578 rtx value = PATTERN (producer);
20579 rtx op = PATTERN (consumer);
20582 if (GET_CODE (value) == COND_EXEC)
20583 value = COND_EXEC_CODE (value);
20584 if (GET_CODE (value) == PARALLEL)
20585 value = XVECEXP (value, 0, 0);
20586 value = XEXP (value, 0);
20587 if (GET_CODE (op) == COND_EXEC)
20588 op = COND_EXEC_CODE (op);
20589 if (GET_CODE (op) == PARALLEL)
20590 op = XVECEXP (op, 0, 0);
20593 early_op = XEXP (op, 0);
20594 /* This is either an actual independent shift, or a shift applied to
20595 the first operand of another operation. We want the whole shift
20597 if (GET_CODE (early_op) == REG)
20600 return !reg_overlap_mentioned_p (value, early_op);
20603 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20604 have an early register shift value dependency on the result of
20608 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20610 rtx value = PATTERN (producer);
20611 rtx op = PATTERN (consumer);
20614 if (GET_CODE (value) == COND_EXEC)
20615 value = COND_EXEC_CODE (value);
20616 if (GET_CODE (value) == PARALLEL)
20617 value = XVECEXP (value, 0, 0);
20618 value = XEXP (value, 0);
20619 if (GET_CODE (op) == COND_EXEC)
20620 op = COND_EXEC_CODE (op);
20621 if (GET_CODE (op) == PARALLEL)
20622 op = XVECEXP (op, 0, 0);
20625 early_op = XEXP (op, 0);
20627 /* This is either an actual independent shift, or a shift applied to
20628 the first operand of another operation. We want the value being
20629 shifted, in either case. */
20630 if (GET_CODE (early_op) != REG)
20631 early_op = XEXP (early_op, 0);
20633 return !reg_overlap_mentioned_p (value, early_op);
20636 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20637 have an early register mult dependency on the result of
20641 arm_no_early_mul_dep (rtx producer, rtx consumer)
20643 rtx value = PATTERN (producer);
20644 rtx op = PATTERN (consumer);
20646 if (GET_CODE (value) == COND_EXEC)
20647 value = COND_EXEC_CODE (value);
20648 if (GET_CODE (value) == PARALLEL)
20649 value = XVECEXP (value, 0, 0);
20650 value = XEXP (value, 0);
20651 if (GET_CODE (op) == COND_EXEC)
20652 op = COND_EXEC_CODE (op);
20653 if (GET_CODE (op) == PARALLEL)
20654 op = XVECEXP (op, 0, 0);
20657 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20659 if (GET_CODE (XEXP (op, 0)) == MULT)
20660 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20662 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20668 /* We can't rely on the caller doing the proper promotion when
20669 using APCS or ATPCS. */
20672 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20674 return !TARGET_AAPCS_BASED;
20677 static enum machine_mode
20678 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20679 enum machine_mode mode,
20680 int *punsignedp ATTRIBUTE_UNUSED,
20681 const_tree fntype ATTRIBUTE_UNUSED,
20682 int for_return ATTRIBUTE_UNUSED)
20684 if (GET_MODE_CLASS (mode) == MODE_INT
20685 && GET_MODE_SIZE (mode) < 4)
20691 /* AAPCS based ABIs use short enums by default. */
20694 arm_default_short_enums (void)
20696 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20700 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20703 arm_align_anon_bitfield (void)
20705 return TARGET_AAPCS_BASED;
20709 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20712 arm_cxx_guard_type (void)
20714 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20717 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20718 has an accumulator dependency on the result of the producer (a
20719 multiplication instruction) and no other dependency on that result. */
20721 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20723 rtx mul = PATTERN (producer);
20724 rtx mac = PATTERN (consumer);
20726 rtx mac_op0, mac_op1, mac_acc;
20728 if (GET_CODE (mul) == COND_EXEC)
20729 mul = COND_EXEC_CODE (mul);
20730 if (GET_CODE (mac) == COND_EXEC)
20731 mac = COND_EXEC_CODE (mac);
20733 /* Check that mul is of the form (set (...) (mult ...))
20734 and mla is of the form (set (...) (plus (mult ...) (...))). */
20735 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20736 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20737 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20740 mul_result = XEXP (mul, 0);
20741 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20742 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20743 mac_acc = XEXP (XEXP (mac, 1), 1);
20745 return (reg_overlap_mentioned_p (mul_result, mac_acc)
20746 && !reg_overlap_mentioned_p (mul_result, mac_op0)
20747 && !reg_overlap_mentioned_p (mul_result, mac_op1));
20751 /* The EABI says test the least significant bit of a guard variable. */
20754 arm_cxx_guard_mask_bit (void)
20756 return TARGET_AAPCS_BASED;
20760 /* The EABI specifies that all array cookies are 8 bytes long. */
20763 arm_get_cookie_size (tree type)
20767 if (!TARGET_AAPCS_BASED)
20768 return default_cxx_get_cookie_size (type);
20770 size = build_int_cst (sizetype, 8);
20775 /* The EABI says that array cookies should also contain the element size. */
20778 arm_cookie_has_size (void)
20780 return TARGET_AAPCS_BASED;
20784 /* The EABI says constructors and destructors should return a pointer to
20785 the object constructed/destroyed. */
20788 arm_cxx_cdtor_returns_this (void)
20790 return TARGET_AAPCS_BASED;
20793 /* The EABI says that an inline function may never be the key
20797 arm_cxx_key_method_may_be_inline (void)
20799 return !TARGET_AAPCS_BASED;
20803 arm_cxx_determine_class_data_visibility (tree decl)
20805 if (!TARGET_AAPCS_BASED
20806 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20809 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20810 is exported. However, on systems without dynamic vague linkage,
20811 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20812 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20813 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20815 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20816 DECL_VISIBILITY_SPECIFIED (decl) = 1;
20820 arm_cxx_class_data_always_comdat (void)
20822 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20823 vague linkage if the class has no key function. */
20824 return !TARGET_AAPCS_BASED;
20828 /* The EABI says __aeabi_atexit should be used to register static
20832 arm_cxx_use_aeabi_atexit (void)
20834 return TARGET_AAPCS_BASED;
20839 arm_set_return_address (rtx source, rtx scratch)
20841 arm_stack_offsets *offsets;
20842 HOST_WIDE_INT delta;
20844 unsigned long saved_regs;
20846 offsets = arm_get_frame_offsets ();
20847 saved_regs = offsets->saved_regs_mask;
20849 if ((saved_regs & (1 << LR_REGNUM)) == 0)
20850 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20853 if (frame_pointer_needed)
20854 addr = plus_constant(hard_frame_pointer_rtx, -4);
20857 /* LR will be the first saved register. */
20858 delta = offsets->outgoing_args - (offsets->frame + 4);
20863 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20864 GEN_INT (delta & ~4095)));
20869 addr = stack_pointer_rtx;
20871 addr = plus_constant (addr, delta);
20873 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20879 thumb_set_return_address (rtx source, rtx scratch)
20881 arm_stack_offsets *offsets;
20882 HOST_WIDE_INT delta;
20883 HOST_WIDE_INT limit;
20886 unsigned long mask;
20890 offsets = arm_get_frame_offsets ();
20891 mask = offsets->saved_regs_mask;
20892 if (mask & (1 << LR_REGNUM))
20895 /* Find the saved regs. */
20896 if (frame_pointer_needed)
20898 delta = offsets->soft_frame - offsets->saved_args;
20899 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20905 delta = offsets->outgoing_args - offsets->saved_args;
20908 /* Allow for the stack frame. */
20909 if (TARGET_THUMB1 && TARGET_BACKTRACE)
20911 /* The link register is always the first saved register. */
20914 /* Construct the address. */
20915 addr = gen_rtx_REG (SImode, reg);
20918 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20919 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20923 addr = plus_constant (addr, delta);
20925 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20928 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20931 /* Implements target hook vector_mode_supported_p. */
20933 arm_vector_mode_supported_p (enum machine_mode mode)
20935 /* Neon also supports V2SImode, etc. listed in the clause below. */
20936 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20937 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20940 if ((TARGET_NEON || TARGET_IWMMXT)
20941 && ((mode == V2SImode)
20942 || (mode == V4HImode)
20943 || (mode == V8QImode)))
20949 /* Implements target hook small_register_classes_for_mode_p. */
20951 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
20953 return TARGET_THUMB1;
20956 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20957 ARM insns and therefore guarantee that the shift count is modulo 256.
20958 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20959 guarantee no particular behavior for out-of-range counts. */
20961 static unsigned HOST_WIDE_INT
20962 arm_shift_truncation_mask (enum machine_mode mode)
20964 return mode == SImode ? 255 : 0;
20968 /* Map internal gcc register numbers to DWARF2 register numbers. */
20971 arm_dbx_register_number (unsigned int regno)
20976 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20977 compatibility. The EABI defines them as registers 96-103. */
20978 if (IS_FPA_REGNUM (regno))
20979 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20981 if (IS_VFP_REGNUM (regno))
20983 /* See comment in arm_dwarf_register_span. */
20984 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20985 return 64 + regno - FIRST_VFP_REGNUM;
20987 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
20990 if (IS_IWMMXT_GR_REGNUM (regno))
20991 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20993 if (IS_IWMMXT_REGNUM (regno))
20994 return 112 + regno - FIRST_IWMMXT_REGNUM;
20996 gcc_unreachable ();
20999 /* Dwarf models VFPv3 registers as 32 64-bit registers.
21000 GCC models tham as 64 32-bit registers, so we need to describe this to
21001 the DWARF generation code. Other registers can use the default. */
21003 arm_dwarf_register_span (rtx rtl)
21010 regno = REGNO (rtl);
21011 if (!IS_VFP_REGNUM (regno))
21014 /* XXX FIXME: The EABI defines two VFP register ranges:
21015 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
21017 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
21018 corresponding D register. Until GDB supports this, we shall use the
21019 legacy encodings. We also use these encodings for D0-D15 for
21020 compatibility with older debuggers. */
21021 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21024 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
21025 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
21026 regno = (regno - FIRST_VFP_REGNUM) / 2;
21027 for (i = 0; i < nregs; i++)
21028 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
21033 #ifdef TARGET_UNWIND_INFO
21034 /* Emit unwind directives for a store-multiple instruction or stack pointer
21035 push during alignment.
21036 These should only ever be generated by the function prologue code, so
21037 expect them to have a particular form. */
21040 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21043 HOST_WIDE_INT offset;
21044 HOST_WIDE_INT nregs;
21050 e = XVECEXP (p, 0, 0);
21051 if (GET_CODE (e) != SET)
21054 /* First insn will adjust the stack pointer. */
21055 if (GET_CODE (e) != SET
21056 || GET_CODE (XEXP (e, 0)) != REG
21057 || REGNO (XEXP (e, 0)) != SP_REGNUM
21058 || GET_CODE (XEXP (e, 1)) != PLUS)
21061 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21062 nregs = XVECLEN (p, 0) - 1;
21064 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
21067 /* The function prologue may also push pc, but not annotate it as it is
21068 never restored. We turn this into a stack pointer adjustment. */
21069 if (nregs * 4 == offset - 4)
21071 fprintf (asm_out_file, "\t.pad #4\n");
21075 fprintf (asm_out_file, "\t.save {");
21077 else if (IS_VFP_REGNUM (reg))
21080 fprintf (asm_out_file, "\t.vsave {");
21082 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21084 /* FPA registers are done differently. */
21085 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
21089 /* Unknown register type. */
21092 /* If the stack increment doesn't match the size of the saved registers,
21093 something has gone horribly wrong. */
21094 if (offset != nregs * reg_size)
21099 /* The remaining insns will describe the stores. */
21100 for (i = 1; i <= nregs; i++)
21102 /* Expect (set (mem <addr>) (reg)).
21103 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21104 e = XVECEXP (p, 0, i);
21105 if (GET_CODE (e) != SET
21106 || GET_CODE (XEXP (e, 0)) != MEM
21107 || GET_CODE (XEXP (e, 1)) != REG)
21110 reg = REGNO (XEXP (e, 1));
21115 fprintf (asm_out_file, ", ");
21116 /* We can't use %r for vfp because we need to use the
21117 double precision register names. */
21118 if (IS_VFP_REGNUM (reg))
21119 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
21121 asm_fprintf (asm_out_file, "%r", reg);
21123 #ifdef ENABLE_CHECKING
21124 /* Check that the addresses are consecutive. */
21125 e = XEXP (XEXP (e, 0), 0);
21126 if (GET_CODE (e) == PLUS)
21128 offset += reg_size;
21129 if (GET_CODE (XEXP (e, 0)) != REG
21130 || REGNO (XEXP (e, 0)) != SP_REGNUM
21131 || GET_CODE (XEXP (e, 1)) != CONST_INT
21132 || offset != INTVAL (XEXP (e, 1)))
21136 || GET_CODE (e) != REG
21137 || REGNO (e) != SP_REGNUM)
21141 fprintf (asm_out_file, "}\n");
21144 /* Emit unwind directives for a SET. */
21147 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
21155 switch (GET_CODE (e0))
21158 /* Pushing a single register. */
21159 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21160 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21161 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
21164 asm_fprintf (asm_out_file, "\t.save ");
21165 if (IS_VFP_REGNUM (REGNO (e1)))
21166 asm_fprintf(asm_out_file, "{d%d}\n",
21167 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21169 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21173 if (REGNO (e0) == SP_REGNUM)
21175 /* A stack increment. */
21176 if (GET_CODE (e1) != PLUS
21177 || GET_CODE (XEXP (e1, 0)) != REG
21178 || REGNO (XEXP (e1, 0)) != SP_REGNUM
21179 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21182 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21183 -INTVAL (XEXP (e1, 1)));
21185 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21187 HOST_WIDE_INT offset;
21189 if (GET_CODE (e1) == PLUS)
21191 if (GET_CODE (XEXP (e1, 0)) != REG
21192 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21194 reg = REGNO (XEXP (e1, 0));
21195 offset = INTVAL (XEXP (e1, 1));
21196 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21197 HARD_FRAME_POINTER_REGNUM, reg,
21200 else if (GET_CODE (e1) == REG)
21203 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21204 HARD_FRAME_POINTER_REGNUM, reg);
21209 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21211 /* Move from sp to reg. */
21212 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21214 else if (GET_CODE (e1) == PLUS
21215 && GET_CODE (XEXP (e1, 0)) == REG
21216 && REGNO (XEXP (e1, 0)) == SP_REGNUM
21217 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21219 /* Set reg to offset from sp. */
21220 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21221 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21223 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21225 /* Stack pointer save before alignment. */
21227 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21240 /* Emit unwind directives for the given insn. */
21243 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21247 if (!ARM_EABI_UNWIND_TABLES)
21250 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21251 && (TREE_NOTHROW (current_function_decl)
21252 || crtl->all_throwers_are_sibcalls))
21255 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21258 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21260 pat = XEXP (pat, 0);
21262 pat = PATTERN (insn);
21264 switch (GET_CODE (pat))
21267 arm_unwind_emit_set (asm_out_file, pat);
21271 /* Store multiple. */
21272 arm_unwind_emit_sequence (asm_out_file, pat);
21281 /* Output a reference from a function exception table to the type_info
21282 object X. The EABI specifies that the symbol should be relocated by
21283 an R_ARM_TARGET2 relocation. */
21286 arm_output_ttype (rtx x)
21288 fputs ("\t.word\t", asm_out_file);
21289 output_addr_const (asm_out_file, x);
21290 /* Use special relocations for symbol references. */
21291 if (GET_CODE (x) != CONST_INT)
21292 fputs ("(TARGET2)", asm_out_file);
21293 fputc ('\n', asm_out_file);
21297 #endif /* TARGET_UNWIND_INFO */
21300 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21301 stack alignment. */
21304 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21306 rtx unspec = SET_SRC (pattern);
21307 gcc_assert (GET_CODE (unspec) == UNSPEC);
21311 case UNSPEC_STACK_ALIGN:
21312 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21313 put anything on the stack, so hopefully it won't matter.
21314 CFA = SP will be correct after alignment. */
21315 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21316 SET_DEST (pattern));
21319 gcc_unreachable ();
21324 /* Output unwind directives for the start/end of a function. */
21327 arm_output_fn_unwind (FILE * f, bool prologue)
21329 if (!ARM_EABI_UNWIND_TABLES)
21333 fputs ("\t.fnstart\n", f);
21336 /* If this function will never be unwound, then mark it as such.
21337 The came condition is used in arm_unwind_emit to suppress
21338 the frame annotations. */
21339 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21340 && (TREE_NOTHROW (current_function_decl)
21341 || crtl->all_throwers_are_sibcalls))
21342 fputs("\t.cantunwind\n", f);
21344 fputs ("\t.fnend\n", f);
21349 arm_emit_tls_decoration (FILE *fp, rtx x)
21351 enum tls_reloc reloc;
21354 val = XVECEXP (x, 0, 0);
21355 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21357 output_addr_const (fp, val);
21362 fputs ("(tlsgd)", fp);
21365 fputs ("(tlsldm)", fp);
21368 fputs ("(tlsldo)", fp);
21371 fputs ("(gottpoff)", fp);
21374 fputs ("(tpoff)", fp);
21377 gcc_unreachable ();
21385 fputs (" + (. - ", fp);
21386 output_addr_const (fp, XVECEXP (x, 0, 2));
21388 output_addr_const (fp, XVECEXP (x, 0, 3));
21398 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
21401 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21403 gcc_assert (size == 4);
21404 fputs ("\t.word\t", file);
21405 output_addr_const (file, x);
21406 fputs ("(tlsldo)", file);
21410 arm_output_addr_const_extra (FILE *fp, rtx x)
21412 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21413 return arm_emit_tls_decoration (fp, x);
21414 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21417 int labelno = INTVAL (XVECEXP (x, 0, 0));
21419 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21420 assemble_name_raw (fp, label);
21424 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21426 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21430 output_addr_const (fp, XVECEXP (x, 0, 0));
21434 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
21436 output_addr_const (fp, XVECEXP (x, 0, 0));
21440 output_addr_const (fp, XVECEXP (x, 0, 1));
21444 else if (GET_CODE (x) == CONST_VECTOR)
21445 return arm_emit_vector_const (fp, x);
21450 /* Output assembly for a shift instruction.
21451 SET_FLAGS determines how the instruction modifies the condition codes.
21452 0 - Do not set condition codes.
21453 1 - Set condition codes.
21454 2 - Use smallest instruction. */
21456 arm_output_shift(rtx * operands, int set_flags)
21459 static const char flag_chars[3] = {'?', '.', '!'};
21464 c = flag_chars[set_flags];
21465 if (TARGET_UNIFIED_ASM)
21467 shift = shift_op(operands[3], &val);
21471 operands[2] = GEN_INT(val);
21472 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21475 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21478 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21479 output_asm_insn (pattern, operands);
21483 /* Output a Thumb-1 casesi dispatch sequence. */
21485 thumb1_output_casesi (rtx *operands)
21487 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21489 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21491 switch (GET_MODE(diff_vec))
21494 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21495 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21497 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21498 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21500 return "bl\t%___gnu_thumb1_case_si";
21502 gcc_unreachable ();
21506 /* Output a Thumb-2 casesi instruction. */
21508 thumb2_output_casesi (rtx *operands)
21510 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21512 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21514 output_asm_insn ("cmp\t%0, %1", operands);
21515 output_asm_insn ("bhi\t%l3", operands);
21516 switch (GET_MODE(diff_vec))
21519 return "tbb\t[%|pc, %0]";
21521 return "tbh\t[%|pc, %0, lsl #1]";
21525 output_asm_insn ("adr\t%4, %l2", operands);
21526 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21527 output_asm_insn ("add\t%4, %4, %5", operands);
21532 output_asm_insn ("adr\t%4, %l2", operands);
21533 return "ldr\t%|pc, [%4, %0, lsl #2]";
21536 gcc_unreachable ();
21540 /* Most ARM cores are single issue, but some newer ones can dual issue.
21541 The scheduler descriptions rely on this being correct. */
21543 arm_issue_rate (void)
21558 /* A table and a function to perform ARM-specific name mangling for
21559 NEON vector types in order to conform to the AAPCS (see "Procedure
21560 Call Standard for the ARM Architecture", Appendix A). To qualify
21561 for emission with the mangled names defined in that document, a
21562 vector type must not only be of the correct mode but also be
21563 composed of NEON vector element types (e.g. __builtin_neon_qi). */
21566 enum machine_mode mode;
21567 const char *element_type_name;
21568 const char *aapcs_name;
21569 } arm_mangle_map_entry;
21571 static arm_mangle_map_entry arm_mangle_map[] = {
21572 /* 64-bit containerized types. */
21573 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
21574 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
21575 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
21576 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
21577 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
21578 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
21579 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
21580 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
21581 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
21582 /* 128-bit containerized types. */
21583 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
21584 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
21585 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
21586 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
21587 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
21588 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
21589 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
21590 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
21591 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
21592 { VOIDmode, NULL, NULL }
21596 arm_mangle_type (const_tree type)
21598 arm_mangle_map_entry *pos = arm_mangle_map;
21600 /* The ARM ABI documents (10th October 2008) say that "__va_list"
21601 has to be managled as if it is in the "std" namespace. */
21602 if (TARGET_AAPCS_BASED
21603 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
21605 static bool warned;
21606 if (!warned && warn_psabi && !in_system_header)
21609 inform (input_location,
21610 "the mangling of %<va_list%> has changed in GCC 4.4");
21612 return "St9__va_list";
21615 /* Half-precision float. */
21616 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
21619 if (TREE_CODE (type) != VECTOR_TYPE)
21622 /* Check the mode of the vector type, and the name of the vector
21623 element type, against the table. */
21624 while (pos->mode != VOIDmode)
21626 tree elt_type = TREE_TYPE (type);
21628 if (pos->mode == TYPE_MODE (type)
21629 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
21630 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
21631 pos->element_type_name))
21632 return pos->aapcs_name;
21637 /* Use the default mangling for unrecognized (possibly user-defined)
21642 /* Order of allocation of core registers for Thumb: this allocation is
21643 written over the corresponding initial entries of the array
21644 initialized with REG_ALLOC_ORDER. We allocate all low registers
21645 first. Saving and restoring a low register is usually cheaper than
21646 using a call-clobbered high register. */
21648 static const int thumb_core_reg_alloc_order[] =
21650 3, 2, 1, 0, 4, 5, 6, 7,
21651 14, 12, 8, 9, 10, 11, 13, 15
21654 /* Adjust register allocation order when compiling for Thumb. */
21657 arm_order_regs_for_local_alloc (void)
21659 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
21660 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
21662 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
21663 sizeof (thumb_core_reg_alloc_order));
21666 /* Set default optimization options. */
21668 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
21670 /* Enable section anchors by default at -O1 or higher.
21671 Use 2 to distinguish from an explicit -fsection-anchors
21672 given on the command line. */
21674 flag_section_anchors = 2;
21677 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
21680 arm_frame_pointer_required (void)
21682 return (cfun->has_nonlocal_label
21683 || SUBTARGET_FRAME_POINTER_REQUIRED
21684 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
21687 /* Only thumb1 can't support conditional execution, so return true if
21688 the target is not thumb1. */
21690 arm_have_conditional_execution (void)
21692 return !TARGET_THUMB1;
21695 #include "gt-arm.h"