/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif

static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
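
/* Editorial illustration (not part of the original source): user code
   selects the attributes above roughly as follows, where "far_func" and
   "irq_handler" are made-up names:

     void far_func (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   "long_call" forces an indirect call sequence, while "interrupt" makes
   arm_handle_isr_attribute apply the special ISR prologue/epilogue
   rules.  */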

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes (covering offsets
   -4088 through 4095), which is divisible by eight, ensuring natural
   spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
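
/* Worked arithmetic for the range above (editorial note, not original
   text): an anchor covers offsets -4088 through 4095, i.e.
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, so consecutive
   anchors stay a whole number of doublewords apart.  */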

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2     FL_NOTM
#define FL_FOR_ARCH3     (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M    (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4     (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T    (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5     (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T    (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E    (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE   (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ  FL_FOR_ARCH5TE
#define FL_FOR_ARCH6     (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J    FL_FOR_ARCH6
#define FL_FOR_ARCH6K    (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z    FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK   FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2   (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M    (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7     (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A    (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R    (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M    (FL_FOR_ARCH7 | FL_DIV)
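
/* Editorial example (not in the original source): expanding one chain,
   FL_FOR_ARCH5TE works out to

     FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
     | FL_THUMB

   so each architecture macro is the transitive union of its ancestors'
   features plus its own additions.  */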

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

static enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",               FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
  {"iwmmxt2", iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
  {NULL, arm_none, NULL, 0 , NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string       name            processors  */
  { NULL,       "-mcpu=",       all_cores  },
  { NULL,       "-march=",      all_architectures },
  { NULL,       "-mtune=",      all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};

/* Available values for -mfpu=.  */
static const struct fpu_desc all_fpus[] =
{
  {"fpa",       FPUTYPE_FPA},
  {"fpe2",      FPUTYPE_FPA_EMU2},
  {"fpe3",      FPUTYPE_FPA_EMU3},
  {"maverick",  FPUTYPE_MAVERICK},
  {"vfp",       FPUTYPE_VFP},
  {"vfp3",      FPUTYPE_VFP3},
  {"vfpv3",     FPUTYPE_VFP3},
  {"vfpv3-d16", FPUTYPE_VFP3D16},
  {"neon",      FPUTYPE_NEON},
  {"neon-fp16", FPUTYPE_NEON_FP16}
};

/* Floating point models used by the different hardware.
   See fputype in arm.h.  */
static const enum arm_fp_model fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,         /* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,        /* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3D16  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_NEON  */
  ARM_FP_MODEL_VFP              /* FPUTYPE_NEON_FP16  */
};

struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */
static const struct float_abi all_float_abis[] =
{
  {"soft",      ARM_FLOAT_ABI_SOFT},
  {"softfp",    ARM_FLOAT_ABI_SOFTFP},
  {"hard",      ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */
static const struct fp16_format all_fp16_formats[] =
{
  {"none",              ARM_FP16_FORMAT_NONE},
  {"ieee",              ARM_FP16_FORMAT_IEEE},
  {"alternative",       ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */
static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
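
/* Worked example for the loop above (editorial, not original text):
   value = 0x29 = 101001 in binary takes three iterations, clearing bit 0,
   then bit 3, then bit 5, so bit_count returns 3; the loop runs once per
   set bit rather than once per bit position.  */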

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     functions.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
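
  /* Editorial example (assumption mine, based on the AAPCS run-time
     ABI): a C expression such as

       int r = a % b;

     compiles to a call to __aeabi_idivmod, which returns the quotient
     in r0 and the remainder in r1; the compiler then simply consumes r1
     and ignores r0, which is why no separate mod libcall is needed.  */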

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}
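
/* Editorial example (assumption mine): given user code

     va_list ap;

   on an AAPCS target the function above rewrites any use of AP into the
   COMPONENT_REF ap.__ap, so the generic va_start/va_arg expanders only
   ever see the raw "void *" cursor held inside the struct.  */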

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}

static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);

          if (value > 0)
            columns = value;
        }

      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
        {
          const struct processors * sel;

          for (sel = ptr->processors; sel->name != NULL; sel++)
            if (streq (ptr->string, sel->name))
              {
                /* Set the architecture define.  */
                if (i != ARM_OPT_SET_TUNE)
                  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

                /* Determine the processor core for which we should
                   tune code-generation.  */
                if (/* -mcpu= is a sensible default.  */
                    i == ARM_OPT_SET_CPU
                    /* -mtune= overrides -mcpu= and -march=.  */
                    || i == ARM_OPT_SET_TUNE)
                  arm_tune = (enum processor_type) (sel - ptr->processors);

                /* Remember the CPU associated with this architecture.
                   If no other option is used to set the CPU type,
                   we'll use this to guess the most suitable tuning
                   options.  */
                if (i == ARM_OPT_SET_ARCH)
                  target_arch_cpu = sel->core;

                if (i == ARM_OPT_SET_CPU)
                  selected_cpu = (enum processor_type) (sel - ptr->processors);

                if (i != ARM_OPT_SET_TUNE)
                  {
                    /* If we have been given an architecture and a processor
                       make sure that they are compatible.  We only generate
                       a warning though, and we prefer the CPU over the
                       architecture.  */
                    if (insn_flags != 0 && (insn_flags ^ sel->flags))
                      warning (0, "switch -mcpu=%s conflicts with -march= switch",
                               ptr->string);

                    insn_flags = sel->flags;
                  }

                break;
              }

          if (sel->name == NULL)
            error ("bad value (%s) for %s switch", ptr->string, ptr->name);
        }
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int sought;

      selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
#endif
          /* Default to ARM6.  */
          if (selected_cpu == arm_none)
            selected_cpu = arm6;
        }
      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switch that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          insn_flags = sel->flags;
        }

      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
        arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     selected.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
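
  /* Worked arithmetic for the Thumb-2 range above (editorial note, not
     original text): offsets -248 through 4095 cover
     248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, so anchors again
     land on doubleword boundaries.  */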

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
        {
          if (streq (all_fpus[i].name, target_fpu_name))
            {
              arm_fpu_arch = all_fpus[i].fpu;
              arm_fpu_tune = arm_fpu_arch;
              arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
              break;
            }
        }
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
        error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
        arm_fpu_arch = FPUTYPE_VFP;
      else
      */
      if (arm_arch_cirrus)
        arm_fpu_arch = FPUTYPE_MAVERICK;
      else
        arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
        arm_fpu_tune = FPUTYPE_FPA;
      else
        arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fp_model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else
        if (TARGET_CALLEE_INTERWORKING)
          error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

1732 if (target_thread_switch)
1734 if (strcmp (target_thread_switch, "soft") == 0)
1735 target_thread_pointer = TP_SOFT;
1736 else if (strcmp (target_thread_switch, "auto") == 0)
1737 target_thread_pointer = TP_AUTO;
1738 else if (strcmp (target_thread_switch, "cp15") == 0)
1739 target_thread_pointer = TP_CP15;
1741 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1744 /* Use the cp15 method if it is available. */
1745 if (target_thread_pointer == TP_AUTO)
1747 if (arm_arch6k && !TARGET_THUMB)
1748 target_thread_pointer = TP_CP15;
1750 target_thread_pointer = TP_SOFT;
1753 if (TARGET_HARD_TP && TARGET_THUMB1)
1754 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1756 /* Override the default structure alignment for AAPCS ABI. */
1757 if (TARGET_AAPCS_BASED)
1758 arm_structure_size_boundary = 8;
1760 if (structure_size_string != NULL)
1762 int size = strtol (structure_size_string, NULL, 0);
1764 if (size == 8 || size == 32
1765 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1766 arm_structure_size_boundary = size;
1768 warning (0, "structure size boundary can only be set to %s",
1769 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1772 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1774 error ("RTP PIC is incompatible with Thumb");
1778 /* If stack checking is disabled, we can use r10 as the PIC register,
1779 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1780 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1782 if (TARGET_VXWORKS_RTP)
1783 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1784 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1787 if (flag_pic && TARGET_VXWORKS_RTP)
1788 arm_pic_register = 9;
1790 if (arm_pic_register_string != NULL)
1792 int pic_register = decode_reg_name (arm_pic_register_string);
1795 warning (0, "-mpic-register= is useless without -fpic");
1797 /* Prevent the user from choosing an obviously stupid PIC register. */
1798 else if (pic_register < 0 || call_used_regs[pic_register]
1799 || pic_register == HARD_FRAME_POINTER_REGNUM
1800 || pic_register == STACK_POINTER_REGNUM
1801 || pic_register >= PC_REGNUM
1802 || (TARGET_VXWORKS_RTP
1803 && (unsigned int) pic_register != arm_pic_register))
1804 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1806 arm_pic_register = pic_register;
1809 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1810 if (fix_cm3_ldrd == 2)
1812 if (selected_cpu == cortexm3)
1818 /* ??? We might want scheduling for thumb2. */
1819 if (TARGET_THUMB && flag_schedule_insns)
1821 /* Don't warn since it's on by default in -O2. */
1822 flag_schedule_insns = 0;
1827 arm_constant_limit = 1;
1829 /* If optimizing for size, bump the number of instructions that we
1830 are prepared to conditionally execute (even on a StrongARM). */
1831 max_insns_skipped = 6;
1835 /* For processors with load scheduling, it never costs more than
1836 2 cycles to load a constant, and the load scheduler may well
1837 reduce that to 1. */
1839 arm_constant_limit = 1;
1841 /* On XScale the longer latency of a load makes it more difficult
1842 to achieve a good schedule, so it's faster to synthesize
1843 constants that can be done in two insns. */
1844 if (arm_tune_xscale)
1845 arm_constant_limit = 2;
1847 /* StrongARM has early execution of branches, so a sequence
1848 that is worth skipping is shorter. */
1849 if (arm_tune_strongarm)
1850 max_insns_skipped = 3;
1853 /* Register global variables with the garbage collector. */
1854 arm_add_gc_roots ();
1858 arm_add_gc_roots (void)
1860 gcc_obstack_init (&minipool_obstack);
1861 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1864 /* A table of known ARM exception types.
1865 For use with the interrupt function attribute. */
1869 const char *const arg;
1870 const unsigned long return_value;
1874 static const isr_attribute_arg isr_attribute_args [] =
1876 { "IRQ", ARM_FT_ISR },
1877 { "irq", ARM_FT_ISR },
1878 { "FIQ", ARM_FT_FIQ },
1879 { "fiq", ARM_FT_FIQ },
1880 { "ABORT", ARM_FT_ISR },
1881 { "abort", ARM_FT_ISR },
1882 { "ABORT", ARM_FT_ISR },
1883 { "abort", ARM_FT_ISR },
1884 { "UNDEF", ARM_FT_EXCEPTION },
1885 { "undef", ARM_FT_EXCEPTION },
1886 { "SWI", ARM_FT_EXCEPTION },
1887 { "swi", ARM_FT_EXCEPTION },
1888 { NULL, ARM_FT_NORMAL }
1891 /* Returns the (interrupt) function type of the current
1892 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1894 static unsigned long
1895 arm_isr_value (tree argument)
1897 const isr_attribute_arg * ptr;
1901 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1903 /* No argument - default to IRQ. */
1904 if (argument == NULL_TREE)
1907 /* Get the value of the argument. */
1908 if (TREE_VALUE (argument) == NULL_TREE
1909 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1910 return ARM_FT_UNKNOWN;
1912 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1914 /* Check it against the list of known arguments. */
1915 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1916 if (streq (arg, ptr->arg))
1917 return ptr->return_value;
1919 /* An unrecognized interrupt type. */
1920 return ARM_FT_UNKNOWN;
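/* For example, a handler declared with
   __attribute__ ((interrupt ("IRQ"))) matches the "IRQ" entry in
   isr_attribute_args above and is classified as ARM_FT_ISR.  */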
1923 /* Computes the type of the current function. */
1925 static unsigned long
1926 arm_compute_func_type (void)
1928 unsigned long type = ARM_FT_UNKNOWN;
1932 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1934 /* Decide if the current function is volatile. Such functions
1935 never return, and many memory cycles can be saved by not storing
1936 register values that will never be needed again. This optimization
1937 was added to speed up context switching in a kernel application. */
1939 && (TREE_NOTHROW (current_function_decl)
1940 || !(flag_unwind_tables
1941 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1942 && TREE_THIS_VOLATILE (current_function_decl))
1943 type |= ARM_FT_VOLATILE;
1945 if (cfun->static_chain_decl != NULL)
1946 type |= ARM_FT_NESTED;
1948 attr = DECL_ATTRIBUTES (current_function_decl);
1950 a = lookup_attribute ("naked", attr);
1952 type |= ARM_FT_NAKED;
1954 a = lookup_attribute ("isr", attr);
1956 a = lookup_attribute ("interrupt", attr);
1959 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1961 type |= arm_isr_value (TREE_VALUE (a));
1966 /* Returns the type of the current function. */
1969 arm_current_func_type (void)
1971 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1972 cfun->machine->func_type = arm_compute_func_type ();
1974 return cfun->machine->func_type;
1978 arm_allocate_stack_slots_for_args (void)
1980 /* Naked functions should not allocate stack slots for arguments. */
1981 return !IS_NAKED (arm_current_func_type ());
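/* A naked function (__attribute__ ((naked))) has no prologue or
   epilogue, so there is no frame in which incoming argument registers
   could safely be saved.  */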
1985 /* Return 1 if it is possible to return using a single instruction.
1986 If SIBLING is non-null, this is a test for a return before a sibling
1987 call. SIBLING is the call insn, so we can examine its register usage. */
1990 use_return_insn (int iscond, rtx sibling)
1993 unsigned int func_type;
1994 unsigned long saved_int_regs;
1995 unsigned HOST_WIDE_INT stack_adjust;
1996 arm_stack_offsets *offsets;
1998 /* Never use a return instruction before reload has run. */
1999 if (!reload_completed)
2002 func_type = arm_current_func_type ();
2004 /* Naked, volatile and stack alignment functions need special consideration. */
2006 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2009 /* So do interrupt functions that use the frame pointer and Thumb
2010 interrupt functions. */
2011 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2014 offsets = arm_get_frame_offsets ();
2015 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2017 /* As do variadic functions. */
2018 if (crtl->args.pretend_args_size
2019 || cfun->machine->uses_anonymous_args
2020 /* Or if the function calls __builtin_eh_return () */
2021 || crtl->calls_eh_return
2022 /* Or if the function calls alloca */
2023 || cfun->calls_alloca
2024 /* Or if there is a stack adjustment. However, if the stack pointer
2025 is saved on the stack, we can use a pre-incrementing stack load. */
2026 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2027 && stack_adjust == 4)))
2030 saved_int_regs = offsets->saved_regs_mask;
2032 /* Unfortunately, the insn
2034 ldmib sp, {..., sp, ...}
2036 triggers a bug on most SA-110 based devices, such that the stack
2037 pointer won't be correctly restored if the instruction takes a
2038 page fault. We work around this problem by popping r3 along with
2039 the other registers, since that is never slower than executing
2040 another instruction.
2042 We test for !arm_arch5 here, because code for any architecture
2043 less than this could potentially be run on one of the buggy chips. */
2045 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2047 /* Validate that r3 is a call-clobbered register (always true in
2048 the default abi) ... */
2049 if (!call_used_regs[3])
2052 /* ... that it isn't being used for a return value ... */
2053 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2056 /* ... or for a tail-call argument ... */
2059 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2061 if (find_regno_fusage (sibling, USE, 3))
2065 /* ... and that there are no call-saved registers in r0-r2
2066 (always true in the default ABI). */
2067 if (saved_int_regs & 0x7)
2071 /* Can't be done if interworking with Thumb, and any registers have been stacked. */
2073 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2076 /* On StrongARM, conditional returns are expensive if they aren't
2077 taken and multiple registers have been stacked. */
2078 if (iscond && arm_tune_strongarm)
2080 /* Conditional return when just the LR is stored is a simple
2081 conditional-load instruction; that's not expensive. */
2082 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2086 && arm_pic_register != INVALID_REGNUM
2087 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2091 /* If there are saved registers but the LR isn't saved, then we need
2092 two instructions for the return. */
2093 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2096 /* Can't be done if any of the FPA regs are pushed,
2097 since this also requires an insn. */
2098 if (TARGET_HARD_FLOAT && TARGET_FPA)
2099 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2100 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2103 /* Likewise VFP regs. */
2104 if (TARGET_HARD_FLOAT && TARGET_VFP)
2105 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2106 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2109 if (TARGET_REALLY_IWMMXT)
2110 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2111 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2117 /* Return TRUE if int I is a valid immediate ARM constant. */
2120 const_ok_for_arm (HOST_WIDE_INT i)
2124 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2125 be all zero, or all one. */
2126 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2127 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2128 != ((~(unsigned HOST_WIDE_INT) 0)
2129 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2132 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2134 /* Fast return for 0 and small values. We must do this for zero, since
2135 the code below can't handle that one case. */
2136 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2139 /* Get the number of trailing zeros. */
2140 lowbit = ffs((int) i) - 1;
2142 /* Only even shifts are allowed in ARM mode so round down to the
2143 nearest even number. */
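/* For example, 0xff, 0x3fc (0xff << 2) and 0xff000000 are all valid
   immediates (an 8-bit value rotated right by an even amount), while
   0x101 and 0xff0000ff are not.  */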
2147 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2152 /* Allow rotated constants in ARM mode. */
2154 && ((i & ~0xc000003f) == 0
2155 || (i & ~0xf000000f) == 0
2156 || (i & ~0xfc000003) == 0))
2163 /* Allow repeated pattern. */
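/* For example, 0x00120012 and 0x12121212 are accepted here; both are
   Thumb-2 replicated-constant patterns (0x00XY00XY and 0xXYXYXYXY).  */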
2166 if (i == v || i == (v | (v << 8)))
2173 /* Return true if I is a valid constant for the operation CODE. */
2175 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2177 if (const_ok_for_arm (i))
2201 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
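/* For example, i == -1 (0xffffffff) is not itself encodable, but its
   negation 1 is, so the operation can be recast to use the negated
   constant (an add of -1 becoming a sub of 1, for instance).  */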
2203 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2209 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2213 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2220 /* Emit a sequence of insns to handle a large constant.
2221 CODE is the code of the operation required, it can be any of SET, PLUS,
2222 IOR, AND, XOR, MINUS;
2223 MODE is the mode in which the operation is being performed;
2224 VAL is the integer to operate on;
2225 SOURCE is the other operand (a register, or a null-pointer for SET);
2226 SUBTARGETS means it is safe to create scratch registers if that will
2227 either produce a simpler sequence, or we will want to cse the values.
2228 Return value is the number of insns emitted. */
2230 /* ??? Tweak this for thumb2. */
2232 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2233 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2237 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2238 cond = COND_EXEC_TEST (PATTERN (insn));
2242 if (subtargets || code == SET
2243 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2244 && REGNO (target) != REGNO (source)))
2246 /* After arm_reorg has been called, we can't fix up expensive
2247 constants by pushing them into memory, so we must synthesize
2248 them in-line, regardless of the cost. This is only likely to
2249 be more costly on chips that have load delay slots and we are
2250 compiling without running the scheduler (so no splitting
2251 occurred before the final instruction emission).
2253 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2255 if (!after_arm_reorg
2257 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2259 > arm_constant_limit + (code != SET)))
2263 /* Currently SET is the only monadic value for CODE; all
2264 the rest are dyadic. */
2265 if (TARGET_USE_MOVT)
2266 arm_emit_movpair (target, GEN_INT (val));
2268 emit_set_insn (target, GEN_INT (val));
2274 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2276 if (TARGET_USE_MOVT)
2277 arm_emit_movpair (temp, GEN_INT (val));
2279 emit_set_insn (temp, GEN_INT (val));
2281 /* For MINUS, the value is subtracted from, since we never
2282 have subtraction of a constant. */
2284 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2286 emit_set_insn (target,
2287 gen_rtx_fmt_ee (code, mode, source, temp));
2293 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2297 /* Return the number of ARM instructions required to synthesize the given
2298 constant, if we start emitting them from bit position I. */
2300 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2302 HOST_WIDE_INT temp1;
2310 if (remainder & (3 << (i - 2)))
2315 temp1 = remainder & ((0x0ff << end)
2316 | ((i < end) ? (0xff >> (32 - end)) : 0));
2317 remainder &= ~temp1;
2322 } while (remainder);
2326 /* Emit an instruction with the indicated PATTERN. If COND is
2327 non-NULL, conditionalize the execution of the instruction on COND being true. */
2331 emit_constant_insn (rtx cond, rtx pattern)
2334 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2335 emit_insn (pattern);
2338 /* As above, but extra parameter GENERATE which, if clear, suppresses RTL generation. */
2340 /* ??? This needs more work for thumb2. */
2343 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2344 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2349 int can_negate_initial = 0;
2352 int num_bits_set = 0;
2353 int set_sign_bit_copies = 0;
2354 int clear_sign_bit_copies = 0;
2355 int clear_zero_bit_copies = 0;
2356 int set_zero_bit_copies = 0;
2358 unsigned HOST_WIDE_INT temp1, temp2;
2359 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2361 /* Find out which operations are safe for a given CODE. Also do a quick
2362 check for degenerate cases; these can occur when DImode operations are split. */
2374 can_negate_initial = 1;
2378 if (remainder == 0xffffffff)
2381 emit_constant_insn (cond,
2382 gen_rtx_SET (VOIDmode, target,
2383 GEN_INT (ARM_SIGN_EXTEND (val))));
2389 if (reload_completed && rtx_equal_p (target, source))
2393 emit_constant_insn (cond,
2394 gen_rtx_SET (VOIDmode, target, source));
2406 emit_constant_insn (cond,
2407 gen_rtx_SET (VOIDmode, target, const0_rtx));
2410 if (remainder == 0xffffffff)
2412 if (reload_completed && rtx_equal_p (target, source))
2415 emit_constant_insn (cond,
2416 gen_rtx_SET (VOIDmode, target, source));
2425 if (reload_completed && rtx_equal_p (target, source))
2428 emit_constant_insn (cond,
2429 gen_rtx_SET (VOIDmode, target, source));
2433 /* We don't know how to handle other cases yet. */
2434 gcc_assert (remainder == 0xffffffff);
2437 emit_constant_insn (cond,
2438 gen_rtx_SET (VOIDmode, target,
2439 gen_rtx_NOT (mode, source)));
2443 /* We treat MINUS as (val - source), since (source - val) is always
2444 passed as (source + (-val)). */
2448 emit_constant_insn (cond,
2449 gen_rtx_SET (VOIDmode, target,
2450 gen_rtx_NEG (mode, source)));
2453 if (const_ok_for_arm (val))
2456 emit_constant_insn (cond,
2457 gen_rtx_SET (VOIDmode, target,
2458 gen_rtx_MINUS (mode, GEN_INT (val),
2470 /* If we can do it in one insn get out quickly. */
2471 if (const_ok_for_arm (val)
2472 || (can_negate_initial && const_ok_for_arm (-val))
2473 || (can_invert && const_ok_for_arm (~val)))
2476 emit_constant_insn (cond,
2477 gen_rtx_SET (VOIDmode, target,
2479 ? gen_rtx_fmt_ee (code, mode, source,
2485 /* Calculate a few attributes that may be useful for specific optimizations. */
2487 /* Count number of leading zeros. */
2488 for (i = 31; i >= 0; i--)
2490 if ((remainder & (1 << i)) == 0)
2491 clear_sign_bit_copies++;
2496 /* Count number of leading 1's. */
2497 for (i = 31; i >= 0; i--)
2499 if ((remainder & (1 << i)) != 0)
2500 set_sign_bit_copies++;
2505 /* Count number of trailing zeros. */
2506 for (i = 0; i <= 31; i++)
2508 if ((remainder & (1 << i)) == 0)
2509 clear_zero_bit_copies++;
2514 /* Count number of trailing 1's. */
2515 for (i = 0; i <= 31; i++)
2517 if ((remainder & (1 << i)) != 0)
2518 set_zero_bit_copies++;
2526 /* See if we can use movw. */
2527 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2530 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2535 /* See if we can do this by sign_extending a constant that is known
2536 to be negative. This is a good way of doing it, since the shift
2537 may well merge into a subsequent insn. */
2538 if (set_sign_bit_copies > 1)
2540 if (const_ok_for_arm
2541 (temp1 = ARM_SIGN_EXTEND (remainder
2542 << (set_sign_bit_copies - 1))))
2546 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2547 emit_constant_insn (cond,
2548 gen_rtx_SET (VOIDmode, new_src,
2550 emit_constant_insn (cond,
2551 gen_ashrsi3 (target, new_src,
2552 GEN_INT (set_sign_bit_copies - 1)));
2556 /* For an inverted constant, we will need to set the low bits;
2557 these will be shifted out of harm's way. */
2558 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2559 if (const_ok_for_arm (~temp1))
2563 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2564 emit_constant_insn (cond,
2565 gen_rtx_SET (VOIDmode, new_src,
2567 emit_constant_insn (cond,
2568 gen_ashrsi3 (target, new_src,
2569 GEN_INT (set_sign_bit_copies - 1)));
2575 /* See if we can calculate the value as the difference between two
2576 valid immediates. */
2577 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2579 int topshift = clear_sign_bit_copies & ~1;
2581 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2582 & (0xff000000 >> topshift));
2584 /* If temp1 is zero, then that means the 9 most significant
2585 bits of remainder were 1 and we've caused it to overflow.
2586 When topshift is 0 we don't need to do anything since we
2587 can borrow from 'bit 32'. */
2588 if (temp1 == 0 && topshift != 0)
2589 temp1 = 0x80000000 >> (topshift - 1);
2591 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2593 if (const_ok_for_arm (temp2))
2597 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2598 emit_constant_insn (cond,
2599 gen_rtx_SET (VOIDmode, new_src,
2601 emit_constant_insn (cond,
2602 gen_addsi3 (target, new_src,
2610 /* See if we can generate this by setting the bottom (or the top)
2611 16 bits, and then shifting these into the other half of the
2612 word. We only look for the simplest cases; to do more would cost
2613 too much. Be careful, however, not to generate this when the
2614 alternative would take fewer insns. */
2615 if (val & 0xffff0000)
2617 temp1 = remainder & 0xffff0000;
2618 temp2 = remainder & 0x0000ffff;
2620 /* Overlaps outside this range are best done using other methods. */
2621 for (i = 9; i < 24; i++)
2623 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2624 && !const_ok_for_arm (temp2))
2626 rtx new_src = (subtargets
2627 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2629 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2630 source, subtargets, generate);
2638 gen_rtx_ASHIFT (mode, source,
2645 /* Don't duplicate cases already considered. */
2646 for (i = 17; i < 24; i++)
2648 if (((temp1 | (temp1 >> i)) == remainder)
2649 && !const_ok_for_arm (temp1))
2651 rtx new_src = (subtargets
2652 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2654 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2655 source, subtargets, generate);
2660 gen_rtx_SET (VOIDmode, target,
2663 gen_rtx_LSHIFTRT (mode, source,
2674 /* If we have IOR or XOR, and the constant can be loaded in a
2675 single instruction, and we can find a temporary to put it in,
2676 then this can be done in two instructions instead of 3-4. */
2678 /* TARGET can't be NULL if SUBTARGETS is 0. */
2679 || (reload_completed && !reg_mentioned_p (target, source)))
2681 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2685 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2687 emit_constant_insn (cond,
2688 gen_rtx_SET (VOIDmode, sub,
2690 emit_constant_insn (cond,
2691 gen_rtx_SET (VOIDmode, target,
2692 gen_rtx_fmt_ee (code, mode,
2703 x = y | constant (which is composed of set_sign_bit_copies leading 1s
2704 and the remaining bits 0, e.g. 0xfff00000)
2705 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2707 This can be done in 2 instructions by using shifts with mov or mvn.
2712 mvn r0, r0, lsr #12 */
2713 if (set_sign_bit_copies > 8
2714 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2718 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2719 rtx shift = GEN_INT (set_sign_bit_copies);
2723 gen_rtx_SET (VOIDmode, sub,
2725 gen_rtx_ASHIFT (mode,
2730 gen_rtx_SET (VOIDmode, target,
2732 gen_rtx_LSHIFTRT (mode, sub,
2739 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2741 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2743 E.g. r0 = r0 | 0xfff
2748 if (set_zero_bit_copies > 8
2749 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2753 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2754 rtx shift = GEN_INT (set_zero_bit_copies);
2758 gen_rtx_SET (VOIDmode, sub,
2760 gen_rtx_LSHIFTRT (mode,
2765 gen_rtx_SET (VOIDmode, target,
2767 gen_rtx_ASHIFT (mode, sub,
2773 /* This will never be reached for Thumb-2 because orn is a valid
2774 instruction. This is for Thumb-1 and the 32-bit ARM cases.
2776 x = y | constant (such that ~constant is a valid constant)
2778 x = ~(~y & ~constant).
2780 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2784 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2785 emit_constant_insn (cond,
2786 gen_rtx_SET (VOIDmode, sub,
2787 gen_rtx_NOT (mode, source)));
2790 sub = gen_reg_rtx (mode);
2791 emit_constant_insn (cond,
2792 gen_rtx_SET (VOIDmode, sub,
2793 gen_rtx_AND (mode, source,
2795 emit_constant_insn (cond,
2796 gen_rtx_SET (VOIDmode, target,
2797 gen_rtx_NOT (mode, sub)));
2804 /* See if two shifts will do two or more insns' worth of work. */
2805 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2807 HOST_WIDE_INT shift_mask = ((0xffffffff
2808 << (32 - clear_sign_bit_copies))
2811 if ((remainder | shift_mask) != 0xffffffff)
2815 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2816 insns = arm_gen_constant (AND, mode, cond,
2817 remainder | shift_mask,
2818 new_src, source, subtargets, 1);
2823 rtx targ = subtargets ? NULL_RTX : target;
2824 insns = arm_gen_constant (AND, mode, cond,
2825 remainder | shift_mask,
2826 targ, source, subtargets, 0);
2832 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2833 rtx shift = GEN_INT (clear_sign_bit_copies);
2835 emit_insn (gen_ashlsi3 (new_src, source, shift));
2836 emit_insn (gen_lshrsi3 (target, new_src, shift));
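/* For example, x &= 0x0000ffff (clear_sign_bit_copies == 16) becomes
   mov r0, r0, asl #16
   mov r0, r0, lsr #16  */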
2842 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2844 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2846 if ((remainder | shift_mask) != 0xffffffff)
2850 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2852 insns = arm_gen_constant (AND, mode, cond,
2853 remainder | shift_mask,
2854 new_src, source, subtargets, 1);
2859 rtx targ = subtargets ? NULL_RTX : target;
2861 insns = arm_gen_constant (AND, mode, cond,
2862 remainder | shift_mask,
2863 targ, source, subtargets, 0);
2869 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2870 rtx shift = GEN_INT (clear_zero_bit_copies);
2872 emit_insn (gen_lshrsi3 (new_src, source, shift));
2873 emit_insn (gen_ashlsi3 (target, new_src, shift));
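/* Likewise, x &= 0xffff0000 (clear_zero_bit_copies == 16) becomes
   mov r0, r0, lsr #16
   mov r0, r0, asl #16  */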
2885 for (i = 0; i < 32; i++)
2886 if (remainder & (1 << i))
2890 || (code != IOR && can_invert && num_bits_set > 16))
2891 remainder = (~remainder) & 0xffffffff;
2892 else if (code == PLUS && num_bits_set > 16)
2893 remainder = (-remainder) & 0xffffffff;
2900 /* Now try to find a way of doing the job in either two or three
2901 instructions.
2902 We start by looking for the largest block of zeros that are aligned on
2903 a 2-bit boundary; we then fill up the temps, wrapping around to the
2904 top of the word when we drop off the bottom.
2905 In the worst case this code should produce no more than four insns.
2906 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2907 best place to start. */
2909 /* ??? Use thumb2 replicated constants when the high and low halfwords are the same. */
2915 int best_consecutive_zeros = 0;
2917 for (i = 0; i < 32; i += 2)
2919 int consecutive_zeros = 0;
2921 if (!(remainder & (3 << i)))
2923 while ((i < 32) && !(remainder & (3 << i)))
2925 consecutive_zeros += 2;
2928 if (consecutive_zeros > best_consecutive_zeros)
2930 best_consecutive_zeros = consecutive_zeros;
2931 best_start = i - consecutive_zeros;
2937 /* So long as it won't require any more insns to do so, it's
2938 desirable to emit a small constant (in bits 0...9) in the last
2939 insn. This way there is more chance that it can be combined with
2940 a later addressing insn to form a pre-indexed load or store
2941 operation. Consider:
2943 *((volatile int *)0xe0000100) = 1;
2944 *((volatile int *)0xe0000110) = 2;
2946 We want this to wind up as:
2950 str rB, [rA, #0x100]
2952 str rB, [rA, #0x110]
2954 rather than having to synthesize both large constants from scratch.
2956 Therefore, we calculate how many insns would be required to emit
2957 the constant starting from `best_start', and also starting from
2958 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2959 yield a shorter sequence, we may as well use zero. */
2961 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2962 && (count_insns_for_constant (remainder, 0) <=
2963 count_insns_for_constant (remainder, best_start)))
2967 /* Now start emitting the insns. */
2975 if (remainder & (3 << (i - 2)))
2980 temp1 = remainder & ((0x0ff << end)
2981 | ((i < end) ? (0xff >> (32 - end)) : 0));
2982 remainder &= ~temp1;
2986 rtx new_src, temp1_rtx;
2988 if (code == SET || code == MINUS)
2990 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2991 if (can_invert && code != MINUS)
2996 if (remainder && subtargets)
2997 new_src = gen_reg_rtx (mode);
3002 else if (can_negate)
3006 temp1 = trunc_int_for_mode (temp1, mode);
3007 temp1_rtx = GEN_INT (temp1);
3011 else if (code == MINUS)
3012 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3014 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3016 emit_constant_insn (cond,
3017 gen_rtx_SET (VOIDmode, new_src,
3027 else if (code == MINUS)
3036 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary shifts. */
3049 /* Canonicalize a comparison so that we are more likely to recognize it.
3050 This can be done for a few constant compares, where we can make the
3051 immediate value easier to load. */
3054 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3057 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3058 unsigned HOST_WIDE_INT maxval;
3059 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3070 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3072 *op1 = GEN_INT (i + 1);
3073 return code == GT ? GE : LT;
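/* For example, (x > 0xffff) compares against a value that is not
   const_ok_for_arm, but the equivalent (x >= 0x10000) uses an
   immediate that is.  */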
3080 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3082 *op1 = GEN_INT (i - 1);
3083 return code == GE ? GT : LE;
3089 if (i != ~((unsigned HOST_WIDE_INT) 0)
3090 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3092 *op1 = GEN_INT (i + 1);
3093 return code == GTU ? GEU : LTU;
3100 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3102 *op1 = GEN_INT (i - 1);
3103 return code == GEU ? GTU : LEU;
3115 /* Define how to find the value returned by a function. */
3118 arm_function_value(const_tree type, const_tree func,
3119 bool outgoing ATTRIBUTE_UNUSED)
3121 enum machine_mode mode;
3122 int unsignedp ATTRIBUTE_UNUSED;
3123 rtx r ATTRIBUTE_UNUSED;
3125 mode = TYPE_MODE (type);
3127 if (TARGET_AAPCS_BASED)
3128 return aapcs_allocate_return_reg (mode, type, func);
3130 /* Promote integer types. */
3131 if (INTEGRAL_TYPE_P (type))
3132 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3134 /* Promote small structs returned in a register to full-word size
3135 for big-endian AAPCS. */
3136 if (arm_return_in_msb (type))
3138 HOST_WIDE_INT size = int_size_in_bytes (type);
3139 if (size % UNITS_PER_WORD != 0)
3141 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3142 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3146 return LIBCALL_VALUE (mode);
3150 libcall_eq (const void *p1, const void *p2)
3152 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3156 libcall_hash (const void *p1)
3158 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3162 add_libcall (htab_t htab, rtx libcall)
3164 *htab_find_slot (htab, libcall, INSERT) = libcall;
3168 arm_libcall_uses_aapcs_base (rtx libcall)
3170 static bool init_done = false;
3171 static htab_t libcall_htab;
3177 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3179 add_libcall (libcall_htab,
3180 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3181 add_libcall (libcall_htab,
3182 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3183 add_libcall (libcall_htab,
3184 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3185 add_libcall (libcall_htab,
3186 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3188 add_libcall (libcall_htab,
3189 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3190 add_libcall (libcall_htab,
3191 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3192 add_libcall (libcall_htab,
3193 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3194 add_libcall (libcall_htab,
3195 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3197 add_libcall (libcall_htab,
3198 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3199 add_libcall (libcall_htab,
3200 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3201 add_libcall (libcall_htab,
3202 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3203 add_libcall (libcall_htab,
3204 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3205 add_libcall (libcall_htab,
3206 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3207 add_libcall (libcall_htab,
3208 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3211 return libcall && htab_find (libcall_htab, libcall) != NULL;
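/* The entries above are the floating-point conversion libcalls
   (e.g. __aeabi_i2f and __aeabi_d2lz on EABI targets); these follow
   the base AAPCS and so return their results in core registers even
   when -mfloat-abi=hard is in effect.  */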
3215 arm_libcall_value (enum machine_mode mode, rtx libcall)
3217 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3218 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3220 /* The following libcalls return their result in integer registers,
3221 even though they return a floating point value. */
3222 if (arm_libcall_uses_aapcs_base (libcall))
3223 return gen_rtx_REG (mode, ARG_REGISTER(1));
3227 return LIBCALL_VALUE (mode);
3230 /* Determine the amount of memory needed to store the possible return
3231 registers of an untyped call. */
3233 arm_apply_result_size (void)
3239 if (TARGET_HARD_FLOAT_ABI)
3245 if (TARGET_MAVERICK)
3248 if (TARGET_IWMMXT_ABI)
3255 /* Decide whether TYPE should be returned in memory (true)
3256 or in a register (false). FNTYPE is the type of the function making the call. */
3259 arm_return_in_memory (const_tree type, const_tree fntype)
3263 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3265 if (TARGET_AAPCS_BASED)
3267 /* Simple, non-aggregate types (i.e. not including vectors and
3268 complex) are always returned in a register (or registers).
3269 We don't care about which register here, so we can short-cut
3270 some of the detail. */
3271 if (!AGGREGATE_TYPE_P (type)
3272 && TREE_CODE (type) != VECTOR_TYPE
3273 && TREE_CODE (type) != COMPLEX_TYPE)
3276 /* Any return value that is no larger than one word can be returned in r0. */
3278 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3281 /* Check any available co-processors to see if they accept the
3282 type as a register candidate (VFP, for example, can return
3283 some aggregates in consecutive registers). These aren't
3284 available if the call is variadic. */
3285 if (aapcs_select_return_coproc (type, fntype) >= 0)
3288 /* Vector values should be returned using ARM registers, not
3289 memory (unless they're over 16 bytes, which will break since
3290 we only have four call-clobbered registers to play with). */
3291 if (TREE_CODE (type) == VECTOR_TYPE)
3292 return (size < 0 || size > (4 * UNITS_PER_WORD));
3294 /* The rest go in memory. */
3298 if (TREE_CODE (type) == VECTOR_TYPE)
3299 return (size < 0 || size > (4 * UNITS_PER_WORD));
3301 if (!AGGREGATE_TYPE_P (type)
3302 && (TREE_CODE (type) != VECTOR_TYPE))
3303 /* All simple types are returned in registers. */
3306 if (arm_abi != ARM_ABI_APCS)
3308 /* ATPCS and later return aggregate types in memory only if they are
3309 larger than a word (or are variable size). */
3310 return (size < 0 || size > UNITS_PER_WORD);
3313 /* For the arm-wince targets we choose to be compatible with Microsoft's
3314 ARM and Thumb compilers, which always return aggregates in memory. */
3316 /* All structures/unions bigger than one word are returned in memory.
3317 Also catch the case where int_size_in_bytes returns -1. In this case
3318 the aggregate is either huge or of variable size, and in either case
3319 we will want to return it via memory and not in a register. */
3320 if (size < 0 || size > UNITS_PER_WORD)
3323 if (TREE_CODE (type) == RECORD_TYPE)
3327 /* For a struct the APCS says that we only return in a register
3328 if the type is 'integer like' and every addressable element
3329 has an offset of zero. For practical purposes this means
3330 that the structure can have at most one non bit-field element
3331 and that this element must be the first one in the structure. */
3333 /* Find the first field, ignoring non FIELD_DECL things which will
3334 have been created by C++. */
3335 for (field = TYPE_FIELDS (type);
3336 field && TREE_CODE (field) != FIELD_DECL;
3337 field = TREE_CHAIN (field))
3341 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3343 /* Check that the first field is valid for returning in a register. */
3345 /* ... Floats are not allowed */
3346 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3349 /* ... Aggregates that are not themselves valid for returning in
3350 a register are not allowed. */
3351 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3354 /* Now check the remaining fields, if any. Only bitfields are allowed,
3355 since they are not addressable. */
3356 for (field = TREE_CHAIN (field);
3358 field = TREE_CHAIN (field))
3360 if (TREE_CODE (field) != FIELD_DECL)
3363 if (!DECL_BIT_FIELD_TYPE (field))
3370 if (TREE_CODE (type) == UNION_TYPE)
3374 /* Unions can be returned in registers if every element is
3375 integral, or can be returned in an integer register. */
3376 for (field = TYPE_FIELDS (type);
3378 field = TREE_CHAIN (field))
3380 if (TREE_CODE (field) != FIELD_DECL)
3383 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3386 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3392 #endif /* not ARM_WINCE */
3394 /* Return all other types in memory. */
3398 /* Indicate whether or not words of a double are in big-endian order. */
3401 arm_float_words_big_endian (void)
3403 if (TARGET_MAVERICK)
3406 /* For FPA, float words are always big-endian. For VFP, float words
3407 follow the memory system mode. */
3415 return (TARGET_BIG_END ? 1 : 0);
3420 const struct pcs_attribute_arg
3424 } pcs_attribute_args[] =
3426 {"aapcs", ARM_PCS_AAPCS},
3427 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3429 /* We could recognize these, but changes would be needed elsewhere
3430 * to implement them. */
3431 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3432 {"atpcs", ARM_PCS_ATPCS},
3433 {"apcs", ARM_PCS_APCS},
3435 {NULL, ARM_PCS_UNKNOWN}
3439 arm_pcs_from_attribute (tree attr)
3441 const struct pcs_attribute_arg *ptr;
3444 /* Get the value of the argument. */
3445 if (TREE_VALUE (attr) == NULL_TREE
3446 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3447 return ARM_PCS_UNKNOWN;
3449 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3451 /* Check it against the list of known arguments. */
3452 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3453 if (streq (arg, ptr->arg))
3456 /* An unrecognized PCS variant. */
3457 return ARM_PCS_UNKNOWN;
3460 /* Get the PCS variant to use for this call. TYPE is the function's type
3461 specification, DECL is the specific declaration. DECL may be null if
3462 the call could be indirect or if this is a library call. */
3464 arm_get_pcs_model (const_tree type, const_tree decl)
3466 bool user_convention = false;
3467 enum arm_pcs user_pcs = arm_pcs_default;
3472 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3475 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3476 user_convention = true;
3479 if (TARGET_AAPCS_BASED)
3481 /* Detect varargs functions. These always use the base rules
3482 (no argument is ever a candidate for a co-processor register). */
3484 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3485 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3486 != void_type_node));
3488 if (user_convention)
3490 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3491 sorry ("Non-AAPCS derived PCS variant");
3492 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3493 error ("Variadic functions must use the base AAPCS variant");
3497 return ARM_PCS_AAPCS;
3498 else if (user_convention)
3500 else if (decl && flag_unit_at_a_time)
3502 /* Local functions never leak outside this compilation unit,
3503 so we are free to use whatever conventions are appropriate. */
3505 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3506 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3508 return ARM_PCS_AAPCS_LOCAL;
3511 else if (user_convention && user_pcs != arm_pcs_default)
3512 sorry ("PCS variant");
3514 /* For everything else we use the target's default. */
3515 return arm_pcs_default;
3520 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3521 const_tree fntype ATTRIBUTE_UNUSED,
3522 rtx libcall ATTRIBUTE_UNUSED,
3523 const_tree fndecl ATTRIBUTE_UNUSED)
3525 /* Record the unallocated VFP registers. */
3526 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3527 pcum->aapcs_vfp_reg_alloc = 0;
3530 /* Walk down the type tree of TYPE counting consecutive base elements.
3531 If *MODEP is VOIDmode, then set it to the first valid floating point
3532 type. If a non-floating point type is found, or if a floating point
3533 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3534 otherwise return the count in the sub-tree. */
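/* For example, struct { double x; double y; } yields *MODEP == DFmode
   and a count of 2, whereas struct { float f; double d; } yields -1
   because the element modes differ.  */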
3536 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3538 enum machine_mode mode;
3541 switch (TREE_CODE (type))
3544 mode = TYPE_MODE (type);
3545 if (mode != DFmode && mode != SFmode)
3548 if (*modep == VOIDmode)
3557 mode = TYPE_MODE (TREE_TYPE (type));
3558 if (mode != DFmode && mode != SFmode)
3561 if (*modep == VOIDmode)
3570 /* Use V2SImode and V4SImode as representatives of all 64-bit
3571 and 128-bit vector types, whether or not those modes are
3572 supported with the present options. */
3573 size = int_size_in_bytes (type);
3586 if (*modep == VOIDmode)
3589 /* Vector modes are considered to be opaque: two vectors are
3590 equivalent for the purposes of being homogeneous aggregates
3591 if they are the same size. */
3600 tree index = TYPE_DOMAIN (type);
3602 /* Can't handle incomplete types. */
3603 if (!COMPLETE_TYPE_P(type))
3606 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3609 || !TYPE_MAX_VALUE (index)
3610 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3611 || !TYPE_MIN_VALUE (index)
3612 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3616 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3617 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3619 /* There must be no padding. */
3620 if (!host_integerp (TYPE_SIZE (type), 1)
3621 || (tree_low_cst (TYPE_SIZE (type), 1)
3622 != count * GET_MODE_BITSIZE (*modep)))
3634 /* Can't handle incomplete types. */
3635 if (!COMPLETE_TYPE_P(type))
3638 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3640 if (TREE_CODE (field) != FIELD_DECL)
3643 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3649 /* There must be no padding. */
3650 if (!host_integerp (TYPE_SIZE (type), 1)
3651 || (tree_low_cst (TYPE_SIZE (type), 1)
3652 != count * GET_MODE_BITSIZE (*modep)))
3659 case QUAL_UNION_TYPE:
3661 /* These aren't very interesting except in a degenerate case. */
3666 /* Can't handle incomplete types. */
3667 if (!COMPLETE_TYPE_P(type))
3670 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3672 if (TREE_CODE (field) != FIELD_DECL)
3675 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3678 count = count > sub_count ? count : sub_count;
3681 /* There must be no padding. */
3682 if (!host_integerp (TYPE_SIZE (type), 1)
3683 || (tree_low_cst (TYPE_SIZE (type), 1)
3684 != count * GET_MODE_BITSIZE (*modep)))
3698 aapcs_vfp_is_call_or_return_candidate (enum machine_mode mode, const_tree type,
3702 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3703 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3704 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3710 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3713 *base_mode = (mode == DCmode ? DFmode : SFmode);
3716 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3718 enum machine_mode aggregate_mode = VOIDmode;
3719 int ag_count = aapcs_vfp_sub_candidate (type, &aggregate_mode);
3721 if (ag_count > 0 && ag_count <= 4)
3724 *base_mode = aggregate_mode;
3732 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3733 enum machine_mode mode, const_tree type)
3735 int count ATTRIBUTE_UNUSED;
3736 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3738 if (!(pcs_variant == ARM_PCS_AAPCS_VFP
3739 || (pcs_variant == ARM_PCS_AAPCS_LOCAL
3740 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3742 return aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
3746 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3749 if (!(pcum->pcs_variant == ARM_PCS_AAPCS_VFP
3750 || (pcum->pcs_variant == ARM_PCS_AAPCS_LOCAL
3751 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3753 return aapcs_vfp_is_call_or_return_candidate (mode, type,
3754 &pcum->aapcs_vfp_rmode,
3755 &pcum->aapcs_vfp_rcount);
3759 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3760 const_tree type ATTRIBUTE_UNUSED)
3762 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3763 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
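/* For instance, two DFmode values give shift == 2 and a mask covering
   four consecutive S registers (two D registers).  */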
3766 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3767 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3769 pcum->aapcs_vfp_reg_alloc = mask << regno;
3770 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3773 int rcount = pcum->aapcs_vfp_rcount;
3775 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3779 /* Avoid using unsupported vector modes. */
3780 if (rmode == V2SImode)
3782 else if (rmode == V4SImode)
3789 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3790 for (i = 0; i < rcount; i++)
3792 rtx tmp = gen_rtx_REG (rmode,
3793 FIRST_VFP_REGNUM + regno + i * rshift);
3794 tmp = gen_rtx_EXPR_LIST
3796 GEN_INT (i * GET_MODE_SIZE (rmode)));
3797 XVECEXP (par, 0, i) = tmp;
3800 pcum->aapcs_reg = par;
3803 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3810 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3811 enum machine_mode mode,
3812 const_tree type ATTRIBUTE_UNUSED)
3814 if (!(pcs_variant == ARM_PCS_AAPCS_VFP
3815 || (pcs_variant == ARM_PCS_AAPCS_LOCAL
3816 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3818 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3826 aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
3830 if (ag_mode == V2SImode)
3832 else if (ag_mode == V4SImode)
3838 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3839 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3840 for (i = 0; i < count; i++)
3842 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3843 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3844 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
3845 XVECEXP (par, 0, i) = tmp;
3851 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
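/* A single SFmode or DFmode result is therefore returned in s0 or d0
   respectively; both start at FIRST_VFP_REGNUM.  */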
3855 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3856 enum machine_mode mode ATTRIBUTE_UNUSED,
3857 const_tree type ATTRIBUTE_UNUSED)
3859 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
3860 pcum->aapcs_vfp_reg_alloc = 0;
3864 #define AAPCS_CP(X) \
3866 aapcs_ ## X ## _cum_init, \
3867 aapcs_ ## X ## _is_call_candidate, \
3868 aapcs_ ## X ## _allocate, \
3869 aapcs_ ## X ## _is_return_candidate, \
3870 aapcs_ ## X ## _allocate_return_reg, \
3871 aapcs_ ## X ## _advance \
3874 /* Table of co-processors that can be used to pass arguments in
3875 registers. Ideally no argument should be a candidate for more than
3876 one co-processor table entry, but the table is processed in order
3877 and stops after the first match. If that entry then fails to put
3878 the argument into a co-processor register, the argument will go on the stack. */
3882 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
3883 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
3885 /* Return true if an argument of mode MODE (or type TYPE if MODE is
3886 BLKmode) is a candidate for this co-processor's registers; this
3887 function should ignore any position-dependent state in
3888 CUMULATIVE_ARGS and only use call-type dependent information. */
3889 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3891 /* Return true if the argument does get a co-processor register; it
3892 should set aapcs_reg to an RTX of the register allocated as is
3893 required for a return from FUNCTION_ARG. */
3894 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3896 /* Return true if a result of mode MODE (or type TYPE if MODE is
3897 BLKmode) can be returned in this co-processor's registers. */
3898 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
3900 /* Allocate and return an RTX element to hold the return type of a
3901 call; this routine must not fail and will only be called if
3902 is_return_candidate returned true with the same parameters. */
3903 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
3905 /* Finish processing this argument and prepare to start processing the next one. */
3907 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3908 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
3916 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3921 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
3922 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
3929 aapcs_select_return_coproc (const_tree type, const_tree fntype)
3931 /* We aren't passed a decl, so we can't check that a call is local.
3932 However, it isn't clear that that would be a win anyway, since it
3933 might limit some tail-calling opportunities. */
3934 enum arm_pcs pcs_variant;
3938 const_tree fndecl = NULL_TREE;
3940 if (TREE_CODE (fntype) == FUNCTION_DECL)
3943 fntype = TREE_TYPE (fntype);
3946 pcs_variant = arm_get_pcs_model (fntype, fndecl);
3949 pcs_variant = arm_pcs_default;
3951 if (pcs_variant != ARM_PCS_AAPCS)
3955 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
3956 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
3965 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
3968 /* We aren't passed a decl, so we can't check that a call is local.
3969 However, it isn't clear that that would be a win anyway, since it
3970 might limit some tail-calling opportunities. */
3971 enum arm_pcs pcs_variant;
3972 int unsignedp ATTRIBUTE_UNUSED;
3976 const_tree fndecl = NULL_TREE;
3978 if (TREE_CODE (fntype) == FUNCTION_DECL)
3981 fntype = TREE_TYPE (fntype);
3984 pcs_variant = arm_get_pcs_model (fntype, fndecl);
3987 pcs_variant = arm_pcs_default;
3989 /* Promote integer types. */
3990 if (type && INTEGRAL_TYPE_P (type))
3991 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
3993 if (pcs_variant != ARM_PCS_AAPCS)
3997 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
3998 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4000 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4004 /* Promote small structs returned in a register to full-word size
4005 for big-endian AAPCS. */
4006 if (type && arm_return_in_msb (type))
4008 HOST_WIDE_INT size = int_size_in_bytes (type);
4009 if (size % UNITS_PER_WORD != 0)
4011 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4012 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4016 return gen_rtx_REG (mode, R0_REGNUM);
4020 aapcs_libcall_value (enum machine_mode mode)
4022 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4025 /* Lay out a function argument using the AAPCS rules. The rule
4026 numbers referred to here are those in the AAPCS. */
4028 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4029 tree type, int named)
4034 /* We only need to do this once per argument. */
4035 if (pcum->aapcs_arg_processed)
4038 pcum->aapcs_arg_processed = true;
4040 /* Special case: if named is false then we are handling an incoming
4041 anonymous argument which is on the stack. */
4045 /* Is this a potential co-processor register candidate? */
4046 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4048 int slot = aapcs_select_call_coproc (pcum, mode, type);
4049 pcum->aapcs_cprc_slot = slot;
4051 /* We don't have to apply any of the rules from part B of the
4052 preparation phase; these are handled elsewhere in the compiler. */
4057 /* A co-processor register candidate goes either in its own
4058 class of registers or on the stack. */
4059 if (!pcum->aapcs_cprc_failed[slot])
4061 /* C1.cp - Try to allocate the argument to co-processor registers. */
4063 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4066 /* C2.cp - Put the argument on the stack and note that we
4067 can't assign any more candidates in this slot. We also
4068 need to note that we have allocated stack space, so that
4069 we won't later try to split a non-cprc candidate between
4070 core registers and the stack. */
4071 pcum->aapcs_cprc_failed[slot] = true;
4072 pcum->can_split = false;
4075 /* We didn't get a register, so this argument goes on the stack. */
4077 gcc_assert (pcum->can_split == false);
4082 /* C3 - For double-word aligned arguments, round the NCRN up to the
4083 next even number. */
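/* For example, in f (int a, long long b), A occupies r0 (NCRN becomes
   1) and B, which needs doubleword alignment, is rounded up to start
   at r2, leaving r1 unused.  */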
4084 ncrn = pcum->aapcs_ncrn;
4085 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4088 nregs = ARM_NUM_REGS2(mode, type);
4090 /* Sigh, this test should really assert that nregs > 0, but a GCC
4091 extension allows empty structs and then gives them empty size; it
4092 then allows such a structure to be passed by value. For some of
4093 the code below we have to pretend that such an argument has
4094 non-zero size so that we 'locate' it correctly either in
4095 registers or on the stack. */
4096 gcc_assert (nregs >= 0);
4098 nregs2 = nregs ? nregs : 1;
4100 /* C4 - Argument fits entirely in core registers. */
4101 if (ncrn + nregs2 <= NUM_ARG_REGS)
4103 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4104 pcum->aapcs_next_ncrn = ncrn + nregs;
4108 /* C5 - Some core registers left and there are no arguments already
4109 on the stack: split this argument between the remaining core
4110 registers and the stack. */
4111 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4113 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4114 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4115 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
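/* For example, an 8-byte structure arriving when only r3 is free is
   split: its first word goes in r3 and the rest on the stack, with
   aapcs_partial recording the four bytes passed in registers.  */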
4119 /* C6 - NCRN is set to 4. */
4120 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4122 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4126 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4127 for a call to a function whose data type is FNTYPE.
4128 For a library call, FNTYPE is NULL. */
4130 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4132 tree fndecl ATTRIBUTE_UNUSED)
4134 /* Long call handling. */
4136 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4138 pcum->pcs_variant = arm_pcs_default;
4140 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4142 if (arm_libcall_uses_aapcs_base (libname))
4143 pcum->pcs_variant = ARM_PCS_AAPCS;
4145 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4146 pcum->aapcs_reg = NULL_RTX;
4147 pcum->aapcs_partial = 0;
4148 pcum->aapcs_arg_processed = false;
4149 pcum->aapcs_cprc_slot = -1;
4150 pcum->can_split = true;
4152 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4156 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4158 pcum->aapcs_cprc_failed[i] = false;
4159 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4167 /* On the ARM, the offset starts at 0. */
4169 pcum->iwmmxt_nregs = 0;
4170 pcum->can_split = true;
4172 /* Varargs vectors are treated the same as long long.
4173 named_count avoids having to change the way arm handles 'named'. */
4174 pcum->named_count = 0;
4177 if (TARGET_REALLY_IWMMXT && fntype)
4181 for (fn_arg = TYPE_ARG_TYPES (fntype);
4183 fn_arg = TREE_CHAIN (fn_arg))
4184 pcum->named_count += 1;
4186 if (! pcum->named_count)
4187 pcum->named_count = INT_MAX;
4192 /* Return true if mode/type need doubleword alignment. */
4194 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4196 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4197 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
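/* PARM_BOUNDARY is 32 on ARM, so DImode and DFmode arguments, and any
   type the front end aligns to 64 bits, take this path.  */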
4201 /* Determine where to put an argument to a function.
4202 Value is zero to push the argument on the stack,
4203 or a hard register in which to store the argument.
4205 MODE is the argument's machine mode.
4206 TYPE is the data type of the argument (as a tree).
4207 This is null for libcalls where that information may
4209 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4210 the preceding args and about the function being called.
4211 NAMED is nonzero if this argument is a named parameter
4212 (otherwise it is an extra parameter matching an ellipsis). */
4215 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4216 tree type, int named)
4220 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4221 a call insn (op3 of a call_value insn). */
4222 if (mode == VOIDmode)
4225 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4227 aapcs_layout_arg (pcum, mode, type, named);
4228 return pcum->aapcs_reg;
4231 /* Varargs vectors are treated the same as long long.
4232 named_count avoids having to change the way arm handles 'named'. */
4233 if (TARGET_IWMMXT_ABI
4234 && arm_vector_mode_supported_p (mode)
4235 && pcum->named_count > pcum->nargs + 1)
4237 if (pcum->iwmmxt_nregs <= 9)
4238 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4241 pcum->can_split = false;
4246 /* Put doubleword aligned quantities in even register pairs. */
4248 && ARM_DOUBLEWORD_ALIGN
4249 && arm_needs_doubleword_align (mode, type))
4252 if (mode == VOIDmode)
4253 /* Pick an arbitrary value for operand 2 of the call insn. */
4256 /* Only allow splitting an arg between regs and memory if all preceding
4257 args were allocated to regs. For args passed by reference we only count
4258 the reference pointer. */
4259 if (pcum->can_split)
4262 nregs = ARM_NUM_REGS2 (mode, type);
4264 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4267 return gen_rtx_REG (mode, pcum->nregs);
4270 static int
4271 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4272 tree type, bool named)
4274 int nregs = pcum->nregs;
4276 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4278 aapcs_layout_arg (pcum, mode, type, named);
4279 return pcum->aapcs_partial;
4282 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4283 return 0;
4285 if (NUM_ARG_REGS > nregs
4286 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4287 && pcum->can_split)
4288 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4290 return 0;
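/* Worked example (editorial): with NUM_ARG_REGS == 4 and a hypothetical
   "void f (int, int, int, long long)" on an ABI without doubleword
   alignment, nregs == 3 when the long long is laid out, ARM_NUM_REGS2
   gives 2, and 4 < 3 + 2 holds, so the result is (4 - 3) * 4 == 4:
   the first four bytes travel in r3 and the rest go on the stack.  For
   "void f (int, int, long long)" the test 4 < 2 + 2 fails and nothing
   is split.  */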
4294 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4295 tree type, bool named)
4297 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4299 aapcs_layout_arg (pcum, mode, type, named);
4301 if (pcum->aapcs_cprc_slot >= 0)
4303 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4304 type);
4305 pcum->aapcs_cprc_slot = -1;
4308 /* Generic stuff. */
4309 pcum->aapcs_arg_processed = false;
4310 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4311 pcum->aapcs_reg = NULL_RTX;
4312 pcum->aapcs_partial = 0;
4313 }
4314 else
4315 {
4316 pcum->nargs += 1;
4317 if (arm_vector_mode_supported_p (mode)
4318 && pcum->named_count > pcum->nargs
4319 && TARGET_IWMMXT_ABI)
4320 pcum->iwmmxt_nregs += 1;
4321 else
4322 pcum->nregs += ARM_NUM_REGS2 (mode, type);
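/* Editorial sketch of how the three hooks above cooperate when a call
   is expanded.  This is illustrative pseudo-driver code, not GCC code;
   "mode"/"type"/"named" stand for each successive argument:

     CUMULATIVE_ARGS cum;
     arm_init_cumulative_args (&cum, fntype, libname, fndecl);
     for each argument:
       rtx reg  = arm_function_arg (&cum, mode, type, named);
       int part = arm_arg_partial_bytes (&cum, mode, type, named);
       ... move values into REG, push the remaining PART bytes ...
       arm_function_arg_advance (&cum, mode, type, named);
*/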
4326 /* Variable sized types are passed by reference. This is a GCC
4327 extension to the ARM ABI. */
4329 static bool
4330 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4331 enum machine_mode mode ATTRIBUTE_UNUSED,
4332 const_tree type, bool named ATTRIBUTE_UNUSED)
4334 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
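/* Example (editorial): only types whose TYPE_SIZE is not a compile-time
   constant are affected, e.g. a variably modified type such as

     void g (int n)
     {
       typedef int vec[n];   // size depends on a runtime bound
       ...                   // a vec object is passed by reference
     }

   Fixed-size aggregates, however large, are still passed by value.  */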
4337 /* Encode the current state of the #pragma [no_]long_calls. */
4338 typedef enum
4339 {
4340 OFF, /* No #pragma [no_]long_calls is in effect. */
4341 LONG, /* #pragma long_calls is in effect. */
4342 SHORT /* #pragma no_long_calls is in effect. */
4343 } arm_pragma_enum;
4345 static arm_pragma_enum arm_pragma_long_calls = OFF;
4348 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4350 arm_pragma_long_calls = LONG;
4354 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4356 arm_pragma_long_calls = SHORT;
4360 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4362 arm_pragma_long_calls = OFF;
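/* Usage example (editorial): the three handlers above implement the
   documented ARM pragmas.  In user code:

     #pragma long_calls
     void far_away (void);   // calls use a 32-bit call sequence
     #pragma no_long_calls
     void nearby (void);     // calls may use a plain BL
     #pragma long_calls_off
                             // back to the command-line default
*/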
4365 /* Handle an attribute requiring a FUNCTION_DECL;
4366 arguments as in struct attribute_spec.handler. */
4368 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4369 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4371 if (TREE_CODE (*node) != FUNCTION_DECL)
4373 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4374 name);
4375 *no_add_attrs = true;
4381 /* Handle an "interrupt" or "isr" attribute;
4382 arguments as in struct attribute_spec.handler. */
4384 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4389 if (TREE_CODE (*node) != FUNCTION_DECL)
4391 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4392 name);
4393 *no_add_attrs = true;
4395 /* FIXME: the argument if any is checked for type attributes;
4396 should it be checked for decl ones? */
4400 if (TREE_CODE (*node) == FUNCTION_TYPE
4401 || TREE_CODE (*node) == METHOD_TYPE)
4403 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4405 warning (OPT_Wattributes, "%qE attribute ignored",
4406 name);
4407 *no_add_attrs = true;
4410 else if (TREE_CODE (*node) == POINTER_TYPE
4411 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4412 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4413 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4415 *node = build_variant_type_copy (*node);
4416 TREE_TYPE (*node) = build_type_attribute_variant
4417 (TREE_TYPE (*node),
4418 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4419 *no_add_attrs = true;
4423 /* Possibly pass this attribute on from the type to a decl. */
4424 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4425 | (int) ATTR_FLAG_FUNCTION_NEXT
4426 | (int) ATTR_FLAG_ARRAY_NEXT))
4428 *no_add_attrs = true;
4429 return tree_cons (name, args, NULL_TREE);
4433 warning (OPT_Wattributes, "%qE attribute ignored",
4434 name);
4437 return NULL_TREE;
4442 /* Handle a "pcs" attribute; arguments as in struct
4443 attribute_spec.handler. */
4445 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4446 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4448 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4450 warning (OPT_Wattributes, "%qE attribute ignored", name);
4451 *no_add_attrs = true;
4452 }
4453 return NULL_TREE;
4454 }
4456 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4457 /* Handle the "notshared" attribute. This attribute is another way of
4458 requesting hidden visibility. ARM's compiler supports
4459 "__declspec(notshared)"; we support the same thing via an
4463 arm_handle_notshared_attribute (tree *node,
4464 tree name ATTRIBUTE_UNUSED,
4465 tree args ATTRIBUTE_UNUSED,
4466 int flags ATTRIBUTE_UNUSED,
4467 bool *no_add_attrs)
4468 {
4469 tree decl = TYPE_NAME (*node);
4471 if (decl)
4472 {
4473 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4474 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4475 *no_add_attrs = false;
4476 }
4477 return NULL_TREE;
4478 }
4481 /* Return 0 if the attributes for two types are incompatible, 1 if they
4482 are compatible, and 2 if they are nearly compatible (which causes a
4483 warning to be generated). */
4484 static int
4485 arm_comp_type_attributes (const_tree type1, const_tree type2)
4486 {
4487 int l1, l2, s1, s2;
4489 /* Check for mismatch of non-default calling convention. */
4490 if (TREE_CODE (type1) != FUNCTION_TYPE)
4491 return 1;
4493 /* Check for mismatched call attributes. */
4494 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4495 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4496 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4497 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4499 /* Only bother to check if an attribute is defined. */
4500 if (l1 | l2 | s1 | s2)
4502 /* If one type has an attribute, the other must have the same attribute. */
4503 if ((l1 != l2) || (s1 != s2))
4504 return 0;
4506 /* Disallow mixed attributes. */
4507 if ((l1 & s2) || (l2 & s1))
4511 /* Check for mismatched ISR attribute. */
4512 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4513 if (! l1)
4514 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4515 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4516 if (! l2)
4517 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4519 if (l1 != l2)
4520 return 0;
4522 return 1;
4524 /* Assigns default attributes to newly defined type. This is used to
4525 set short_call/long_call attributes for function types of
4526 functions defined inside corresponding #pragma scopes. */
4528 arm_set_default_type_attributes (tree type)
4530 /* Add __attribute__ ((long_call)) to all functions when inside
4531 #pragma long_calls, or __attribute__ ((short_call)) when inside
4532 #pragma no_long_calls. */
4533 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4535 tree type_attr_list, attr_name;
4536 type_attr_list = TYPE_ATTRIBUTES (type);
4538 if (arm_pragma_long_calls == LONG)
4539 attr_name = get_identifier ("long_call");
4540 else if (arm_pragma_long_calls == SHORT)
4541 attr_name = get_identifier ("short_call");
4545 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4546 TYPE_ATTRIBUTES (type) = type_attr_list;
4550 /* Return true if DECL is known to be linked into section SECTION. */
4553 arm_function_in_section_p (tree decl, section *section)
4555 /* We can only be certain about functions defined in the same
4556 compilation unit. */
4557 if (!TREE_STATIC (decl))
4558 return false;
4560 /* Make sure that SYMBOL always binds to the definition in this
4561 compilation unit. */
4562 if (!targetm.binds_local_p (decl))
4563 return false;
4565 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4566 if (!DECL_SECTION_NAME (decl))
4567 {
4568 /* Make sure that we will not create a unique section for DECL. */
4569 if (flag_function_sections || DECL_ONE_ONLY (decl))
4570 return false;
4571 }
4573 return function_section (decl) == section;
4576 /* Return nonzero if a 32-bit "long_call" should be generated for
4577 a call from the current function to DECL. We generate a long_call
4578 if the function:
4580 a. has an __attribute__((long_call))
4581 or b. is within the scope of a #pragma long_calls
4582 or c. the -mlong-calls command line switch has been specified
4584 However we do not generate a long call if the function:
4586 d. has an __attribute__ ((short_call))
4587 or e. is inside the scope of a #pragma no_long_calls
4588 or f. is defined in the same section as the current function. */
4591 arm_is_long_call_p (tree decl)
4592 {
4593 tree attrs;
4595 if (!decl)
4596 return TARGET_LONG_CALLS;
4598 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4599 if (lookup_attribute ("short_call", attrs))
4602 /* For "f", be conservative, and only cater for cases in which the
4603 whole of the current function is placed in the same section. */
4604 if (!flag_reorder_blocks_and_partition
4605 && TREE_CODE (decl) == FUNCTION_DECL
4606 && arm_function_in_section_p (decl, current_function_section ()))
4607 return false;
4609 if (lookup_attribute ("long_call", attrs))
4612 return TARGET_LONG_CALLS;
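/* Usage example (editorial) of rules a-f above:

     void far (void) __attribute__((long_call));   // rule a: long call
     void near (void) __attribute__((short_call)); // rule d: never long

   Compiling with -mlong-calls makes rule c apply to every call whose
   callee is not covered by d-f.  */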
4615 /* Return nonzero if it is ok to make a tail-call to DECL. */
4616 static bool
4617 arm_function_ok_for_sibcall (tree decl, tree exp)
4618 {
4619 unsigned long func_type;
4621 if (cfun->machine->sibcall_blocked)
4622 return false;
4624 /* Never tailcall something for which we have no decl, or if we
4625 are in Thumb mode. */
4626 if (decl == NULL || TARGET_THUMB)
4627 return false;
4629 /* The PIC register is live on entry to VxWorks PLT entries, so we
4630 must make the call before restoring the PIC register. */
4631 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4632 return false;
4634 /* Cannot tail-call to long calls, since these are out of range of
4635 a branch instruction. */
4636 if (arm_is_long_call_p (decl))
4637 return false;
4639 /* If we are interworking and the function is not declared static
4640 then we can't tail-call it unless we know that it exists in this
4641 compilation unit (since it might be a Thumb routine). */
4642 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4643 return false;
4645 func_type = arm_current_func_type ();
4646 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4647 if (IS_INTERRUPT (func_type))
4648 return false;
4650 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4652 /* Check that the return value locations are the same. For
4653 example that we aren't returning a value from the sibling in
4654 a VFP register but then need to transfer it to a core
4655 register. */
4656 rtx a, b;
4658 a = arm_function_value (TREE_TYPE (exp), decl, false);
4659 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4660 cfun->decl, false);
4661 if (!rtx_equal_p (a, b))
4662 return false;
4663 }
4665 /* Never tailcall if function may be called with a misaligned SP. */
4666 if (IS_STACKALIGN (func_type))
4667 return false;
4669 /* Everything else is ok. */
4670 return true;
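/* Example (editorial): with the checks above, a simple forwarder such
   as

     int wrap (int x) { return work (x); }

   becomes a single "b work" in ARM state at -O2, but not when
   compiling for Thumb, when work() needs a long call, or inside an
   interrupt handler.  */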
4674 /* Addressing mode support functions. */
4676 /* Return nonzero if X is a legitimate immediate operand when compiling
4677 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4679 legitimate_pic_operand_p (rtx x)
4681 if (GET_CODE (x) == SYMBOL_REF
4682 || (GET_CODE (x) == CONST
4683 && GET_CODE (XEXP (x, 0)) == PLUS
4684 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4685 return false;
4687 return true;
4688 }
4690 /* Record that the current function needs a PIC register. Initialize
4691 cfun->machine->pic_reg if we have not already done so. */
4694 require_pic_register (void)
4696 /* A lot of the logic here is made obscure by the fact that this
4697 routine gets called as part of the rtx cost estimation process.
4698 We don't want those calls to affect any assumptions about the real
4699 function; and further, we can't call entry_of_function() until we
4700 start the real expansion process. */
4701 if (!crtl->uses_pic_offset_table)
4703 gcc_assert (can_create_pseudo_p ());
4704 if (arm_pic_register != INVALID_REGNUM)
4706 if (!cfun->machine->pic_reg)
4707 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4709 /* Play games to avoid marking the function as needing pic
4710 if we are being called as part of the cost-estimation
4711 process. */
4712 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4713 crtl->uses_pic_offset_table = 1;
4714 }
4715 else
4716 {
4717 rtx seq;
4719 if (!cfun->machine->pic_reg)
4720 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4722 /* Play games to avoid marking the function as needing pic
4723 if we are being called as part of the cost-estimation
4724 process. */
4725 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4726 {
4727 crtl->uses_pic_offset_table = 1;
4728 start_sequence ();
4730 arm_load_pic_register (0UL);
4732 seq = get_insns ();
4733 end_sequence ();
4734 /* We can be called during expansion of PHI nodes, where
4735 we can't yet emit instructions directly in the final
4736 insn stream. Queue the insns on the entry edge, they will
4737 be committed after everything else is expanded. */
4738 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4745 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4747 if (GET_CODE (orig) == SYMBOL_REF
4748 || GET_CODE (orig) == LABEL_REF)
4750 rtx pic_ref, address;
4751 rtx insn;
4752 int subregs = 0;
4754 /* If this function doesn't have a pic register, create one now. */
4755 require_pic_register ();
4757 if (reg == 0)
4758 {
4759 gcc_assert (can_create_pseudo_p ());
4760 reg = gen_reg_rtx (Pmode);
4762 subregs = 1;
4763 }
4765 if (subregs)
4766 address = gen_reg_rtx (Pmode);
4767 else
4768 address = reg;
4770 if (TARGET_ARM)
4771 emit_insn (gen_pic_load_addr_arm (address, orig));
4772 else if (TARGET_THUMB2)
4773 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
4774 else /* TARGET_THUMB1 */
4775 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4777 /* VxWorks does not impose a fixed gap between segments; the run-time
4778 gap can be different from the object-file gap. We therefore can't
4779 use GOTOFF unless we are absolutely sure that the symbol is in the
4780 same segment as the GOT. Unfortunately, the flexibility of linker
4781 scripts means that we can't be sure of that in general, so assume
4782 that GOTOFF is never valid on VxWorks. */
4783 if ((GET_CODE (orig) == LABEL_REF
4784 || (GET_CODE (orig) == SYMBOL_REF &&
4785 SYMBOL_REF_LOCAL_P (orig)))
4786 && NEED_GOT_RELOC
4787 && !TARGET_VXWORKS_RTP)
4788 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
4791 pic_ref = gen_const_mem (Pmode,
4792 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4793 address));
4796 insn = emit_move_insn (reg, pic_ref);
4798 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4799 by loop. */
4800 set_unique_reg_note (insn, REG_EQUAL, orig);
4802 return reg;
4803 }
4804 else if (GET_CODE (orig) == CONST)
4808 if (GET_CODE (XEXP (orig, 0)) == PLUS
4809 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4810 return orig;
4812 /* Handle the case where we have: const (UNSPEC_TLS). */
4813 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4814 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4815 return orig;
4817 /* Handle the case where we have:
4818 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4819 CONST_INT. */
4820 if (GET_CODE (XEXP (orig, 0)) == PLUS
4821 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4822 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4824 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4825 return orig;
4826 }
4828 if (reg == 0)
4829 {
4830 gcc_assert (can_create_pseudo_p ());
4831 reg = gen_reg_rtx (Pmode);
4832 }
4834 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4836 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4837 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4838 base == reg ? 0 : reg);
4840 if (GET_CODE (offset) == CONST_INT)
4842 /* The base register doesn't really matter, we only want to
4843 test the index for the appropriate mode. */
4844 if (!arm_legitimate_index_p (mode, offset, SET, 0))
4846 gcc_assert (can_create_pseudo_p ());
4847 offset = force_reg (Pmode, offset);
4850 if (GET_CODE (offset) == CONST_INT)
4851 return plus_constant (base, INTVAL (offset));
4854 if (GET_MODE_SIZE (mode) > 4
4855 && (GET_MODE_CLASS (mode) == MODE_INT
4856 || TARGET_SOFT_FLOAT))
4858 emit_insn (gen_addsi3 (reg, base, offset));
4859 return reg;
4860 }
4862 return gen_rtx_PLUS (Pmode, base, offset);
4863 }
4865 return orig;
4866 }
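/* Editorial note: for a global symbol the code above typically emits
   (roughly, ARM state)

     ldr   rA, .LCn          @ .LCn: offset of sym's GOT slot
     ldr   rD, [pic, rA]     @ pic = cfun->machine->pic_reg
                             @ (GOTOFF case: add rD, pic, rA instead)

   together with a REG_EQUAL note recording the original symbol so
   later passes can still see what was loaded.  */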
4869 /* Find a spare register to use during the prolog of a function. */
4871 static unsigned long
4872 thumb_find_work_register (unsigned long pushed_regs_mask)
4873 {
4874 int reg;
4876 /* Check the argument registers first as these are call-used. The
4877 register allocation order means that sometimes r3 might be used
4878 but earlier argument registers might not, so check them all. */
4879 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
4880 if (!df_regs_ever_live_p (reg))
4881 return reg;
4883 /* Before going on to check the call-saved registers we can try a couple
4884 more ways of deducing that r3 is available. The first is when we are
4885 pushing anonymous arguments onto the stack and we have less than 4
4886 registers worth of fixed arguments(*). In this case r3 will be part of
4887 the variable argument list and so we can be sure that it will be
4888 pushed right at the start of the function. Hence it will be available
4889 for the rest of the prologue.
4890 (*): ie crtl->args.pretend_args_size is greater than 0. */
4891 if (cfun->machine->uses_anonymous_args
4892 && crtl->args.pretend_args_size > 0)
4893 return LAST_ARG_REGNUM;
4895 /* The other case is when we have fixed arguments but less than 4 registers
4896 worth. In this case r3 might be used in the body of the function, but
4897 it is not being used to convey an argument into the function. In theory
4898 we could just check crtl->args.size to see how many bytes are
4899 being passed in argument registers, but it seems that it is unreliable.
4900 Sometimes it will have the value 0 when in fact arguments are being
4901 passed. (See testcase execute/20021111-1.c for an example). So we also
4902 check the args_info.nregs field as well. The problem with this field is
4903 that it makes no allowances for arguments that are passed to the
4904 function but which are not used. Hence we could miss an opportunity
4905 when a function has an unused argument in r3. But it is better to be
4906 safe than to be sorry. */
4907 if (! cfun->machine->uses_anonymous_args
4908 && crtl->args.size >= 0
4909 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
4910 && crtl->args.info.nregs < 4)
4911 return LAST_ARG_REGNUM;
4913 /* Otherwise look for a call-saved register that is going to be pushed. */
4914 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
4915 if (pushed_regs_mask & (1 << reg))
4916 return reg;
4918 if (TARGET_THUMB2)
4919 {
4920 /* Thumb-2 can use high regs. */
4921 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
4922 if (pushed_regs_mask & (1 << reg))
4923 return reg;
4924 }
4925 /* Something went wrong - thumb_compute_save_reg_mask()
4926 should have arranged for a suitable register to be pushed. */
4927 gcc_unreachable ();
4928 }
4930 static GTY(()) int pic_labelno;
4932 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
4933 low register. */
4935 void
4936 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
4938 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
4940 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
4941 return;
4943 gcc_assert (flag_pic);
4945 pic_reg = cfun->machine->pic_reg;
4946 if (TARGET_VXWORKS_RTP)
4948 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
4949 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
4950 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
4952 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
4954 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
4955 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
4956 }
4957 else
4958 {
4959 /* We use an UNSPEC rather than a LABEL_REF because this label
4960 never appears in the code stream. */
4962 labelno = GEN_INT (pic_labelno++);
4963 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4964 l1 = gen_rtx_CONST (VOIDmode, l1);
4966 /* On the ARM the PC register contains 'dot + 8' at the time of the
4967 addition, on the Thumb it is 'dot + 4'. */
4968 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
4969 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
4970 UNSPEC_GOTSYM_OFF);
4971 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
4973 if (TARGET_ARM)
4974 {
4975 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
4976 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
4977 }
4978 else if (TARGET_THUMB2)
4980 /* Thumb-2 only allows very limited access to the PC. Calculate the
4981 address in a temporary register. */
4982 if (arm_pic_register != INVALID_REGNUM)
4984 pic_tmp = gen_rtx_REG (SImode,
4985 thumb_find_work_register (saved_regs));
4987 else
4988 {
4989 gcc_assert (can_create_pseudo_p ());
4990 pic_tmp = gen_reg_rtx (Pmode);
4993 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
4994 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
4995 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
4997 else /* TARGET_THUMB1 */
4999 if (arm_pic_register != INVALID_REGNUM
5000 && REGNO (pic_reg) > LAST_LO_REGNUM)
5002 /* We will have pushed the pic register, so we should always be
5003 able to find a work register. */
5004 pic_tmp = gen_rtx_REG (SImode,
5005 thumb_find_work_register (saved_regs));
5006 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5007 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5008 }
5009 else
5010 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5011 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5015 /* Need to emit this whether or not we obey regdecls,
5016 since setjmp/longjmp can cause life info to screw up. */
5017 emit_use (pic_reg);
5018 }
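/* Editorial note: in ARM state the sequence built above materializes
   the PIC base roughly as

     ldr     rP, .LCn        @ _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8)
   .LPICm:
     add     rP, pc, rP      @ pc reads as .LPICm + 8 in ARM state

   which is why the constant added to the label is 'dot + 8' for ARM
   and 'dot + 4' for Thumb.  */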
5021 /* Return nonzero if X is valid as an ARM state addressing register. */
5023 arm_address_register_rtx_p (rtx x, int strict_p)
5024 {
5025 int regno;
5027 if (GET_CODE (x) != REG)
5028 return 0;
5030 regno = REGNO (x);
5032 if (strict_p)
5033 return ARM_REGNO_OK_FOR_BASE_P (regno);
5035 return (regno <= LAST_ARM_REGNUM
5036 || regno >= FIRST_PSEUDO_REGISTER
5037 || regno == FRAME_POINTER_REGNUM
5038 || regno == ARG_POINTER_REGNUM);
5041 /* Return TRUE if this rtx is the difference of a symbol and a label,
5042 and will reduce to a PC-relative relocation in the object file.
5043 Expressions like this can be left alone when generating PIC, rather
5044 than forced through the GOT. */
5046 pcrel_constant_p (rtx x)
5048 if (GET_CODE (x) == MINUS)
5049 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5051 return FALSE;
5052 }
5054 /* Return nonzero if X is a valid ARM state address operand. */
5055 int
5056 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5057 int strict_p)
5058 {
5059 bool use_ldrd;
5060 enum rtx_code code = GET_CODE (x);
5062 if (arm_address_register_rtx_p (x, strict_p))
5063 return 1;
5065 use_ldrd = (TARGET_LDRD
5066 && (mode == DImode
5067 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5069 if (code == POST_INC || code == PRE_DEC
5070 || ((code == PRE_INC || code == POST_DEC)
5071 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5072 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5074 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5075 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5076 && GET_CODE (XEXP (x, 1)) == PLUS
5077 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5079 rtx addend = XEXP (XEXP (x, 1), 1);
5081 /* Don't allow ldrd post increment by register because it's hard
5082 to fixup invalid register choices. */
5083 if (use_ldrd
5084 && GET_CODE (x) == POST_MODIFY
5085 && GET_CODE (addend) == REG)
5086 return 0;
5088 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5089 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5092 /* After reload constants split into minipools will have addresses
5093 from a LABEL_REF. */
5094 else if (reload_completed
5095 && (code == LABEL_REF
5096 || (code == CONST
5097 && GET_CODE (XEXP (x, 0)) == PLUS
5098 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5099 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5100 return 1;
5102 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5103 return 0;
5105 else if (code == PLUS)
5107 rtx xop0 = XEXP (x, 0);
5108 rtx xop1 = XEXP (x, 1);
5110 return ((arm_address_register_rtx_p (xop0, strict_p)
5111 && GET_CODE(xop1) == CONST_INT
5112 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5113 || (arm_address_register_rtx_p (xop1, strict_p)
5114 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5118 /* Reload currently can't handle MINUS, so disable this for now */
5119 else if (GET_CODE (x) == MINUS)
5121 rtx xop0 = XEXP (x, 0);
5122 rtx xop1 = XEXP (x, 1);
5124 return (arm_address_register_rtx_p (xop0, strict_p)
5125 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5129 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5130 && code == SYMBOL_REF
5131 && CONSTANT_POOL_ADDRESS_P (x)
5132 && ! (flag_pic
5133 && symbol_mentioned_p (get_pool_constant (x))
5134 && ! pcrel_constant_p (get_pool_constant (x))))
5135 return 1;
5137 return 0;
5138 }
5140 /* Return nonzero if X is a valid Thumb-2 address operand. */
5142 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5143 {
5144 bool use_ldrd;
5145 enum rtx_code code = GET_CODE (x);
5147 if (arm_address_register_rtx_p (x, strict_p))
5148 return 1;
5150 use_ldrd = (TARGET_LDRD
5151 && (mode == DImode
5152 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5154 if (code == POST_INC || code == PRE_DEC
5155 || ((code == PRE_INC || code == POST_DEC)
5156 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5157 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5159 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5160 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5161 && GET_CODE (XEXP (x, 1)) == PLUS
5162 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5164 /* Thumb-2 only has autoincrement by constant. */
5165 rtx addend = XEXP (XEXP (x, 1), 1);
5166 HOST_WIDE_INT offset;
5168 if (GET_CODE (addend) != CONST_INT)
5171 offset = INTVAL(addend);
5172 if (GET_MODE_SIZE (mode) <= 4)
5173 return (offset > -256 && offset < 256);
5175 return (use_ldrd && offset > -1024 && offset < 1024
5176 && (offset & 3) == 0);
5179 /* After reload constants split into minipools will have addresses
5180 from a LABEL_REF. */
5181 else if (reload_completed
5182 && (code == LABEL_REF
5183 || (code == CONST
5184 && GET_CODE (XEXP (x, 0)) == PLUS
5185 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5186 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5187 return 1;
5189 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5190 return 0;
5192 else if (code == PLUS)
5194 rtx xop0 = XEXP (x, 0);
5195 rtx xop1 = XEXP (x, 1);
5197 return ((arm_address_register_rtx_p (xop0, strict_p)
5198 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5199 || (arm_address_register_rtx_p (xop1, strict_p)
5200 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5203 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5204 && code == SYMBOL_REF
5205 && CONSTANT_POOL_ADDRESS_P (x)
5206 && ! (flag_pic
5207 && symbol_mentioned_p (get_pool_constant (x))
5208 && ! pcrel_constant_p (get_pool_constant (x))))
5209 return 1;
5211 return 0;
5212 }
5214 /* Return nonzero if INDEX is valid for an address index operand in
5217 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5218 int strict_p)
5219 {
5220 HOST_WIDE_INT range;
5221 enum rtx_code code = GET_CODE (index);
5223 /* Standard coprocessor addressing modes. */
5224 if (TARGET_HARD_FLOAT
5225 && (TARGET_FPA || TARGET_MAVERICK)
5226 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5227 || (TARGET_MAVERICK && mode == DImode)))
5228 return (code == CONST_INT && INTVAL (index) < 1024
5229 && INTVAL (index) > -1024
5230 && (INTVAL (index) & 3) == 0);
5232 if (TARGET_NEON
5233 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5234 return (code == CONST_INT
5235 && INTVAL (index) < 1016
5236 && INTVAL (index) > -1024
5237 && (INTVAL (index) & 3) == 0);
5239 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5240 return (code == CONST_INT
5241 && INTVAL (index) < 1024
5242 && INTVAL (index) > -1024
5243 && (INTVAL (index) & 3) == 0);
5245 if (arm_address_register_rtx_p (index, strict_p)
5246 && (GET_MODE_SIZE (mode) <= 4))
5247 return 1;
5249 if (mode == DImode || mode == DFmode)
5251 if (code == CONST_INT)
5253 HOST_WIDE_INT val = INTVAL (index);
5255 if (TARGET_LDRD)
5256 return val > -256 && val < 256;
5257 else
5258 return val > -4096 && val < 4092;
5261 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5264 if (GET_MODE_SIZE (mode) <= 4
5265 && ! (arm_arch4
5266 && (mode == HImode
5267 || mode == HFmode
5268 || (mode == QImode && outer == SIGN_EXTEND))))
5269 {
5270 if (code == MULT)
5271 {
5272 rtx xiop0 = XEXP (index, 0);
5273 rtx xiop1 = XEXP (index, 1);
5275 return ((arm_address_register_rtx_p (xiop0, strict_p)
5276 && power_of_two_operand (xiop1, SImode))
5277 || (arm_address_register_rtx_p (xiop1, strict_p)
5278 && power_of_two_operand (xiop0, SImode)));
5280 else if (code == LSHIFTRT || code == ASHIFTRT
5281 || code == ASHIFT || code == ROTATERT)
5283 rtx op = XEXP (index, 1);
5285 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5286 && GET_CODE (op) == CONST_INT
5287 && INTVAL (op) > 0
5288 && INTVAL (op) <= 31);
5292 /* For ARM v4 we may be doing a sign-extend operation during the
5293 load. */
5294 if (arm_arch4)
5295 {
5296 if (mode == HImode
5297 || mode == HFmode
5298 || (outer == SIGN_EXTEND && mode == QImode))
5299 range = 256;
5300 else
5301 range = 4096;
5302 }
5303 else
5304 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5306 return (code == CONST_INT
5307 && INTVAL (index) < range
5308 && INTVAL (index) > -range);
5309 }
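/* Worked examples (editorial) of the ranges accepted above in ARM
   state:

     [r0, #4095]   SImode ldr     -- within the 4096 range
     [r0, #255]    HImode ldrh    -- arch4 extending loads only get
                                     an 8-bit offset range (256)
     [r0, #-1020]  DFmode coproc  -- multiple of 4, inside +-1024
     [r0, r1, lsl #2]             -- register index scaled by a
                                     power of two  */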
5311 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5312 index operand. i.e. 1, 2, 4 or 8. */
5313 static bool
5314 thumb2_index_mul_operand (rtx op)
5315 {
5316 HOST_WIDE_INT val;
5318 if (GET_CODE(op) != CONST_INT)
5319 return false;
5321 val = INTVAL(op);
5322 return (val == 1 || val == 2 || val == 4 || val == 8);
5323 }
5325 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5326 static int
5327 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5328 {
5329 enum rtx_code code = GET_CODE (index);
5331 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5332 /* Standard coprocessor addressing modes. */
5333 if (TARGET_HARD_FLOAT
5334 && (TARGET_FPA || TARGET_MAVERICK)
5335 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5336 || (TARGET_MAVERICK && mode == DImode)))
5337 return (code == CONST_INT && INTVAL (index) < 1024
5338 && INTVAL (index) > -1024
5339 && (INTVAL (index) & 3) == 0);
5341 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5343 /* For DImode assume values will usually live in core regs
5344 and only allow LDRD addressing modes. */
5345 if (!TARGET_LDRD || mode != DImode)
5346 return (code == CONST_INT
5347 && INTVAL (index) < 1024
5348 && INTVAL (index) > -1024
5349 && (INTVAL (index) & 3) == 0);
5353 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5354 return (code == CONST_INT
5355 && INTVAL (index) < 1016
5356 && INTVAL (index) > -1024
5357 && (INTVAL (index) & 3) == 0);
5359 if (arm_address_register_rtx_p (index, strict_p)
5360 && (GET_MODE_SIZE (mode) <= 4))
5361 return 1;
5363 if (mode == DImode || mode == DFmode)
5365 if (code == CONST_INT)
5367 HOST_WIDE_INT val = INTVAL (index);
5368 /* ??? Can we assume ldrd for thumb2? */
5369 /* Thumb-2 ldrd only has reg+const addressing modes. */
5370 /* ldrd supports offsets of +-1020.
5371 However the ldr fallback does not. */
5372 return val > -256 && val < 256 && (val & 3) == 0;
5373 }
5376 }
5378 if (code == MULT)
5379 {
5380 rtx xiop0 = XEXP (index, 0);
5381 rtx xiop1 = XEXP (index, 1);
5383 return ((arm_address_register_rtx_p (xiop0, strict_p)
5384 && thumb2_index_mul_operand (xiop1))
5385 || (arm_address_register_rtx_p (xiop1, strict_p)
5386 && thumb2_index_mul_operand (xiop0)));
5388 else if (code == ASHIFT)
5390 rtx op = XEXP (index, 1);
5392 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5393 && GET_CODE (op) == CONST_INT
5394 && INTVAL (op) > 0
5395 && INTVAL (op) <= 3);
5398 return (code == CONST_INT
5399 && INTVAL (index) < 4096
5400 && INTVAL (index) > -256);
5403 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5405 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5406 {
5407 int regno;
5409 if (GET_CODE (x) != REG)
5410 return 0;
5412 regno = REGNO (x);
5414 if (strict_p)
5415 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5417 return (regno <= LAST_LO_REGNUM
5418 || regno > LAST_VIRTUAL_REGISTER
5419 || regno == FRAME_POINTER_REGNUM
5420 || (GET_MODE_SIZE (mode) >= 4
5421 && (regno == STACK_POINTER_REGNUM
5422 || regno >= FIRST_PSEUDO_REGISTER
5423 || x == hard_frame_pointer_rtx
5424 || x == arg_pointer_rtx)));
5427 /* Return nonzero if x is a legitimate index register. This is the case
5428 for any base register that can access a QImode object. */
5430 thumb1_index_register_rtx_p (rtx x, int strict_p)
5432 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5435 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5437 The AP may be eliminated to either the SP or the FP, so we use the
5438 least common denominator, e.g. SImode, and offsets from 0 to 64.
5440 ??? Verify whether the above is the right approach.
5442 ??? Also, the FP may be eliminated to the SP, so perhaps that
5443 needs special handling also.
5445 ??? Look at how the mips16 port solves this problem. It probably uses
5446 better ways to solve some of these problems.
5448 Although it is not incorrect, we don't accept QImode and HImode
5449 addresses based on the frame pointer or arg pointer until the
5450 reload pass starts. This is so that eliminating such addresses
5451 into stack based ones won't produce impossible code. */
5452 static int
5453 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5454 {
5455 /* ??? Not clear if this is right. Experiment. */
5456 if (GET_MODE_SIZE (mode) < 4
5457 && !(reload_in_progress || reload_completed)
5458 && (reg_mentioned_p (frame_pointer_rtx, x)
5459 || reg_mentioned_p (arg_pointer_rtx, x)
5460 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5461 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5462 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5463 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5464 return 0;
5466 /* Accept any base register. SP only in SImode or larger. */
5467 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5468 return 1;
5470 /* This is PC relative data before arm_reorg runs. */
5471 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5472 && GET_CODE (x) == SYMBOL_REF
5473 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5474 return 1;
5476 /* This is PC relative data after arm_reorg runs. */
5477 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5478 && reload_completed
5479 && (GET_CODE (x) == LABEL_REF
5480 || (GET_CODE (x) == CONST
5481 && GET_CODE (XEXP (x, 0)) == PLUS
5482 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5483 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5484 return 1;
5486 /* Post-inc indexing only supported for SImode and larger. */
5487 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5488 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5489 return 1;
5491 else if (GET_CODE (x) == PLUS)
5493 /* REG+REG address can be any two index registers. */
5494 /* We disallow FRAME+REG addressing since we know that FRAME
5495 will be replaced with STACK, and SP relative addressing only
5496 permits SP+OFFSET. */
5497 if (GET_MODE_SIZE (mode) <= 4
5498 && XEXP (x, 0) != frame_pointer_rtx
5499 && XEXP (x, 1) != frame_pointer_rtx
5500 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5501 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5502 return 1;
5504 /* REG+const has 5-7 bit offset for non-SP registers. */
5505 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5506 || XEXP (x, 0) == arg_pointer_rtx)
5507 && GET_CODE (XEXP (x, 1)) == CONST_INT
5508 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5509 return 1;
5511 /* REG+const has 10-bit offset for SP, but only SImode and
5512 larger is supported. */
5513 /* ??? Should probably check for DI/DFmode overflow here
5514 just like GO_IF_LEGITIMATE_OFFSET does. */
5515 else if (GET_CODE (XEXP (x, 0)) == REG
5516 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5517 && GET_MODE_SIZE (mode) >= 4
5518 && GET_CODE (XEXP (x, 1)) == CONST_INT
5519 && INTVAL (XEXP (x, 1)) >= 0
5520 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5521 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5522 return 1;
5524 else if (GET_CODE (XEXP (x, 0)) == REG
5525 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5526 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5527 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5528 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5529 && GET_MODE_SIZE (mode) >= 4
5530 && GET_CODE (XEXP (x, 1)) == CONST_INT
5531 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5532 return 1;
5533 }
5535 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5536 && GET_MODE_SIZE (mode) == 4
5537 && GET_CODE (x) == SYMBOL_REF
5538 && CONSTANT_POOL_ADDRESS_P (x)
5540 && symbol_mentioned_p (get_pool_constant (x))
5541 && ! pcrel_constant_p (get_pool_constant (x))))
5542 return 1;
5544 return 0;
5545 }
5547 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5548 instruction of mode MODE. */
5549 int
5550 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5551 {
5552 switch (GET_MODE_SIZE (mode))
5553 {
5554 case 1:
5555 return val >= 0 && val < 32;
5557 case 2:
5558 return val >= 0 && val < 64 && (val & 1) == 0;
5560 default:
5561 return (val >= 0
5562 && (val + GET_MODE_SIZE (mode)) <= 128
5563 && (val & 3) == 0);
5564 }
5565 }
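/* Worked examples (editorial): the function above accepts

     QImode: 0 <= val < 32          e.g. 31  (5-bit offset)
     HImode: 0 <= val < 64, even    e.g. 62  (5-bit offset * 2)
     SImode: val + 4 <= 128, val % 4 == 0,  e.g. 124 but not 126

   matching the scaled 5-bit immediate of the 16-bit load/store
   encodings.  */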
5567 static bool
5568 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5569 {
5570 if (TARGET_ARM)
5571 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5572 else if (TARGET_THUMB2)
5573 return thumb2_legitimate_address_p (mode, x, strict_p);
5574 else /* if (TARGET_THUMB1) */
5575 return thumb1_legitimate_address_p (mode, x, strict_p);
5578 /* Build the SYMBOL_REF for __tls_get_addr. */
5580 static GTY(()) rtx tls_get_addr_libfunc;
5582 static rtx
5583 get_tls_get_addr (void)
5585 if (!tls_get_addr_libfunc)
5586 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5587 return tls_get_addr_libfunc;
5588 }
5590 static rtx
5591 arm_load_tp (rtx target)
5592 {
5593 if (!target)
5594 target = gen_reg_rtx (SImode);
5596 if (TARGET_HARD_TP)
5597 {
5598 /* Can return in any reg. */
5599 emit_insn (gen_load_tp_hard (target));
5600 }
5601 else
5602 {
5603 /* Always returned in r0. Immediately copy the result into a pseudo,
5604 otherwise other uses of r0 (e.g. setting up function arguments) may
5605 clobber the value. */
5607 rtx tmp;
5609 emit_insn (gen_load_tp_soft ());
5611 tmp = gen_rtx_REG (SImode, 0);
5612 emit_move_insn (target, tmp);
5613 }
5614 return target;
5615 }
5617 static rtx
5618 load_tls_operand (rtx x, rtx reg)
5622 if (reg == NULL_RTX)
5623 reg = gen_reg_rtx (SImode);
5625 tmp = gen_rtx_CONST (SImode, x);
5627 emit_move_insn (reg, tmp);
5629 return reg;
5630 }
5632 static rtx
5633 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5635 rtx insns, label, labelno, sum;
5637 start_sequence ();
5639 labelno = GEN_INT (pic_labelno++);
5640 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5641 label = gen_rtx_CONST (VOIDmode, label);
5643 sum = gen_rtx_UNSPEC (Pmode,
5644 gen_rtvec (4, x, GEN_INT (reloc), label,
5645 GEN_INT (TARGET_ARM ? 8 : 4)),
5646 UNSPEC_TLS);
5647 reg = load_tls_operand (sum, reg);
5649 if (TARGET_ARM)
5650 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5651 else if (TARGET_THUMB2)
5652 {
5653 rtx tmp;
5654 /* Thumb-2 only allows very limited access to the PC. Calculate
5655 the address in a temporary register. */
5656 tmp = gen_reg_rtx (SImode);
5657 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
5658 emit_insn (gen_addsi3(reg, reg, tmp));
5660 else /* TARGET_THUMB1 */
5661 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5663 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5664 Pmode, 1, reg, Pmode);
5666 insns = get_insns ();
5667 end_sequence ();
5669 return insns;
5670 }
5672 rtx
5673 legitimize_tls_address (rtx x, rtx reg)
5675 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5676 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5678 switch (model)
5679 {
5680 case TLS_MODEL_GLOBAL_DYNAMIC:
5681 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5682 dest = gen_reg_rtx (Pmode);
5683 emit_libcall_block (insns, dest, ret, x);
5684 return dest;
5686 case TLS_MODEL_LOCAL_DYNAMIC:
5687 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5689 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5690 share the LDM result with other LD model accesses. */
5691 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5692 UNSPEC_TLS);
5693 dest = gen_reg_rtx (Pmode);
5694 emit_libcall_block (insns, dest, ret, eqv);
5696 /* Load the addend. */
5697 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5698 UNSPEC_TLS);
5699 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5700 return gen_rtx_PLUS (Pmode, dest, addend);
5702 case TLS_MODEL_INITIAL_EXEC:
5703 labelno = GEN_INT (pic_labelno++);
5704 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5705 label = gen_rtx_CONST (VOIDmode, label);
5706 sum = gen_rtx_UNSPEC (Pmode,
5707 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5708 GEN_INT (TARGET_ARM ? 8 : 4)),
5709 UNSPEC_TLS);
5710 reg = load_tls_operand (sum, reg);
5712 if (TARGET_ARM)
5713 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5714 else if (TARGET_THUMB2)
5715 {
5716 rtx tmp;
5717 /* Thumb-2 only allows very limited access to the PC. Calculate
5718 the address in a temporary register. */
5719 tmp = gen_reg_rtx (SImode);
5720 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
5721 emit_insn (gen_addsi3(reg, reg, tmp));
5722 emit_move_insn (reg, gen_const_mem (SImode, reg));
5723 }
5724 else
5725 {
5726 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5727 emit_move_insn (reg, gen_const_mem (SImode, reg));
5730 tp = arm_load_tp (NULL_RTX);
5732 return gen_rtx_PLUS (Pmode, tp, reg);
5734 case TLS_MODEL_LOCAL_EXEC:
5735 tp = arm_load_tp (NULL_RTX);
5737 reg = gen_rtx_UNSPEC (Pmode,
5738 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5740 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5742 return gen_rtx_PLUS (Pmode, tp, reg);
5744 default:
5745 gcc_unreachable ();
5746 }
5747 }
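/* Editorial summary of the access sequences built above:

     global dynamic: call __tls_get_addr on a per-symbol descriptor
     local dynamic:  one __tls_get_addr call for the module, then a
                     module-relative TLS_LDO32 addend per symbol
     initial exec:   load the GOT-resident TP offset (TLS_IE32) and
                     add it to the thread pointer
     local exec:     add a link-time constant (TLS_LE32) to the
                     thread pointer  */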
5749 /* Try machine-dependent ways of modifying an illegitimate address
5750 to be legitimate. If we find one, return the new, valid address. */
5752 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5754 if (!TARGET_ARM)
5755 {
5756 /* TODO: legitimize_address for Thumb2. */
5757 if (TARGET_THUMB2)
5758 return x;
5759 return thumb_legitimize_address (x, orig_x, mode);
5760 }
5762 if (arm_tls_symbol_p (x))
5763 return legitimize_tls_address (x, NULL_RTX);
5765 if (GET_CODE (x) == PLUS)
5767 rtx xop0 = XEXP (x, 0);
5768 rtx xop1 = XEXP (x, 1);
5770 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5771 xop0 = force_reg (SImode, xop0);
5773 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5774 xop1 = force_reg (SImode, xop1);
5776 if (ARM_BASE_REGISTER_RTX_P (xop0)
5777 && GET_CODE (xop1) == CONST_INT)
5779 HOST_WIDE_INT n, low_n;
5780 rtx base_reg, val;
5781 n = INTVAL (xop1);
5783 /* VFP addressing modes actually allow greater offsets, but for
5784 now we just stick with the lowest common denominator. */
5785 if (mode == DImode
5786 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5787 {
5788 low_n = n & 0x0f;
5789 n &= ~0x0f;
5790 if (low_n > 4)
5791 {
5792 n += 16;
5793 low_n -= 16;
5794 }
5795 }
5796 else
5797 {
5798 low_n = ((mode) == TImode ? 0
5799 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5800 n -= low_n;
5801 }
5803 base_reg = gen_reg_rtx (SImode);
5804 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5805 emit_move_insn (base_reg, val);
5806 x = plus_constant (base_reg, low_n);
5808 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5809 x = gen_rtx_PLUS (SImode, xop0, xop1);
5812 /* XXX We don't allow MINUS any more -- see comment in
5813 arm_legitimate_address_outer_p (). */
5814 else if (GET_CODE (x) == MINUS)
5816 rtx xop0 = XEXP (x, 0);
5817 rtx xop1 = XEXP (x, 1);
5819 if (CONSTANT_P (xop0))
5820 xop0 = force_reg (SImode, xop0);
5822 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5823 xop1 = force_reg (SImode, xop1);
5825 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5826 x = gen_rtx_MINUS (SImode, xop0, xop1);
5829 /* Make sure to take full advantage of the pre-indexed addressing mode
5830 with absolute addresses which often allows for the base register to
5831 be factorized for multiple adjacent memory references, and it might
5832 even allows for the mini pool to be avoided entirely. */
5833 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5834 {
5835 int bits;
5836 HOST_WIDE_INT mask, base, index;
5837 rtx base_reg;
5839 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5840 use a 8-bit index. So let's use a 12-bit index for SImode only and
5841 hope that arm_gen_constant will enable ldrb to use more bits. */
5842 bits = (mode == SImode) ? 12 : 8;
5843 mask = (1 << bits) - 1;
5844 base = INTVAL (x) & ~mask;
5845 index = INTVAL (x) & mask;
5846 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5848 /* It'll most probably be more efficient to generate the base
5849 with more bits set and use a negative index instead. */
5850 base |= mask;
5851 index -= mask;
5852 }
5853 base_reg = force_reg (SImode, GEN_INT (base));
5854 x = plus_constant (base_reg, index);
5857 if (flag_pic)
5858 {
5859 /* We need to find and carefully transform any SYMBOL and LABEL
5860 references; so go back to the original address expression. */
5861 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
5863 if (new_x != orig_x)
5864 x = new_x;
5865 }
5867 return x;
5868 }
5871 /* Try machine-dependent ways of modifying an illegitimate Thumb address
5872 to be legitimate. If we find one, return the new, valid address. */
5874 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5876 if (arm_tls_symbol_p (x))
5877 return legitimize_tls_address (x, NULL_RTX);
5879 if (GET_CODE (x) == PLUS
5880 && GET_CODE (XEXP (x, 1)) == CONST_INT
5881 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
5882 || INTVAL (XEXP (x, 1)) < 0))
5884 rtx xop0 = XEXP (x, 0);
5885 rtx xop1 = XEXP (x, 1);
5886 HOST_WIDE_INT offset = INTVAL (xop1);
5888 /* Try and fold the offset into a biasing of the base register and
5889 then offsetting that. Don't do this when optimizing for space
5890 since it can cause too many CSEs. */
5891 if (optimize_size && offset >= 0
5892 && offset < 256 + 31 * GET_MODE_SIZE (mode))
5894 HOST_WIDE_INT delta;
5896 if (offset >= 256)
5897 delta = offset - (256 - GET_MODE_SIZE (mode));
5898 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
5899 delta = 31 * GET_MODE_SIZE (mode);
5900 else
5901 delta = offset & (~31 * GET_MODE_SIZE (mode));
5903 xop0 = force_operand (plus_constant (xop0, offset - delta),
5904 NULL_RTX);
5905 x = plus_constant (xop0, delta);
5907 else if (offset < 0 && offset > -256)
5908 /* Small negative offsets are best done with a subtract before the
5909 dereference, forcing these into a register normally takes two
5910 instructions. */
5911 x = force_operand (x, NULL_RTX);
5912 else
5913 {
5914 /* For the remaining cases, force the constant into a register. */
5915 xop1 = force_reg (SImode, xop1);
5916 x = gen_rtx_PLUS (SImode, xop0, xop1);
5919 else if (GET_CODE (x) == PLUS
5920 && s_register_operand (XEXP (x, 1), SImode)
5921 && !s_register_operand (XEXP (x, 0), SImode))
5923 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
5925 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
5928 if (flag_pic)
5929 {
5930 /* We need to find and carefully transform any SYMBOL and LABEL
5931 references; so go back to the original address expression. */
5932 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
5934 if (new_x != orig_x)
5935 x = new_x;
5936 }
5938 return x;
5939 }
5941 bool
5942 thumb_legitimize_reload_address (rtx *x_p,
5943 enum machine_mode mode,
5944 int opnum, int type,
5945 int ind_levels ATTRIBUTE_UNUSED)
5946 {
5947 rtx x = *x_p;
5949 if (GET_CODE (x) == PLUS
5950 && GET_MODE_SIZE (mode) < 4
5951 && REG_P (XEXP (x, 0))
5952 && XEXP (x, 0) == stack_pointer_rtx
5953 && GET_CODE (XEXP (x, 1)) == CONST_INT
5954 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5955 {
5956 rtx orig_x = x;
5958 x = copy_rtx (x);
5959 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
5960 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
5961 return true;
5962 }
5964 /* If both registers are hi-regs, then it's better to reload the
5965 entire expression rather than each register individually. That
5966 only requires one reload register rather than two. */
5967 if (GET_CODE (x) == PLUS
5968 && REG_P (XEXP (x, 0))
5969 && REG_P (XEXP (x, 1))
5970 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
5971 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
5972 {
5973 rtx orig_x = x;
5975 x = copy_rtx (x);
5976 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
5977 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
5978 return true;
5979 }
5981 return false;
5982 }
5984 /* Test for various thread-local symbols. */
5986 /* Return TRUE if X is a thread-local symbol. */
5989 arm_tls_symbol_p (rtx x)
5991 if (! TARGET_HAVE_TLS)
5992 return false;
5994 if (GET_CODE (x) != SYMBOL_REF)
5995 return false;
5997 return SYMBOL_REF_TLS_MODEL (x) != 0;
6000 /* Helper for arm_tls_referenced_p. */
6003 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6005 if (GET_CODE (*x) == SYMBOL_REF)
6006 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6008 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6009 TLS offsets, not real symbol references. */
6010 if (GET_CODE (*x) == UNSPEC
6011 && XINT (*x, 1) == UNSPEC_TLS)
6012 return -1;
6014 return 0;
6015 }
6017 /* Return TRUE if X contains any TLS symbol references. */
6020 arm_tls_referenced_p (rtx x)
6022 if (! TARGET_HAVE_TLS)
6023 return false;
6025 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6028 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6031 arm_cannot_force_const_mem (rtx x)
6032 {
6033 rtx base, offset;
6035 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6036 {
6037 split_const (x, &base, &offset);
6038 if (GET_CODE (base) == SYMBOL_REF
6039 && !offset_within_block_p (base, INTVAL (offset)))
6040 return true;
6041 }
6042 return arm_tls_referenced_p (x);
6045 #define REG_OR_SUBREG_REG(X) \
6046 (GET_CODE (X) == REG \
6047 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6049 #define REG_OR_SUBREG_RTX(X) \
6050 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6052 #ifndef COSTS_N_INSNS
6053 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6054 #endif
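/* Editorial note: with the fallback definition above the cost of N
   instructions is N * 4 - 2, i.e. COSTS_N_INSNS (1) == 2,
   COSTS_N_INSNS (2) == 6 and COSTS_N_INSNS (3) == 10.  The rtl passes
   only compare these values against each other, so the relative scale
   matters more than the unit.  */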
6056 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6058 enum machine_mode mode = GET_MODE (x);
6060 switch (code)
6061 {
6062 case ASHIFT:
6063 case ASHIFTRT:
6064 case LSHIFTRT:
6065 case ROTATERT:
6066 case PLUS:
6067 case MINUS:
6068 case COMPARE:
6069 case NEG:
6070 case NOT:
6071 return COSTS_N_INSNS (1);
6073 case MULT:
6074 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6077 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6079 while (i)
6080 {
6081 i >>= 2;
6082 cycles++;
6083 }
6084 return COSTS_N_INSNS (2) + cycles;
6086 return COSTS_N_INSNS (1) + 16;
6088 case SET:
6089 return (COSTS_N_INSNS (1)
6090 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6091 + GET_CODE (SET_DEST (x)) == MEM));
6093 case CONST_INT:
6094 if (outer == SET)
6095 {
6096 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6097 return 0;
6098 if (thumb_shiftable_const (INTVAL (x)))
6099 return COSTS_N_INSNS (2);
6100 return COSTS_N_INSNS (3);
6102 else if ((outer == PLUS || outer == COMPARE)
6103 && INTVAL (x) < 256 && INTVAL (x) > -256)
6104 return 0;
6105 else if (outer == AND
6106 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6107 return COSTS_N_INSNS (1);
6108 else if (outer == ASHIFT || outer == ASHIFTRT
6109 || outer == LSHIFTRT)
6111 return COSTS_N_INSNS (2);
6113 case CONST:
6114 case CONST_DOUBLE:
6115 case LABEL_REF:
6116 case SYMBOL_REF:
6117 return COSTS_N_INSNS (3);
6135 /* XXX another guess. */
6136 /* Memory costs quite a lot for the first word, but subsequent words
6137 load at the equivalent of a single insn each. */
6138 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6139 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6140 ? 4 : 0));
6142 case IF_THEN_ELSE:
6143 /* XXX a guess. */
6144 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6145 return 14;
6146 return 2;
6148 case ZERO_EXTEND:
6149 /* XXX still guessing. */
6150 switch (GET_MODE (XEXP (x, 0)))
6151 {
6152 case QImode:
6153 return (1 + (mode == DImode ? 4 : 0)
6154 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6156 case HImode:
6157 return (4 + (mode == DImode ? 4 : 0)
6158 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6160 case SImode:
6161 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6163 default:
6164 return 99;
6165 }
6167 default:
6168 return 99;
6169 }
6170 }
6173 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6175 enum machine_mode mode = GET_MODE (x);
6176 enum rtx_code subcode;
6177 rtx operand;
6178 enum rtx_code code = GET_CODE (x);
6180 switch (code)
6181 {
6183 case MEM:
6185 /* Memory costs quite a lot for the first word, but subsequent words
6186 load at the equivalent of a single insn each. */
6187 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6188 return true;
6190 case DIV:
6191 case MOD:
6192 case UDIV:
6193 case UMOD:
6194 if (TARGET_HARD_FLOAT && mode == SFmode)
6195 *total = COSTS_N_INSNS (2);
6196 else if (TARGET_HARD_FLOAT && mode == DFmode)
6197 *total = COSTS_N_INSNS (4);
6198 else
6199 *total = COSTS_N_INSNS (20);
6200 return false;
6202 case ROTATE:
6203 if (GET_CODE (XEXP (x, 1)) == REG)
6204 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6205 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6206 *total = rtx_cost (XEXP (x, 1), code, speed);
6208 /* Fall through */
6209 case ROTATERT:
6210 if (mode != SImode)
6211 {
6212 *total += COSTS_N_INSNS (4);
6213 return true;
6214 }
6216 /* Fall through */
6217 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6218 *total += rtx_cost (XEXP (x, 0), code, speed);
6219 if (mode == DImode)
6220 {
6221 *total += COSTS_N_INSNS (3);
6222 return true;
6223 }
6225 *total += COSTS_N_INSNS (1);
6226 /* Increase the cost of complex shifts because they aren't any faster,
6227 and reduce dual issue opportunities. */
6228 if (arm_tune_cortex_a9
6229 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6237 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6239 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6240 *total = COSTS_N_INSNS (1);
6241 else
6242 *total = COSTS_N_INSNS (20);
6243 return false;
6244 }
6245 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6246 /* Thumb2 does not have RSB, so all arguments must be
6247 registers (subtracting a constant is canonicalized as
6248 addition of the negated constant). */
6254 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6255 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6256 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6257 {
6258 *total += rtx_cost (XEXP (x, 1), code, speed);
6259 return true;
6260 }
6262 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6263 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6264 {
6265 *total += rtx_cost (XEXP (x, 0), code, speed);
6266 return true;
6267 }
6272 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6274 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6276 *total = COSTS_N_INSNS (1);
6277 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6278 && arm_const_double_rtx (XEXP (x, 0)))
6279 {
6280 *total += rtx_cost (XEXP (x, 1), code, speed);
6281 return true;
6282 }
6284 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6285 && arm_const_double_rtx (XEXP (x, 1)))
6286 {
6287 *total += rtx_cost (XEXP (x, 0), code, speed);
6288 return true;
6289 }
6291 return false;
6292 }
6293 *total = COSTS_N_INSNS (20);
6297 *total = COSTS_N_INSNS (1);
6298 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6299 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6300 {
6301 *total += rtx_cost (XEXP (x, 1), code, speed);
6302 return true;
6303 }
6305 subcode = GET_CODE (XEXP (x, 1));
6306 if (subcode == ASHIFT || subcode == ASHIFTRT
6307 || subcode == LSHIFTRT
6308 || subcode == ROTATE || subcode == ROTATERT)
6310 *total += rtx_cost (XEXP (x, 0), code, speed);
6311 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6315 /* A shift as a part of RSB costs no more than RSB itself. */
6316 if (GET_CODE (XEXP (x, 0)) == MULT
6317 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6318 {
6319 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6320 *total += rtx_cost (XEXP (x, 1), code, speed);
6321 return true;
6322 }
6324 if (subcode == MULT
6325 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6327 *total += rtx_cost (XEXP (x, 0), code, speed);
6328 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6332 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6333 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6335 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6336 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6337 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6338 *total += COSTS_N_INSNS (1);
6346 if (code == PLUS && arm_arch6 && mode == SImode
6347 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6348 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6350 *total = COSTS_N_INSNS (1);
6351 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6352 speed);
6353 *total += rtx_cost (XEXP (x, 1), code, speed);
6354 return true;
6355 }
6357 /* MLA: All arguments must be registers. We filter out
6358 multiplication by a power of two, so that we fall down into
6359 the code below. */
6360 if (GET_CODE (XEXP (x, 0)) == MULT
6361 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6362 {
6363 /* The cost comes from the cost of the multiply. */
6364 return false;
6365 }
6367 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6369 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6371 *total = COSTS_N_INSNS (1);
6372 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6373 && arm_const_double_rtx (XEXP (x, 1)))
6374 {
6375 *total += rtx_cost (XEXP (x, 0), code, speed);
6376 return true;
6377 }
6379 return false;
6380 }
6382 *total = COSTS_N_INSNS (20);
6383 return false;
6384 }
6386 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6387 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6389 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6390 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6391 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6392 *total += COSTS_N_INSNS (1);
6393 return true;
6394 }
6398 case AND: case XOR: case IOR:
6401 /* Normally the frame registers will be spilt into reg+const during
6402 reload, so it is a bad idea to combine them with other instructions,
6403 since then they might not be moved outside of loops. As a compromise
6404 we allow integration with ops that have a constant as their second
6405 operand. */
6406 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6407 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6408 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6409 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6410 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6415 *total += COSTS_N_INSNS (2);
6416 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6417 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6418 {
6419 *total += rtx_cost (XEXP (x, 0), code, speed);
6420 return true;
6421 }
6423 return false;
6424 }
6426 *total += COSTS_N_INSNS (1);
6427 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6428 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6430 *total += rtx_cost (XEXP (x, 0), code, speed);
6433 subcode = GET_CODE (XEXP (x, 0));
6434 if (subcode == ASHIFT || subcode == ASHIFTRT
6435 || subcode == LSHIFTRT
6436 || subcode == ROTATE || subcode == ROTATERT)
6437 {
6438 *total += rtx_cost (XEXP (x, 1), code, speed);
6439 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6440 return true;
6441 }
6443 if (subcode == MULT
6444 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6446 *total += rtx_cost (XEXP (x, 1), code, speed);
6447 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6451 if (subcode == UMIN || subcode == UMAX
6452 || subcode == SMIN || subcode == SMAX)
6453 {
6454 *total = COSTS_N_INSNS (3);
6455 return true;
6456 }
6458 return false;
6460 case MULT:
6461 /* This should have been handled by the CPU specific routines. */
6462 gcc_unreachable ();
6464 case TRUNCATE:
6465 if (arm_arch3m && mode == SImode
6466 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6467 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6468 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6469 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6470 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6471 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6472 {
6473 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6474 return true;
6475 }
6476 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6477 return false;
6479 case NEG:
6480 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6482 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6483 {
6484 *total = COSTS_N_INSNS (1);
6485 return false;
6486 }
6487 *total = COSTS_N_INSNS (2);
6488 return false;
6489 }
6491 /* Fall through */
6492 case NOT:
6493 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6494 if (mode == SImode && code == NOT)
6496 subcode = GET_CODE (XEXP (x, 0));
6497 if (subcode == ASHIFT || subcode == ASHIFTRT
6498 || subcode == LSHIFTRT
6499 || subcode == ROTATE || subcode == ROTATERT
6501 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6503 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6504 /* Register shifts cost an extra cycle. */
6505 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6506 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6515 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6516 {
6517 *total = COSTS_N_INSNS (4);
6518 return true;
6519 }
6521 operand = XEXP (x, 0);
6523 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6524 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6525 && GET_CODE (XEXP (operand, 0)) == REG
6526 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6527 *total += COSTS_N_INSNS (1);
6528 *total += (rtx_cost (XEXP (x, 1), code, speed)
6529 + rtx_cost (XEXP (x, 2), code, speed));
6533 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6535 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6541 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6542 && mode == SImode && XEXP (x, 1) == const0_rtx)
6544 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6550 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6551 && mode == SImode && XEXP (x, 1) == const0_rtx)
6553 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
/* SCC insns.  If the comparison has already been performed, they
cost 2 instructions.  Otherwise they need an additional comparison
before them.  */
6576 *total = COSTS_N_INSNS (2);
6577 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6584 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6590 *total += COSTS_N_INSNS (1);
6591 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6592 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6594 *total += rtx_cost (XEXP (x, 0), code, speed);
6598 subcode = GET_CODE (XEXP (x, 0));
6599 if (subcode == ASHIFT || subcode == ASHIFTRT
6600 || subcode == LSHIFTRT
6601 || subcode == ROTATE || subcode == ROTATERT)
6603 *total += rtx_cost (XEXP (x, 1), code, speed);
6604 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6609 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6611 *total += rtx_cost (XEXP (x, 1), code, speed);
6612 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6622 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6623 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6624 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6625 *total += rtx_cost (XEXP (x, 1), code, speed);
6629 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6631 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6633 *total = COSTS_N_INSNS (1);
6636 *total = COSTS_N_INSNS (20);
6639 *total = COSTS_N_INSNS (1);
6641 *total += COSTS_N_INSNS (3);
6645 if (GET_MODE_CLASS (mode) == MODE_INT)
6649 *total += COSTS_N_INSNS (1);
6651 if (GET_MODE (XEXP (x, 0)) != SImode)
6655 if (GET_CODE (XEXP (x, 0)) != MEM)
6656 *total += COSTS_N_INSNS (1);
6658 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6659 *total += COSTS_N_INSNS (2);
6668 if (GET_MODE_CLASS (mode) == MODE_INT)
6671 *total += COSTS_N_INSNS (1);
6673 if (GET_MODE (XEXP (x, 0)) != SImode)
6677 if (GET_CODE (XEXP (x, 0)) != MEM)
6678 *total += COSTS_N_INSNS (1);
6680 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6681 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6688 switch (GET_MODE (XEXP (x, 0)))
6695 *total = COSTS_N_INSNS (1);
6705 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6709 if (const_ok_for_arm (INTVAL (x))
6710 || const_ok_for_arm (~INTVAL (x)))
6711 *total = COSTS_N_INSNS (1);
6713 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6714 INTVAL (x), NULL_RTX,
6721 *total = COSTS_N_INSNS (3);
6725 *total = COSTS_N_INSNS (1);
6729 *total = COSTS_N_INSNS (1);
6730 *total += rtx_cost (XEXP (x, 0), code, speed);
6734 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
6735 *total = COSTS_N_INSNS (1);
6737 *total = COSTS_N_INSNS (4);
6741 *total = COSTS_N_INSNS (4);
6746 /* RTX costs when optimizing for size. */
6748 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6751 enum machine_mode mode = GET_MODE (x);
6754 /* XXX TBD. For now, use the standard costs. */
6755 *total = thumb1_rtx_costs (x, code, outer_code);
6759 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
/* A memory access costs 1 insn if the mode is small or the address is
a single register; otherwise it costs one insn per word.  */
6765 if (REG_P (XEXP (x, 0)))
6766 *total = COSTS_N_INSNS (1);
6768 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6775 /* Needs a libcall, so it costs about this. */
6776 *total = COSTS_N_INSNS (2);
6780 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
6782 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
6790 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
6792 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
6795 else if (mode == SImode)
6797 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
6798 /* Slightly disparage register shifts, but not by much. */
6799 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6800 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
6804 /* Needs a libcall. */
6805 *total = COSTS_N_INSNS (2);
6809 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6811 *total = COSTS_N_INSNS (1);
6817 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
6818 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
6820 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
6821 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
6822 || subcode1 == ROTATE || subcode1 == ROTATERT
6823 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
6824 || subcode1 == ASHIFTRT)
6826 /* It's just the cost of the two operands. */
6831 *total = COSTS_N_INSNS (1);
6835 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6839 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6841 *total = COSTS_N_INSNS (1);
6845 /* A shift as a part of ADD costs nothing. */
6846 if (GET_CODE (XEXP (x, 0)) == MULT
6847 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6849 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
6850 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
6851 *total += rtx_cost (XEXP (x, 1), code, false);
6856 case AND: case XOR: case IOR:
6859 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
6861 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
6862 || subcode == LSHIFTRT || subcode == ASHIFTRT
6863 || (code == AND && subcode == NOT))
6865 /* It's just the cost of the two operands. */
6871 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6875 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6879 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6881 *total = COSTS_N_INSNS (1);
6887 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6896 if (cc_register (XEXP (x, 0), VOIDmode))
6899 *total = COSTS_N_INSNS (1);
6903 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6904 *total = COSTS_N_INSNS (1);
6906 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
6911 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
6913 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
6914 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
6917 *total += COSTS_N_INSNS (1);
6922 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
6924 switch (GET_MODE (XEXP (x, 0)))
6927 *total += COSTS_N_INSNS (1);
6931 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
6937 *total += COSTS_N_INSNS (2);
6942 *total += COSTS_N_INSNS (1);
6947 if (const_ok_for_arm (INTVAL (x)))
6948 /* A multiplication by a constant requires another instruction
6949 to load the constant to a register. */
6950 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
6952 else if (const_ok_for_arm (~INTVAL (x)))
6953 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
6954 else if (const_ok_for_arm (-INTVAL (x)))
6956 if (outer_code == COMPARE || outer_code == PLUS
6957 || outer_code == MINUS)
6960 *total = COSTS_N_INSNS (1);
6963 *total = COSTS_N_INSNS (2);
6969 *total = COSTS_N_INSNS (2);
6973 *total = COSTS_N_INSNS (4);
6978 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
6979 cost of these slightly. */
6980 *total = COSTS_N_INSNS (1) + 1;
6984 if (mode != VOIDmode)
6985 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
*total = COSTS_N_INSNS (4); /* Who knows?  */
/* Top-level RTX costs: use the size costs when optimizing for size,
otherwise dispatch to the tuning-specific cost function.  */
6994 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
6998 return arm_size_rtx_costs (x, (enum rtx_code) code,
6999 (enum rtx_code) outer_code, total);
7001 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
7002 (enum rtx_code) outer_code,
7006 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7007 supported on any "slowmul" cores, so it can be ignored. */
7010 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7011 int *total, bool speed)
7013 enum machine_mode mode = GET_MODE (x);
7017 *total = thumb1_rtx_costs (x, code, outer_code);
7024 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7027 *total = COSTS_N_INSNS (20);
7031 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7033 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7034 & (unsigned HOST_WIDE_INT) 0xffffffff);
7035 int cost, const_ok = const_ok_for_arm (i);
7036 int j, booth_unit_size;
7038 /* Tune as appropriate. */
7039 cost = const_ok ? 4 : 8;
7040 booth_unit_size = 2;
7041 for (j = 0; i && j < 32; j += booth_unit_size)
7043 i >>= booth_unit_size;
7047 *total = COSTS_N_INSNS (cost);
7048 *total += rtx_cost (XEXP (x, 0), code, speed);
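/* Editorial note (not in the original source): the loop above counts
2-bit Booth steps until the remaining multiplier bits are zero.  For
example, i = 0x24 (binary 100100) needs three shifts of 2 to reach
zero, so roughly 3 extra units are added to the base cost of 4
(const_ok) or 8 (constant must be loaded first).  This assumes the
elided loop body shifts I right and bumps COST once per iteration.  */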
7052 *total = COSTS_N_INSNS (20);
return arm_rtx_costs_1 (x, outer_code, total, speed);
7061 /* RTX cost for cores with a fast multiply unit (M variants). */
7064 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7065 int *total, bool speed)
7067 enum machine_mode mode = GET_MODE (x);
7071 *total = thumb1_rtx_costs (x, code, outer_code);
/* ??? Should Thumb-2 use different costs?  */
7079 /* There is no point basing this on the tuning, since it is always the
7080 fast variant if it exists at all. */
7082 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7083 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7084 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
*total = COSTS_N_INSNS (2);
7093 *total = COSTS_N_INSNS (5);
7097 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7099 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7100 & (unsigned HOST_WIDE_INT) 0xffffffff);
7101 int cost, const_ok = const_ok_for_arm (i);
7102 int j, booth_unit_size;
7104 /* Tune as appropriate. */
7105 cost = const_ok ? 4 : 8;
7106 booth_unit_size = 8;
7107 for (j = 0; i && j < 32; j += booth_unit_size)
7109 i >>= booth_unit_size;
*total = COSTS_N_INSNS (cost);
7119 *total = COSTS_N_INSNS (4);
7123 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7125 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
7127 *total = COSTS_N_INSNS (1);
/* Requires a libcall.  */
7133 *total = COSTS_N_INSNS (20);
7137 return arm_rtx_costs_1 (x, outer_code, total, speed);
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any XScale
cores, so it can be ignored.  */
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
7148 enum machine_mode mode = GET_MODE (x);
7152 *total = thumb1_rtx_costs (x, code, outer_code);
7159 if (GET_CODE (XEXP (x, 0)) != MULT)
7160 return arm_rtx_costs_1 (x, outer_code, total, speed);
7162 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7163 will stall until the multiplication is complete. */
7164 *total = COSTS_N_INSNS (3);
7168 /* There is no point basing this on the tuning, since it is always the
7169 fast variant if it exists at all. */
7171 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7172 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7173 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7175 *total = COSTS_N_INSNS (2);
7182 *total = COSTS_N_INSNS (5);
7186 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7188 /* If operand 1 is a constant we can more accurately
7189 calculate the cost of the multiply. The multiplier can
7190 retire 15 bits on the first cycle and a further 12 on the
7191 second. We do, of course, have to load the constant into
7192 a register first. */
7193 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7194 /* There's a general overhead of one cycle. */
7196 unsigned HOST_WIDE_INT masked_const;
7201 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7203 masked_const = i & 0xffff8000;
7204 if (masked_const != 0)
7207 masked_const = i & 0xf8000000;
7208 if (masked_const != 0)
7211 *total = COSTS_N_INSNS (cost);
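/* Editorial sketch (not in the original source): the visible cost rule
above, restated as a standalone helper for clarity.  Example: 0x1234
fits in 15 bits -> 1 cycle; 0x123456 -> 2 cycles; 0x12345678 -> 3.  */
#if 0
static int
xscale_mult_cycles (unsigned int i)
{
  int cost = 1;				/* General overhead of one cycle.  */
  if ((i & 0xffff8000) != 0)		/* Bits beyond the first 15...  */
    {
      cost++;				/* ...need a second cycle...  */
      if ((i & 0xf8000000) != 0)	/* ...and bits beyond 27 a third.  */
	cost++;
    }
  return cost;
}
#endif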
7217 *total = COSTS_N_INSNS (3);
/* Requires a libcall.  */
7222 *total = COSTS_N_INSNS (20);
7226 return arm_rtx_costs_1 (x, outer_code, total, speed);
7231 /* RTX costs for 9e (and later) cores. */
7234 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7235 int *total, bool speed)
7237 enum machine_mode mode = GET_MODE (x);
7244 *total = COSTS_N_INSNS (3);
7248 *total = thumb1_rtx_costs (x, code, outer_code);
7256 /* There is no point basing this on the tuning, since it is always the
7257 fast variant if it exists at all. */
7259 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7260 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7261 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7263 *total = COSTS_N_INSNS (2);
7270 *total = COSTS_N_INSNS (5);
7276 *total = COSTS_N_INSNS (2);
7280 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7282 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
7284 *total = COSTS_N_INSNS (1);
7289 *total = COSTS_N_INSNS (20);
7293 return arm_rtx_costs_1 (x, outer_code, total, speed);
7296 /* All address computations that can be done are free, but rtx cost returns
7297 the same for practically all of them. So we weight the different types
of address here in the order (most preferred first):
7299 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
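/* Editorial example (not in the original source): under this weighting
a post-increment address such as (post_inc (reg r4)) gets the lowest
cost, (plus (reg r4) (const_int 8)) an intermediate one, and a bare
MEM, LABEL_REF or SYMBOL_REF address the highest.  */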
7301 arm_arm_address_cost (rtx x)
7303 enum rtx_code c = GET_CODE (x);
7305 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7307 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7312 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7315 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7325 arm_thumb_address_cost (rtx x)
7327 enum rtx_code c = GET_CODE (x);
7332 && GET_CODE (XEXP (x, 0)) == REG
7333 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7340 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7342 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7346 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7350 /* Some true dependencies can have a higher cost depending
7351 on precisely how certain input operands are used. */
7353 && REG_NOTE_KIND (link) == 0
7354 && recog_memoized (insn) >= 0
7355 && recog_memoized (dep) >= 0)
7357 int shift_opnum = get_attr_shift (insn);
7358 enum attr_type attr_type = get_attr_type (dep);
7360 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7361 operand for INSN. If we have a shifted input operand and the
7362 instruction we depend on is another ALU instruction, then we may
7363 have to account for an additional stall. */
7364 if (shift_opnum != 0
7365 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7367 rtx shifted_operand;
7370 /* Get the shifted operand. */
7371 extract_insn (insn);
7372 shifted_operand = recog_data.operand[shift_opnum];
/* Iterate over all the operands in DEP.  If we write an operand
that overlaps with SHIFTED_OPERAND, then we have to increase the
cost of this dependency.  */
7378 preprocess_constraints ();
7379 for (opno = 0; opno < recog_data.n_operands; opno++)
7381 /* We can ignore strict inputs. */
7382 if (recog_data.operand_type[opno] == OP_IN)
7385 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7392 /* XXX This is not strictly true for the FPA. */
7393 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7394 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7397 /* Call insns don't incur a stall, even if they follow a load. */
7398 if (REG_NOTE_KIND (link) == 0
7399 && GET_CODE (insn) == CALL_INSN)
7402 if ((i_pat = single_set (insn)) != NULL
7403 && GET_CODE (SET_SRC (i_pat)) == MEM
7404 && (d_pat = single_set (dep)) != NULL
7405 && GET_CODE (SET_DEST (d_pat)) == MEM)
7407 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
/* This is a load after a store; there is no conflict if the load reads
from a cached area.  Assume that loads from the stack and from the
constant pool are cached, and that others will miss.  This is a
crude approximation, but it captures most common cases.  */
7413 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7414 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7415 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7416 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7423 static int fp_consts_inited = 0;
7425 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7426 static const char * const strings_fp[8] =
7429 "4", "5", "0.5", "10"
7432 static REAL_VALUE_TYPE values_fp[8];
7435 init_fp_table (void)
7441 fp_consts_inited = 1;
7443 fp_consts_inited = 8;
7445 for (i = 0; i < fp_consts_inited; i++)
7447 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7452 /* Return TRUE if rtx X is a valid immediate FP constant. */
7454 arm_const_double_rtx (rtx x)
7459 if (!fp_consts_inited)
7462 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7463 if (REAL_VALUE_MINUS_ZERO (r))
7466 for (i = 0; i < fp_consts_inited; i++)
7467 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7473 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7475 neg_const_double_rtx_ok_for_fpa (rtx x)
7480 if (!fp_consts_inited)
7483 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7484 r = REAL_VALUE_NEGATE (r);
7485 if (REAL_VALUE_MINUS_ZERO (r))
7488 for (i = 0; i < 8; i++)
7489 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7496 /* VFPv3 has a fairly wide range of representable immediates, formed from
7497 "quarter-precision" floating-point values. These can be evaluated using this
formula (with ^ for exponentiation):

  (-1)^s * n * 2^-r

Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7503 16 <= n <= 31 and 0 <= r <= 7.
7505 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7507 - A (most-significant) is the sign bit.
7508 - BCD are the exponent (encoded as r XOR 3).
7509 - EFGH are the mantissa (encoded as n - 16).
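   Editorial example (not original text): the encoding 0x70 has s = 0,
   BCD = 7 and EFGH = 0, so r = 7 ^ 3 = 4, n = 0 + 16 = 16, and the
   value is (-1)^0 * 16 * 2^-4 = 1.0.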
7512 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7513 fconst[sd] instruction, or -1 if X isn't suitable. */
7515 vfp3_const_double_index (rtx x)
7517 REAL_VALUE_TYPE r, m;
7519 unsigned HOST_WIDE_INT mantissa, mant_hi;
7520 unsigned HOST_WIDE_INT mask;
7521 HOST_WIDE_INT m1, m2;
7522 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7524 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7527 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7529 /* We can't represent these things, so detect them first. */
7530 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7533 /* Extract sign, exponent and mantissa. */
7534 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7535 r = REAL_VALUE_ABS (r);
7536 exponent = REAL_EXP (&r);
7537 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7538 highest (sign) bit, with a fixed binary point at bit point_pos.
7539 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7540 bits for the mantissa, this may fail (low bits would be lost). */
7541 real_ldexp (&m, &r, point_pos - exponent);
7542 REAL_VALUE_TO_INT (&m1, &m2, m);
7546 /* If there are bits set in the low part of the mantissa, we can't
7547 represent this value. */
7551 /* Now make it so that mantissa contains the most-significant bits, and move
the point_pos to indicate that the least-significant bits have been
discarded.  */
7554 point_pos -= HOST_BITS_PER_WIDE_INT;
7557 /* We can permit four significant bits of mantissa only, plus a high bit
7558 which is always 1. */
7559 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7560 if ((mantissa & mask) != 0)
7563 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7564 mantissa >>= point_pos - 5;
7566 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7567 floating-point immediate zero with Neon using an integer-zero load, but
7568 that case is handled elsewhere.) */
7572 gcc_assert (mantissa >= 16 && mantissa <= 31);
7574 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7575 normalized significands are in the range [1, 2). (Our mantissa is shifted
7576 left 4 places at this point relative to normalized IEEE754 values). GCC
7577 internally uses [0.5, 1) (see real.c), so the exponent returned from
7578 REAL_EXP must be altered. */
7579 exponent = 5 - exponent;
7581 if (exponent < 0 || exponent > 7)
7584 /* Sign, mantissa and exponent are now in the correct form to plug into the
7585 formula described in the comment above. */
7586 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7589 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7591 vfp3_const_double_rtx (rtx x)
7596 return vfp3_const_double_index (x) != -1;
7599 /* Recognize immediates which can be used in various Neon instructions. Legal
7600 immediates are described by the following table (for VMVN variants, the
7601 bitwise inverse of the constant shown is recognized. In either case, VMOV
7602 is output and the correct instruction to use for a given constant is chosen
7603 by the assembler). The constant shown is replicated across all elements of
7604 the destination vector.
7606 insn elems variant constant (binary)
7607 ---- ----- ------- -----------------
7608 vmov i32 0 00000000 00000000 00000000 abcdefgh
7609 vmov i32 1 00000000 00000000 abcdefgh 00000000
7610 vmov i32 2 00000000 abcdefgh 00000000 00000000
7611 vmov i32 3 abcdefgh 00000000 00000000 00000000
7612 vmov i16 4 00000000 abcdefgh
7613 vmov i16 5 abcdefgh 00000000
7614 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7615 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7616 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7617 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7618 vmvn i16 10 00000000 abcdefgh
7619 vmvn i16 11 abcdefgh 00000000
7620 vmov i32 12 00000000 00000000 abcdefgh 11111111
7621 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7622 vmov i32 14 00000000 abcdefgh 11111111 11111111
vmvn i32 15 00000000 abcdefgh 11111111 11111111
vmov i8 16 abcdefgh
vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7626 eeeeeeee ffffffff gggggggg hhhhhhhh
7627 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
For case 18, B = !b.  Representable values are exactly those accepted by
vfp3_const_double_index, but are output as floating-point numbers rather
than as indices.
7633 Variants 0-5 (inclusive) may also be used as immediates for the second
7634 operand of VORR/VBIC instructions.
7636 The INVERSE argument causes the bitwise inverse of the given operand to be
7637 recognized instead (used for recognizing legal immediates for the VAND/VORN
7638 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7639 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7640 output, rather than the real insns vbic/vorr).
7642 INVERSE makes no difference to the recognition of float vectors.
7644 The return value is the variant of immediate as shown in the above table, or
7645 -1 if the given value doesn't match any of the listed patterns.
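   Editorial example (not original text): a V4SImode vector with every
   element equal to 0x0000ab00 splats to the little-endian byte pattern
   00 ab 00 00 in each 4-byte group, which matches variant 1 above with
   abcdefgh = 0xab and an element width of 32.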
7648 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7649 rtx *modconst, int *elementwidth)
7651 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7653 for (i = 0; i < idx; i += (STRIDE)) \
7658 immtype = (CLASS); \
7659 elsize = (ELSIZE); \
7663 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7664 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7665 unsigned char bytes[16];
7666 int immtype = -1, matches;
7667 unsigned int invmask = inverse ? 0xff : 0;
7669 /* Vectors of float constants. */
7670 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7672 rtx el0 = CONST_VECTOR_ELT (op, 0);
7675 if (!vfp3_const_double_rtx (el0))
7678 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7680 for (i = 1; i < n_elts; i++)
7682 rtx elt = CONST_VECTOR_ELT (op, i);
7685 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
7687 if (!REAL_VALUES_EQUAL (r0, re))
7692 *modconst = CONST_VECTOR_ELT (op, 0);
7700 /* Splat vector constant out into a byte vector. */
7701 for (i = 0; i < n_elts; i++)
7703 rtx el = CONST_VECTOR_ELT (op, i);
7704 unsigned HOST_WIDE_INT elpart;
7705 unsigned int part, parts;
7707 if (GET_CODE (el) == CONST_INT)
7709 elpart = INTVAL (el);
7712 else if (GET_CODE (el) == CONST_DOUBLE)
7714 elpart = CONST_DOUBLE_LOW (el);
7720 for (part = 0; part < parts; part++)
7723 for (byte = 0; byte < innersize; byte++)
7725 bytes[idx++] = (elpart & 0xff) ^ invmask;
7726 elpart >>= BITS_PER_UNIT;
7728 if (GET_CODE (el) == CONST_DOUBLE)
7729 elpart = CONST_DOUBLE_HIGH (el);
7734 gcc_assert (idx == GET_MODE_SIZE (mode));
7738 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7739 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7741 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7742 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7744 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7745 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7747 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7748 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
7750 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
7752 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
7754 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7755 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7757 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7758 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7760 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7761 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7763 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7764 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
7766 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
7768 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
7770 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7771 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7773 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7774 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7776 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7777 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7779 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7780 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7782 CHECK (1, 8, 16, bytes[i] == bytes[0]);
7784 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7785 && bytes[i] == bytes[(i + 8) % idx]);
7793 *elementwidth = elsize;
7797 unsigned HOST_WIDE_INT imm = 0;
7799 /* Un-invert bytes of recognized vector, if necessary. */
7801 for (i = 0; i < idx; i++)
7802 bytes[i] ^= invmask;
7806 /* FIXME: Broken on 32-bit H_W_I hosts. */
7807 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7809 for (i = 0; i < 8; i++)
7810 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7811 << (i * BITS_PER_UNIT);
7813 *modconst = GEN_INT (imm);
7817 unsigned HOST_WIDE_INT imm = 0;
7819 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7820 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7822 *modconst = GEN_INT (imm);
7830 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
7831 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
7832 float elements), and a modified constant (whatever should be output for a
7833 VMOV) in *MODCONST. */
7836 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
7837 rtx *modconst, int *elementwidth)
7841 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
7847 *modconst = tmpconst;
7850 *elementwidth = tmpwidth;
7855 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
7856 the immediate is valid, write a constant suitable for using as an operand
7857 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
7858 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
7861 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
7862 rtx *modconst, int *elementwidth)
7866 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
7868 if (retval < 0 || retval > 5)
7872 *modconst = tmpconst;
7875 *elementwidth = tmpwidth;
7880 /* Return a string suitable for output of Neon immediate logic operation
7884 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
7885 int inverse, int quad)
7887 int width, is_valid;
7888 static char templ[40];
7890 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
7892 gcc_assert (is_valid != 0);
7895 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
7897 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
7902 /* Output a sequence of pairwise operations to implement a reduction.
7903 NOTE: We do "too much work" here, because pairwise operations work on two
registers-worth of operands in one go.  Unfortunately we don't seem able to
exploit those extra calculations to do the full operation in fewer steps.
7906 Although all vector elements of the result but the first are ignored, we
7907 actually calculate the same result in each of the elements. An alternative
7908 such as initially loading a vector with zero to use as each of the second
7909 operands would use up an additional register and take an extra instruction,
7910 for no particular gain. */
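/* Editorial sketch (not in the original source): with a pairwise add
and a 4-element vector V = {a, b, c, d}, the loop below runs twice:
  i = 2: vpadd V, V   -> {a+b, c+d, a+b, c+d}
  i = 1: vpadd again  -> {a+b+c+d, ...}  (every element holds the sum)
i.e. log2(parts) steps, halving I each pass.  */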
7913 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
7914 rtx (*reduc) (rtx, rtx, rtx))
7916 enum machine_mode inner = GET_MODE_INNER (mode);
7917 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
7920 for (i = parts / 2; i >= 1; i /= 2)
7922 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
7923 emit_insn (reduc (dest, tmpsum, tmpsum));
7928 /* Initialize a vector with non-constant elements. FIXME: We can do better
7929 than the current implementation (building a vector on the stack and then
7930 loading it) in many cases. See rs6000.c. */
7933 neon_expand_vector_init (rtx target, rtx vals)
7935 enum machine_mode mode = GET_MODE (target);
7936 enum machine_mode inner = GET_MODE_INNER (mode);
7937 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
7940 gcc_assert (VECTOR_MODE_P (mode));
7942 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
7943 for (i = 0; i < n_elts; i++)
7944 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
7945 XVECEXP (vals, 0, i));
7947 emit_move_insn (target, mem);
7950 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
7951 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
7952 reported source locations are bogus. */
7955 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
7960 gcc_assert (GET_CODE (operand) == CONST_INT);
7962 lane = INTVAL (operand);
7964 if (lane < low || lane >= high)
7968 /* Bounds-check lanes. */
7971 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7973 bounds_check (operand, low, high, "lane out of range");
7976 /* Bounds-check constants. */
7979 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7981 bounds_check (operand, low, high, "constant out of range");
7985 neon_element_bits (enum machine_mode mode)
7988 return GET_MODE_BITSIZE (mode);
7990 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
7994 /* Predicates for `match_operand' and `match_operator'. */
7996 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
7998 cirrus_memory_offset (rtx op)
8000 /* Reject eliminable registers. */
8001 if (! (reload_in_progress || reload_completed)
8002 && ( reg_mentioned_p (frame_pointer_rtx, op)
8003 || reg_mentioned_p (arg_pointer_rtx, op)
8004 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8005 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8006 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8007 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8010 if (GET_CODE (op) == MEM)
8016 /* Match: (mem (reg)). */
8017 if (GET_CODE (ind) == REG)
8023 if (GET_CODE (ind) == PLUS
8024 && GET_CODE (XEXP (ind, 0)) == REG
8025 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8026 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8033 /* Return TRUE if OP is a valid coprocessor memory address pattern.
WB is true if full writeback address modes are allowed and is false
if limited writeback address modes (POST_INC and PRE_DEC) are
supported.  */
8039 arm_coproc_mem_operand (rtx op, bool wb)
8043 /* Reject eliminable registers. */
8044 if (! (reload_in_progress || reload_completed)
8045 && ( reg_mentioned_p (frame_pointer_rtx, op)
8046 || reg_mentioned_p (arg_pointer_rtx, op)
8047 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8048 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8049 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8050 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8053 /* Constants are converted into offsets from labels. */
8054 if (GET_CODE (op) != MEM)
8059 if (reload_completed
8060 && (GET_CODE (ind) == LABEL_REF
8061 || (GET_CODE (ind) == CONST
8062 && GET_CODE (XEXP (ind, 0)) == PLUS
8063 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8064 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8067 /* Match: (mem (reg)). */
8068 if (GET_CODE (ind) == REG)
8069 return arm_address_register_rtx_p (ind, 0);
/* Autoincrement addressing modes.  POST_INC and PRE_DEC are
8072 acceptable in any case (subject to verification by
8073 arm_address_register_rtx_p). We need WB to be true to accept
8074 PRE_INC and POST_DEC. */
8075 if (GET_CODE (ind) == POST_INC
8076 || GET_CODE (ind) == PRE_DEC
8078 && (GET_CODE (ind) == PRE_INC
8079 || GET_CODE (ind) == POST_DEC)))
8080 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8083 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8084 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8085 && GET_CODE (XEXP (ind, 1)) == PLUS
8086 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8087 ind = XEXP (ind, 1);
8092 if (GET_CODE (ind) == PLUS
8093 && GET_CODE (XEXP (ind, 0)) == REG
8094 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8095 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8096 && INTVAL (XEXP (ind, 1)) > -1024
8097 && INTVAL (XEXP (ind, 1)) < 1024
8098 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
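/* Editorial example (not in the original source): an address such as
(plus (reg r5) (const_int 1020)) passes the range test above
(word-aligned and within (-1024, 1024)), whereas offsets of 1024 or
2 would be rejected.  */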
8104 /* Return TRUE if OP is a memory operand which we can load or store a vector
8105 to/from. TYPE is one of the following values:
0 - Vector load/store (vldr)
8107 1 - Core registers (ldm)
8108 2 - Element/structure loads (vld1)
8111 neon_vector_mem_operand (rtx op, int type)
8115 /* Reject eliminable registers. */
8116 if (! (reload_in_progress || reload_completed)
8117 && ( reg_mentioned_p (frame_pointer_rtx, op)
8118 || reg_mentioned_p (arg_pointer_rtx, op)
8119 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8120 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8121 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8122 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8125 /* Constants are converted into offsets from labels. */
8126 if (GET_CODE (op) != MEM)
8131 if (reload_completed
8132 && (GET_CODE (ind) == LABEL_REF
8133 || (GET_CODE (ind) == CONST
8134 && GET_CODE (XEXP (ind, 0)) == PLUS
8135 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8136 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8139 /* Match: (mem (reg)). */
8140 if (GET_CODE (ind) == REG)
8141 return arm_address_register_rtx_p (ind, 0);
8143 /* Allow post-increment with Neon registers. */
8144 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8145 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8147 /* FIXME: vld1 allows register post-modify. */
8153 && GET_CODE (ind) == PLUS
8154 && GET_CODE (XEXP (ind, 0)) == REG
8155 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8156 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8157 && INTVAL (XEXP (ind, 1)) > -1024
8158 && INTVAL (XEXP (ind, 1)) < 1016
8159 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
type.  */
8168 neon_struct_mem_operand (rtx op)
8172 /* Reject eliminable registers. */
8173 if (! (reload_in_progress || reload_completed)
8174 && ( reg_mentioned_p (frame_pointer_rtx, op)
8175 || reg_mentioned_p (arg_pointer_rtx, op)
8176 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8177 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8178 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8179 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8182 /* Constants are converted into offsets from labels. */
8183 if (GET_CODE (op) != MEM)
8188 if (reload_completed
8189 && (GET_CODE (ind) == LABEL_REF
8190 || (GET_CODE (ind) == CONST
8191 && GET_CODE (XEXP (ind, 0)) == PLUS
8192 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8193 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8196 /* Match: (mem (reg)). */
8197 if (GET_CODE (ind) == REG)
8198 return arm_address_register_rtx_p (ind, 0);
8203 /* Return true if X is a register that will be eliminated later on. */
8205 arm_eliminable_register (rtx x)
8207 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8208 || REGNO (x) == ARG_POINTER_REGNUM
8209 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8210 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
/* Return GENERAL_REGS if a scratch register is required to reload x
to/from coprocessor registers.  Otherwise return NO_REGS.  */
8217 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8221 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8223 return GENERAL_REGS;
8227 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8228 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8229 && neon_vector_mem_operand (x, 0))
8232 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8235 return GENERAL_REGS;
/* Values which must be returned in the most-significant end of the return
register.  */
8242 arm_return_in_msb (const_tree valtype)
8244 return (TARGET_AAPCS_BASED
8246 && (AGGREGATE_TYPE_P (valtype)
8247 || TREE_CODE (valtype) == COMPLEX_TYPE));
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
Used by the Cirrus Maverick code, which has to work around
a hardware bug triggered by such instructions.  */
8254 arm_memory_load_p (rtx insn)
rtx body, lhs, rhs;
8258 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8261 body = PATTERN (insn);
8263 if (GET_CODE (body) != SET)
8266 lhs = XEXP (body, 0);
8267 rhs = XEXP (body, 1);
8269 lhs = REG_OR_SUBREG_RTX (lhs);
8271 /* If the destination is not a general purpose
8272 register we do not have to worry. */
8273 if (GET_CODE (lhs) != REG
8274 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8277 /* As well as loads from memory we also have to react
8278 to loads of invalid constants which will be turned
8279 into loads from the minipool. */
8280 return (GET_CODE (rhs) == MEM
8281 || GET_CODE (rhs) == SYMBOL_REF
8282 || note_invalid_constants (insn, -1, false));
8285 /* Return TRUE if INSN is a Cirrus instruction. */
8287 arm_cirrus_insn_p (rtx insn)
8289 enum attr_cirrus attr;
8291 /* get_attr cannot accept USE or CLOBBER. */
8293 || GET_CODE (insn) != INSN
8294 || GET_CODE (PATTERN (insn)) == USE
8295 || GET_CODE (PATTERN (insn)) == CLOBBER)
8298 attr = get_attr_cirrus (insn);
8300 return attr != CIRRUS_NOT;
8303 /* Cirrus reorg for invalid instruction combinations. */
8305 cirrus_reorg (rtx first)
8307 enum attr_cirrus attr;
8308 rtx body = PATTERN (first);
8312 /* Any branch must be followed by 2 non Cirrus instructions. */
8313 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8316 t = next_nonnote_insn (first);
8318 if (arm_cirrus_insn_p (t))
8321 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8325 emit_insn_after (gen_nop (), first);
8330 /* (float (blah)) is in parallel with a clobber. */
8331 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8332 body = XVECEXP (body, 0, 0);
8334 if (GET_CODE (body) == SET)
8336 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8338 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8339 be followed by a non Cirrus insn. */
8340 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8342 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8343 emit_insn_after (gen_nop (), first);
8347 else if (arm_memory_load_p (first))
8349 unsigned int arm_regno;
8351 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8352 ldr/cfmv64hr combination where the Rd field is the same
in both instructions must be split with a non-Cirrus
insn in between.  */
8360 /* Get Arm register number for ldr insn. */
8361 if (GET_CODE (lhs) == REG)
8362 arm_regno = REGNO (lhs);
8365 gcc_assert (GET_CODE (rhs) == REG);
8366 arm_regno = REGNO (rhs);
8370 first = next_nonnote_insn (first);
8372 if (! arm_cirrus_insn_p (first))
8375 body = PATTERN (first);
8377 /* (float (blah)) is in parallel with a clobber. */
8378 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8379 body = XVECEXP (body, 0, 0);
8381 if (GET_CODE (body) == FLOAT)
8382 body = XEXP (body, 0);
8384 if (get_attr_cirrus (first) == CIRRUS_MOVE
8385 && GET_CODE (XEXP (body, 1)) == REG
8386 && arm_regno == REGNO (XEXP (body, 1)))
8387 emit_insn_after (gen_nop (), first);
8393 /* get_attr cannot accept USE or CLOBBER. */
8395 || GET_CODE (first) != INSN
8396 || GET_CODE (PATTERN (first)) == USE
8397 || GET_CODE (PATTERN (first)) == CLOBBER)
8400 attr = get_attr_cirrus (first);
8402 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8403 must be followed by a non-coprocessor instruction. */
8404 if (attr == CIRRUS_COMPARE)
8408 t = next_nonnote_insn (first);
8410 if (arm_cirrus_insn_p (t))
8413 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8417 emit_insn_after (gen_nop (), first);
8423 /* Return TRUE if X references a SYMBOL_REF. */
8425 symbol_mentioned_p (rtx x)
8430 if (GET_CODE (x) == SYMBOL_REF)
8433 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8434 are constant offsets, not symbols. */
8435 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8438 fmt = GET_RTX_FORMAT (GET_CODE (x));
8440 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8446 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8447 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8450 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8457 /* Return TRUE if X references a LABEL_REF. */
8459 label_mentioned_p (rtx x)
8464 if (GET_CODE (x) == LABEL_REF)
8467 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8468 instruction, but they are constant offsets, not symbols. */
8469 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8472 fmt = GET_RTX_FORMAT (GET_CODE (x));
8473 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8479 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8480 if (label_mentioned_p (XVECEXP (x, i, j)))
8483 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8491 tls_mentioned_p (rtx x)
8493 switch (GET_CODE (x))
8496 return tls_mentioned_p (XEXP (x, 0));
8499 if (XINT (x, 1) == UNSPEC_TLS)
8507 /* Must not copy a SET whose source operand is PC-relative. */
8510 arm_cannot_copy_insn_p (rtx insn)
8512 rtx pat = PATTERN (insn);
8514 if (GET_CODE (pat) == SET)
8516 rtx rhs = SET_SRC (pat);
8518 if (GET_CODE (rhs) == UNSPEC
8519 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
8522 if (GET_CODE (rhs) == MEM
8523 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
8524 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
8534 enum rtx_code code = GET_CODE (x);
8551 /* Return 1 if memory locations are adjacent. */
8553 adjacent_mem_locations (rtx a, rtx b)
8555 /* We don't guarantee to preserve the order of these memory refs. */
8556 if (volatile_refs_p (a) || volatile_refs_p (b))
8559 if ((GET_CODE (XEXP (a, 0)) == REG
8560 || (GET_CODE (XEXP (a, 0)) == PLUS
8561 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
8562 && (GET_CODE (XEXP (b, 0)) == REG
8563 || (GET_CODE (XEXP (b, 0)) == PLUS
8564 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
8566 HOST_WIDE_INT val0 = 0, val1 = 0;
8570 if (GET_CODE (XEXP (a, 0)) == PLUS)
8572 reg0 = XEXP (XEXP (a, 0), 0);
8573 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
8578 if (GET_CODE (XEXP (b, 0)) == PLUS)
8580 reg1 = XEXP (XEXP (b, 0), 0);
8581 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
8586 /* Don't accept any offset that will require multiple
8587 instructions to handle, since this would cause the
8588 arith_adjacentmem pattern to output an overlong sequence. */
8589 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
8592 /* Don't allow an eliminable register: register elimination can make
8593 the offset too large. */
8594 if (arm_eliminable_register (reg0))
8597 val_diff = val1 - val0;
8601 /* If the target has load delay slots, then there's no benefit
8602 to using an ldm instruction unless the offset is zero and
8603 we are optimizing for size. */
8604 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
8605 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
8606 && (val_diff == 4 || val_diff == -4));
8609 return ((REGNO (reg0) == REGNO (reg1))
8610 && (val_diff == 4 || val_diff == -4));
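/* Editorial example (not in the original source): with
a = (mem (plus (reg r4) (const_int 8))) and
b = (mem (plus (reg r4) (const_int 12))), both addresses use the
same base register and the offsets differ by exactly 4, so the pair
is reported adjacent and becomes an ldm/stm candidate.  */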
8617 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8618 HOST_WIDE_INT *load_offset)
8620 int unsorted_regs[4];
8621 HOST_WIDE_INT unsorted_offsets[4];
8626 /* Can only handle 2, 3, or 4 insns at present,
8627 though could be easily extended if required. */
8628 gcc_assert (nops >= 2 && nops <= 4);
8630 memset (order, 0, 4 * sizeof (int));
8632 /* Loop over the operands and check that the memory references are
8633 suitable (i.e. immediate offsets from the same base register). At
the same time, extract the target register, and the memory
offsets.  */
8636 for (i = 0; i < nops; i++)
8641 /* Convert a subreg of a mem into the mem itself. */
8642 if (GET_CODE (operands[nops + i]) == SUBREG)
8643 operands[nops + i] = alter_subreg (operands + (nops + i));
8645 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8647 /* Don't reorder volatile memory references; it doesn't seem worth
8648 looking for the case where the order is ok anyway. */
8649 if (MEM_VOLATILE_P (operands[nops + i]))
8652 offset = const0_rtx;
8654 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8655 || (GET_CODE (reg) == SUBREG
8656 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8657 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8658 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8660 || (GET_CODE (reg) == SUBREG
8661 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8662 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8667 base_reg = REGNO (reg);
8668 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8669 ? REGNO (operands[i])
8670 : REGNO (SUBREG_REG (operands[i])));
8675 if (base_reg != (int) REGNO (reg))
8676 /* Not addressed from the same base register. */
8679 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
8680 ? REGNO (operands[i])
8681 : REGNO (SUBREG_REG (operands[i])));
8682 if (unsorted_regs[i] < unsorted_regs[order[0]])
8686 /* If it isn't an integer register, or if it overwrites the
8687 base register but isn't the last insn in the list, then
8688 we can't do this. */
8689 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
8690 || (i != nops - 1 && unsorted_regs[i] == base_reg))
8693 unsorted_offsets[i] = INTVAL (offset);
8696 /* Not a suitable memory address. */
8700 /* All the useful information has now been extracted from the
8701 operands into unsorted_regs and unsorted_offsets; additionally,
8702 order[0] has been set to the lowest numbered register in the
8703 list. Sort the registers into order, and check that the memory
8704 offsets are ascending and adjacent. */
8706 for (i = 1; i < nops; i++)
8710 order[i] = order[i - 1];
8711 for (j = 0; j < nops; j++)
8712 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
8713 && (order[i] == order[i - 1]
8714 || unsorted_regs[j] < unsorted_regs[order[i]]))
/* Have we found a suitable register?  If not, one must be used more
than once.  */
8719 if (order[i] == order[i - 1])
8722 /* Is the memory address adjacent and ascending? */
8723 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
8731 for (i = 0; i < nops; i++)
8732 regs[i] = unsorted_regs[order[i]];
8734 *load_offset = unsorted_offsets[order[0]];
8737 if (unsorted_offsets[order[0]] == 0)
8738 return 1; /* ldmia */
8740 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
8741 return 2; /* ldmib */
8743 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
8744 return 3; /* ldmda */
8746 if (unsorted_offsets[order[nops - 1]] == -4)
8747 return 4; /* ldmdb */
8749 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
8750 if the offset isn't small enough. The reason 2 ldrs are faster
8751 is because these ARMs are able to do more than one cache access
8752 in a single cycle. The ARM9 and StrongARM have Harvard caches,
8753 whilst the ARM8 has a double bandwidth cache. This means that
8754 these cores can do both an instruction fetch and a data fetch in
8755 a single cycle, so the trick of calculating the address into a
8756 scratch register (one of the result regs) and then doing a load
8757 multiple actually becomes slower (and no smaller in code size).
8758 That is the transformation
8760 ldr rd1, [rbase + offset]
ldr rd2, [rbase + offset + 4]

to

add rd1, rbase, offset
8766 ldmia rd1, {rd1, rd2}
8768 produces worse code -- '3 cycles + any stalls on rd2' instead of
8769 '2 cycles + any stalls on rd2'. On ARMs with only one cache
8770 access per cycle, the first sequence could never complete in less
8771 than 6 cycles, whereas the ldm sequence would only take 5 and
8772 would make better use of sequential accesses if not hitting the
8775 We cheat here and test 'arm_ld_sched' which we currently know to
8776 only be true for the ARM8, ARM9 and StrongARM. If this ever
8777 changes, then the test below needs to be reworked. */
8778 if (nops == 2 && arm_ld_sched)
/* Can't do it without setting up the offset; only do this if it takes
no more than one insn.  */
8783 return (const_ok_for_arm (unsorted_offsets[order[0]])
8784 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
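/* Editorial example (not in the original source): on TARGET_ARM, for
loads { r0 <- [r4, #4], r1 <- [r4, #8] } the routine above sees
ascending, adjacent offsets starting at 4 and returns 2 (ldmib)
with *base = r4 and *load_offset = 4.  */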
8788 emit_ldm_seq (rtx *operands, int nops)
8792 HOST_WIDE_INT offset;
8796 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
8799 strcpy (buf, "ldm%(ia%)\t");
8803 strcpy (buf, "ldm%(ib%)\t");
8807 strcpy (buf, "ldm%(da%)\t");
8811 strcpy (buf, "ldm%(db%)\t");
8816 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
8817 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
8820 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
8821 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
8823 output_asm_insn (buf, operands);
8825 strcpy (buf, "ldm%(ia%)\t");
8832 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
8833 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
8835 for (i = 1; i < nops; i++)
8836 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
8837 reg_names[regs[i]]);
8839 strcat (buf, "}\t%@ phole ldm");
8841 output_asm_insn (buf, operands);
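/* Editorial example (not in the original source): for the ldmib case
with base r4 and regs {r0, r1}, the buffer built above would read
"ldm%(ib%)\tr4, {r0, r1}\t%@ phole ldm" before the assembler-dialect
escapes %(...%) are expanded (REGISTER_PREFIX assumed empty).  */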
8846 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8847 HOST_WIDE_INT * load_offset)
8849 int unsorted_regs[4];
8850 HOST_WIDE_INT unsorted_offsets[4];
8855 /* Can only handle 2, 3, or 4 insns at present, though could be easily
8856 extended if required. */
8857 gcc_assert (nops >= 2 && nops <= 4);
8859 memset (order, 0, 4 * sizeof (int));
8861 /* Loop over the operands and check that the memory references are
8862 suitable (i.e. immediate offsets from the same base register). At
the same time, extract the target register, and the memory
offsets.  */
8865 for (i = 0; i < nops; i++)
8870 /* Convert a subreg of a mem into the mem itself. */
8871 if (GET_CODE (operands[nops + i]) == SUBREG)
8872 operands[nops + i] = alter_subreg (operands + (nops + i));
8874 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8876 /* Don't reorder volatile memory references; it doesn't seem worth
8877 looking for the case where the order is ok anyway. */
8878 if (MEM_VOLATILE_P (operands[nops + i]))
8881 offset = const0_rtx;
8883 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8884 || (GET_CODE (reg) == SUBREG
8885 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8886 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8887 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8889 || (GET_CODE (reg) == SUBREG
8890 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8891 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8896 base_reg = REGNO (reg);
8897 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8898 ? REGNO (operands[i])
8899 : REGNO (SUBREG_REG (operands[i])));
8904 if (base_reg != (int) REGNO (reg))
8905 /* Not addressed from the same base register. */
8908 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
8909 ? REGNO (operands[i])
8910 : REGNO (SUBREG_REG (operands[i])));
8911 if (unsorted_regs[i] < unsorted_regs[order[0]])
8915 /* If it isn't an integer register, then we can't do this. */
8916 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
8919 unsorted_offsets[i] = INTVAL (offset);
8922 /* Not a suitable memory address. */
8926 /* All the useful information has now been extracted from the
8927 operands into unsorted_regs and unsorted_offsets; additionally,
8928 order[0] has been set to the lowest numbered register in the
8929 list. Sort the registers into order, and check that the memory
8930 offsets are ascending and adjacent. */
8932 for (i = 1; i < nops; i++)
8936 order[i] = order[i - 1];
8937 for (j = 0; j < nops; j++)
8938 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
8939 && (order[i] == order[i - 1]
8940 || unsorted_regs[j] < unsorted_regs[order[i]]))
/* Have we found a suitable register?  If not, one must be used more
than once.  */
8945 if (order[i] == order[i - 1])
8948 /* Is the memory address adjacent and ascending? */
8949 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
8957 for (i = 0; i < nops; i++)
8958 regs[i] = unsorted_regs[order[i]];
8960 *load_offset = unsorted_offsets[order[0]];
8963 if (unsorted_offsets[order[0]] == 0)
8964 return 1; /* stmia */
8966 if (unsorted_offsets[order[0]] == 4)
8967 return 2; /* stmib */
8969 if (unsorted_offsets[order[nops - 1]] == 0)
8970 return 3; /* stmda */
8972 if (unsorted_offsets[order[nops - 1]] == -4)
8973 return 4; /* stmdb */
8979 emit_stm_seq (rtx *operands, int nops)
8983 HOST_WIDE_INT offset;
8987 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
8990 strcpy (buf, "stm%(ia%)\t");
8994 strcpy (buf, "stm%(ib%)\t");
8998 strcpy (buf, "stm%(da%)\t");
9002 strcpy (buf, "stm%(db%)\t");
9009 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9010 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9012 for (i = 1; i < nops; i++)
9013 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9014 reg_names[regs[i]]);
9016 strcat (buf, "}\t%@ phole stm");
9018 output_asm_insn (buf, operands);
9022 /* Routines for use in generating RTL. */
9025 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9026 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9028 HOST_WIDE_INT offset = *offsetp;
9031 int sign = up ? 1 : -1;
9034 /* XScale has load-store double instructions, but they have stricter
9035 alignment requirements than load-store multiple, so we cannot
9038 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9039 the pipeline until completion.
9047 An ldr instruction takes 1-3 cycles, but does not block the
9056 Best case ldr will always win. However, the more ldr instructions
9057 we issue, the less likely we are to be able to schedule them well.
9058 Using ldr instructions also increases code size.
9060 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9061 for counts of 3 or 4 regs. */
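/* Editorial note (not in the original source): concretely, on an
XScale-tuned build a 2-register block load expands to two ldr insns
via the loop below, while 3- or 4-register blocks keep the single
PARALLEL ldm form constructed further down.  */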
9062 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9068 for (i = 0; i < count; i++)
9070 addr = plus_constant (from, i * 4 * sign);
9071 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9072 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9078 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9088 result = gen_rtx_PARALLEL (VOIDmode,
9089 rtvec_alloc (count + (write_back ? 1 : 0)));
9092 XVECEXP (result, 0, 0)
9093 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9098 for (j = 0; i < count; i++, j++)
9100 addr = plus_constant (from, j * 4 * sign);
9101 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9102 XVECEXP (result, 0, i)
9103 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9114 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9115 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9117 HOST_WIDE_INT offset = *offsetp;
9120 int sign = up ? 1 : -1;
9123 /* See arm_gen_load_multiple for discussion of
9124 the pros/cons of ldm/stm usage for XScale. */
9125 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9131 for (i = 0; i < count; i++)
9133 addr = plus_constant (to, i * 4 * sign);
9134 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9135 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9141 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9151 result = gen_rtx_PARALLEL (VOIDmode,
9152 rtvec_alloc (count + (write_back ? 1 : 0)));
9155 XVECEXP (result, 0, 0)
9156 = gen_rtx_SET (VOIDmode, to,
9157 plus_constant (to, count * 4 * sign));
9162 for (j = 0; i < count; i++, j++)
9164 addr = plus_constant (to, j * 4 * sign);
9165 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9166 XVECEXP (result, 0, i)
9167 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9178 arm_gen_movmemqi (rtx *operands)
9180 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9181 HOST_WIDE_INT srcoffset, dstoffset;
9183 rtx src, dst, srcbase, dstbase;
9184 rtx part_bytes_reg = NULL;
9187 if (GET_CODE (operands[2]) != CONST_INT
9188 || GET_CODE (operands[3]) != CONST_INT
9189 || INTVAL (operands[2]) > 64
9190 || INTVAL (operands[3]) & 3)
9193 dstbase = operands[0];
9194 srcbase = operands[1];
9196 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9197 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9199 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9200 out_words_to_go = INTVAL (operands[2]) / 4;
9201 last_bytes = INTVAL (operands[2]) & 3;
9202 dstoffset = srcoffset = 0;
9204 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9205 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9207 for (i = 0; in_words_to_go >= 2; i += 4)
9209 if (in_words_to_go > 4)
9210 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9211 srcbase, &srcoffset));
9213 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9214 FALSE, srcbase, &srcoffset));
9216 if (out_words_to_go)
9218 if (out_words_to_go > 4)
9219 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9220 dstbase, &dstoffset));
9221 else if (out_words_to_go != 1)
9222 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9226 dstbase, &dstoffset));
9229 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9230 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9231 if (last_bytes != 0)
9233 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9239 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9240 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9243 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9244 if (out_words_to_go)
9248 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9249 sreg = copy_to_reg (mem);
9251 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9252 emit_move_insn (mem, sreg);
9255 gcc_assert (!in_words_to_go); /* Sanity check */
9260 gcc_assert (in_words_to_go > 0);
9262 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9263 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9266 gcc_assert (!last_bytes || part_bytes_reg);
9268 if (BYTES_BIG_ENDIAN && last_bytes)
9270 rtx tmp = gen_reg_rtx (SImode);
9272 /* The bytes we want are in the top end of the word. */
9273 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9274 GEN_INT (8 * (4 - last_bytes))));
9275 part_bytes_reg = tmp;
9279 mem = adjust_automodify_address (dstbase, QImode,
9280 plus_constant (dst, last_bytes - 1),
9281 dstoffset + last_bytes - 1);
9282 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9286 tmp = gen_reg_rtx (SImode);
9287 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9288 part_bytes_reg = tmp;
9297 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9298 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9302 rtx tmp = gen_reg_rtx (SImode);
9303 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9304 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9305 part_bytes_reg = tmp;
9312 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9313 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
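/* Worked example (illustrative): a 7-byte copy gives in_words_to_go == 2,
   out_words_to_go == 1 and last_bytes == 3.  Two words are loaded, one
   word is stored, and the trailing three bytes end up in part_bytes_reg
   (r1); on a little-endian target they are written out above as a
   halfword store, a 16-bit right shift, then a byte store.  */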
9320 /* Select a dominance comparison mode if possible for a test of the general
9321 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9322 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9323 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9324 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9325 In all cases OP will be either EQ or NE, but we don't need to know which
9326 here. If we are unable to support a dominance comparison we return
9327 CCmode. This will then fail to match for the RTL expressions that
9328 generate this call. */
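/* For example (illustrative): a test such as
	(ne (ior (eq r0 (const_int 0)) (eq r1 (const_int 0))) (const_int 0))
   reaches here with cond1 == cond2 == EQ and DOM_CC_X_OR_Y, and selects
   CC_DEQmode so that the two comparisons can share one flag-setting
   sequence.  */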
9330 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9332 enum rtx_code cond1, cond2;
9335 /* Currently we will probably get the wrong result if the individual
9336 comparisons are not simple. This also ensures that it is safe to
9337 reverse a comparison if necessary. */
9338 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9340 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9344 /* The if_then_else variant of this tests the second condition if the
9345 first passes, but is true if the first fails. Reverse the first
9346 condition to get a true "inclusive-or" expression. */
9347 if (cond_or == DOM_CC_NX_OR_Y)
9348 cond1 = reverse_condition (cond1);
9350 /* If the comparisons are not equal, and one doesn't dominate the other,
9351 then we can't do this. */
9353 && !comparison_dominates_p (cond1, cond2)
9354 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9359 enum rtx_code temp = cond1;
9367 if (cond_or == DOM_CC_X_AND_Y)
9372 case EQ: return CC_DEQmode;
9373 case LE: return CC_DLEmode;
9374 case LEU: return CC_DLEUmode;
9375 case GE: return CC_DGEmode;
9376 case GEU: return CC_DGEUmode;
9377 default: gcc_unreachable ();
9381 if (cond_or == DOM_CC_X_AND_Y)
9397 if (cond_or == DOM_CC_X_AND_Y)
9413 if (cond_or == DOM_CC_X_AND_Y)
9429 if (cond_or == DOM_CC_X_AND_Y)
9444 /* The remaining cases only occur when both comparisons are the same. */
9447 gcc_assert (cond1 == cond2);
9451 gcc_assert (cond1 == cond2);
9455 gcc_assert (cond1 == cond2);
9459 gcc_assert (cond1 == cond2);
9463 gcc_assert (cond1 == cond2);
9472 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9474 /* All floating point compares return CCFP if it is an equality
9475 comparison, and CCFPE otherwise. */
9476 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9496 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9505 /* A compare with a shifted operand. Because of canonicalization, the
9506 comparison will have to be swapped when we emit the assembler. */
9507 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
9508 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9509 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9510 || GET_CODE (x) == ROTATERT))
9513 /* This operation is performed swapped, but since we only rely on the Z
9514 flag we don't need an additional mode. */
9515 if (GET_MODE (y) == SImode && REG_P (y)
9516 && GET_CODE (x) == NEG
9517 && (op == EQ || op == NE))
9520 /* This is a special case that is used by combine to allow a
9521 comparison of a shifted byte load to be split into a zero-extend
9522 followed by a comparison of the shifted integer (only valid for
9523 equalities and unsigned inequalities). */
9524 if (GET_MODE (x) == SImode
9525 && GET_CODE (x) == ASHIFT
9526 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9527 && GET_CODE (XEXP (x, 0)) == SUBREG
9528 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9529 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9530 && (op == EQ || op == NE
9531 || op == GEU || op == GTU || op == LTU || op == LEU)
9532 && GET_CODE (y) == CONST_INT)
9535 /* A construct for a conditional compare: if the false arm contains
9536 0, then both conditions must be true, otherwise either condition
9537 must be true. Not all conditions are possible, so CCmode is
9538 returned if it can't be done. */
9539 if (GET_CODE (x) == IF_THEN_ELSE
9540 && (XEXP (x, 2) == const0_rtx
9541 || XEXP (x, 2) == const1_rtx)
9542 && COMPARISON_P (XEXP (x, 0))
9543 && COMPARISON_P (XEXP (x, 1)))
9544 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9545 INTVAL (XEXP (x, 2)));
9547 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9548 if (GET_CODE (x) == AND
9549 && COMPARISON_P (XEXP (x, 0))
9550 && COMPARISON_P (XEXP (x, 1)))
9551 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9554 if (GET_CODE (x) == IOR
9555 && COMPARISON_P (XEXP (x, 0))
9556 && COMPARISON_P (XEXP (x, 1)))
9557 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9560 /* An operation (on Thumb) where we want to test for a single bit.
9561 This is done by shifting that bit up into the top bit of a
9562 scratch register; we can then branch on the sign bit. */
9564 && GET_MODE (x) == SImode
9565 && (op == EQ || op == NE)
9566 && GET_CODE (x) == ZERO_EXTRACT
9567 && XEXP (x, 1) == const1_rtx)
9570 /* An operation that sets the condition codes as a side-effect; the
9571 V flag is not set correctly, so we can only use comparisons where
9572 this doesn't matter. (For LT and GE we can use "mi" and "pl" conditions.) */
9574 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
9575 if (GET_MODE (x) == SImode
9577 && (op == EQ || op == NE || op == LT || op == GE)
9578 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
9579 || GET_CODE (x) == AND || GET_CODE (x) == IOR
9580 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
9581 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
9582 || GET_CODE (x) == LSHIFTRT
9583 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9584 || GET_CODE (x) == ROTATERT
9585 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
9588 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
9591 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
9592 && GET_CODE (x) == PLUS
9593 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
9599 /* X and Y are two things to compare using CODE. Emit the compare insn and
9600 return the rtx for register 0 in the proper mode. FP means this is a
9601 floating point compare: I don't think that it is needed on the arm. */
9603 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
9605 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
9606 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
9608 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
9613 /* Generate a sequence of insns that will generate the correct return
9614 address mask depending on the physical architecture that the program is running on. */
9617 arm_gen_return_addr_mask (void)
9619 rtx reg = gen_reg_rtx (Pmode);
9621 emit_insn (gen_return_addr_mask (reg));
9626 arm_reload_in_hi (rtx *operands)
9628 rtx ref = operands[1];
9630 HOST_WIDE_INT offset = 0;
9632 if (GET_CODE (ref) == SUBREG)
9634 offset = SUBREG_BYTE (ref);
9635 ref = SUBREG_REG (ref);
9638 if (GET_CODE (ref) == REG)
9640 /* We have a pseudo which has been spilt onto the stack; there
9641 are two cases here: the first where there is a simple
9642 stack-slot replacement and a second where the stack-slot is
9643 out of range, or is used as a subreg. */
9644 if (reg_equiv_mem[REGNO (ref)])
9646 ref = reg_equiv_mem[REGNO (ref)];
9647 base = find_replacement (&XEXP (ref, 0));
9650 /* The slot is out of range, or was dressed up in a SUBREG. */
9651 base = reg_equiv_address[REGNO (ref)];
9654 base = find_replacement (&XEXP (ref, 0));
9656 /* Handle the case where the address is too complex to be offset by 1. */
9657 if (GET_CODE (base) == MINUS
9658 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9660 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9662 emit_set_insn (base_plus, base);
9665 else if (GET_CODE (base) == PLUS)
9667 /* The addend must be CONST_INT, or we would have dealt with it above. */
9668 HOST_WIDE_INT hi, lo;
9670 offset += INTVAL (XEXP (base, 1));
9671 base = XEXP (base, 0);
9673 /* Rework the address into a legal sequence of insns. */
9674 /* Valid range for lo is -4095 -> 4095 */
9677 : -((-offset) & 0xfff));
9679 /* Corner case: if lo is the max offset then we would be out of range
9680 once we have added the additional 1 below, so bump the msb into the
9681 pre-loading insn(s). */
9685 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9686 ^ (HOST_WIDE_INT) 0x80000000)
9687 - (HOST_WIDE_INT) 0x80000000);
9689 gcc_assert (hi + lo == offset);
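      /* Worked example (illustrative): offset == 4098 yields
	 lo == 4098 & 0xfff == 2 and hi == 4096; the base is advanced
	 by 4096 below and the two byte loads then use offsets 2 and 3.  */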
9693 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9695 /* Get the base address; addsi3 knows how to handle constants
9696 that require more than one insn. */
9697 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9703 /* Operands[2] may overlap operands[0] (though it won't overlap
9704 operands[1]); that's why we asked for a DImode reg -- so we can
9705 use the bit that does not overlap. */
9706 if (REGNO (operands[2]) == REGNO (operands[0]))
9707 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9709 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
9711 emit_insn (gen_zero_extendqisi2 (scratch,
9712 gen_rtx_MEM (QImode,
9713 plus_constant (base,
9715 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
9716 gen_rtx_MEM (QImode,
9717 plus_constant (base,
9719 if (!BYTES_BIG_ENDIAN)
9720 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
9721 gen_rtx_IOR (SImode,
9724 gen_rtx_SUBREG (SImode, operands[0], 0),
9728 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
9729 gen_rtx_IOR (SImode,
9730 gen_rtx_ASHIFT (SImode, scratch,
9732 gen_rtx_SUBREG (SImode, operands[0], 0)));
9735 /* Handle storing a half-word to memory during reload by synthesizing it as two
9736 byte stores. Take care not to clobber the input values until after we
9737 have moved them somewhere safe. This code assumes that if the DImode
9738 scratch in operands[2] overlaps either the input value or output address
9739 in some way, then that value must die in this insn (we absolutely need
9740 two scratch registers for some corner cases). */
9742 arm_reload_out_hi (rtx *operands)
9744 rtx ref = operands[0];
9745 rtx outval = operands[1];
9747 HOST_WIDE_INT offset = 0;
9749 if (GET_CODE (ref) == SUBREG)
9751 offset = SUBREG_BYTE (ref);
9752 ref = SUBREG_REG (ref);
9755 if (GET_CODE (ref) == REG)
9757 /* We have a pseudo which has been spilt onto the stack; there
9758 are two cases here: the first where there is a simple
9759 stack-slot replacement and a second where the stack-slot is
9760 out of range, or is used as a subreg. */
9761 if (reg_equiv_mem[REGNO (ref)])
9763 ref = reg_equiv_mem[REGNO (ref)];
9764 base = find_replacement (&XEXP (ref, 0));
9767 /* The slot is out of range, or was dressed up in a SUBREG. */
9768 base = reg_equiv_address[REGNO (ref)];
9771 base = find_replacement (&XEXP (ref, 0));
9773 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
9775 /* Handle the case where the address is too complex to be offset by 1. */
9776 if (GET_CODE (base) == MINUS
9777 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9779 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9781 /* Be careful not to destroy OUTVAL. */
9782 if (reg_overlap_mentioned_p (base_plus, outval))
9784 /* Updating base_plus might destroy outval; see if we can
9785 swap the scratch and base_plus. */
9786 if (!reg_overlap_mentioned_p (scratch, outval))
9789 scratch = base_plus;
9794 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
9796 /* Be conservative and copy OUTVAL into the scratch now;
9797 this should only be necessary if outval is a subreg
9798 of something larger than a word. */
9799 /* XXX Might this clobber base? I can't see how it can,
9800 since scratch is known to overlap with OUTVAL, and
9801 must be wider than a word. */
9802 emit_insn (gen_movhi (scratch_hi, outval));
9803 outval = scratch_hi;
9807 emit_set_insn (base_plus, base);
9810 else if (GET_CODE (base) == PLUS)
9812 /* The addend must be CONST_INT, or we would have dealt with it above. */
9813 HOST_WIDE_INT hi, lo;
9815 offset += INTVAL (XEXP (base, 1));
9816 base = XEXP (base, 0);
9818 /* Rework the address into a legal sequence of insns. */
9819 /* Valid range for lo is -4095 -> 4095 */
9822 : -((-offset) & 0xfff));
9824 /* Corner case: if lo is the max offset then we would be out of range
9825 once we have added the additional 1 below, so bump the msb into the
9826 pre-loading insn(s). */
9830 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9831 ^ (HOST_WIDE_INT) 0x80000000)
9832 - (HOST_WIDE_INT) 0x80000000);
9834 gcc_assert (hi + lo == offset);
9838 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9840 /* Be careful not to destroy OUTVAL. */
9841 if (reg_overlap_mentioned_p (base_plus, outval))
9843 /* Updating base_plus might destroy outval; see if we
9844 can swap the scratch and base_plus. */
9845 if (!reg_overlap_mentioned_p (scratch, outval))
9848 scratch = base_plus;
9853 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
9855 /* Be conservative and copy outval into scratch now;
9856 this should only be necessary if outval is a
9857 subreg of something larger than a word. */
9858 /* XXX Might this clobber base? I can't see how it
9859 can, since scratch is known to overlap with OUTVAL. */
9861 emit_insn (gen_movhi (scratch_hi, outval));
9862 outval = scratch_hi;
9866 /* Get the base address; addsi3 knows how to handle constants
9867 that require more than one insn. */
9868 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9874 if (BYTES_BIG_ENDIAN)
9876 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
9877 plus_constant (base, offset + 1)),
9878 gen_lowpart (QImode, outval)));
9879 emit_insn (gen_lshrsi3 (scratch,
9880 gen_rtx_SUBREG (SImode, outval, 0),
9882 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
9883 gen_lowpart (QImode, scratch)));
9887 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
9888 gen_lowpart (QImode, outval)));
9889 emit_insn (gen_lshrsi3 (scratch,
9890 gen_rtx_SUBREG (SImode, outval, 0),
9892 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
9893 plus_constant (base, offset + 1)),
9894 gen_lowpart (QImode, scratch)));
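  /* A sketch of the little-endian sequence emitted above (illustrative),
     for a value in r1 stored at [base, #off]:
	strb	r1, [base, #off]
	mov	scratch, r1, lsr #8
	strb	scratch, [base, #off + 1]  */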
9898 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
9899 (padded to the size of a word) should be passed in a register. */
9902 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
9904 if (TARGET_AAPCS_BASED)
9905 return must_pass_in_stack_var_size (mode, type);
9907 return must_pass_in_stack_var_size_or_pad (mode, type);
9911 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
9912 Return true if an argument passed on the stack should be padded upwards,
9913 i.e. if the least-significant byte has useful data.
9914 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
9915 aggregate types are placed in the lowest memory address. */
9918 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
9920 if (!TARGET_AAPCS_BASED)
9921 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
9923 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
9930 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
9931 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
9932 byte of the register has useful data, and return the opposite if the
9933 most significant byte does.
9934 For AAPCS, small aggregates and small complex types are always padded upwards. */
9938 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
9939 tree type, int first ATTRIBUTE_UNUSED)
9941 if (TARGET_AAPCS_BASED
9943 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
9944 && int_size_in_bytes (type) <= 4)
9947 /* Otherwise, use default padding. */
9948 return !BYTES_BIG_ENDIAN;
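/* Example (illustrative): on a big-endian AAPCS target the default below
   would pad downward, but a 3-byte aggregate takes the early return above
   and is padded upward instead.  */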
9952 /* Print a symbolic form of X to the debug file, F. */
9954 arm_print_value (FILE *f, rtx x)
9956 switch (GET_CODE (x))
9959 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
9963 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
9971 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
9973 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
9974 if (i < (CONST_VECTOR_NUNITS (x) - 1))
9982 fprintf (f, "\"%s\"", XSTR (x, 0));
9986 fprintf (f, "`%s'", XSTR (x, 0));
9990 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
9994 arm_print_value (f, XEXP (x, 0));
9998 arm_print_value (f, XEXP (x, 0));
10000 arm_print_value (f, XEXP (x, 1));
10008 fprintf (f, "????");
10013 /* Routines for manipulation of the constant pool. */
10015 /* ARM instructions cannot load a large constant directly into a
10016 register; it has to come from a pc-relative load. The constant
10017 must therefore be placed in the addressable range of the pc-relative
10018 load. Depending on the precise pc-relative load
10019 instruction the range is somewhere between 256 bytes and 4k. This
10020 means that we often have to dump a constant inside a function, and
10021 generate code to branch around it.
10023 It is important to minimize this, since the branches will slow
10024 things down and make the code larger.
10026 Normally we can hide the table after an existing unconditional
10027 branch so that there is no interruption of the flow, but in the
10028 worst case the code looks like this:
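	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long	value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long	value
	L4:
	...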
10046 We fix this by performing a scan after scheduling, which notices
10047 which instructions need to have their operands fetched from the
10048 constant table and builds the table.
10050 The algorithm starts by building a table of all the constants that
10051 need fixing up and all the natural barriers in the function (places
10052 where a constant table can be dropped without breaking the flow).
10053 For each fixup we note how far the pc-relative replacement will be
10054 able to reach and the offset of the instruction into the function.
10056 Having built the table we then group the fixes together to form
10057 tables that are as large as possible (subject to addressing
10058 constraints) and emit each table of constants after the last
10059 barrier that is within range of all the instructions in the group.
10060 If a group does not contain a barrier, then we forcibly create one
10061 by inserting a jump instruction into the flow. Once the table has
10062 been inserted, the insns are then modified to reference the
10063 relevant entry in the pool.
10065 Possible enhancements to the algorithm (not implemented) are:
10067 1) For some processors and object formats, there may be benefit in
10068 aligning the pools to the start of cache lines; this alignment
10069 would need to be taken into account when calculating addressability of a pool. */
10072 /* These typedefs are located at the start of this file, so that
10073 they can be used in the prototypes there. This comment is to
10074 remind readers of that fact so that the following structures
10075 can be understood more easily.
10077 typedef struct minipool_node Mnode;
10078 typedef struct minipool_fixup Mfix; */
10080 struct minipool_node
10082 /* Doubly linked chain of entries. */
10085 /* The maximum offset into the code at which this entry can be placed. While
10086 pushing fixes for forward references, all entries are sorted in order
10087 of increasing max_address. */
10088 HOST_WIDE_INT max_address;
10089 /* Similarly for an entry inserted for a backwards ref. */
10090 HOST_WIDE_INT min_address;
10091 /* The number of fixes referencing this entry. This can become zero
10092 if we "unpush" an entry. In this case we ignore the entry when we
10093 come to emit the code. */
10095 /* The offset from the start of the minipool. */
10096 HOST_WIDE_INT offset;
10097 /* The value in the table. */
10099 /* The mode of the value. */
10100 enum machine_mode mode;
10101 /* The size of the value. With iWMMXt enabled
10102 sizes > 4 also imply an alignment of 8 bytes. */
10106 struct minipool_fixup
10110 HOST_WIDE_INT address;
10112 enum machine_mode mode;
10116 HOST_WIDE_INT forwards;
10117 HOST_WIDE_INT backwards;
10120 /* Fixes less than a word need padding out to a word boundary. */
10121 #define MINIPOOL_FIX_SIZE(mode) \
10122 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
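/* For instance, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (SImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */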
10124 static Mnode * minipool_vector_head;
10125 static Mnode * minipool_vector_tail;
10126 static rtx minipool_vector_label;
10127 static int minipool_pad;
10129 /* The linked list of all minipool fixes required for this function. */
10130 Mfix * minipool_fix_head;
10131 Mfix * minipool_fix_tail;
10132 /* The fix entry for the current minipool, once it has been placed. */
10133 Mfix * minipool_barrier;
10135 /* Determines if INSN is the start of a jump table. Returns the end
10136 of the TABLE or NULL_RTX. */
10138 is_jump_table (rtx insn)
10142 if (GET_CODE (insn) == JUMP_INSN
10143 && JUMP_LABEL (insn) != NULL
10144 && ((table = next_real_insn (JUMP_LABEL (insn)))
10145 == next_real_insn (insn))
10147 && GET_CODE (table) == JUMP_INSN
10148 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10149 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10155 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10156 #define JUMP_TABLES_IN_TEXT_SECTION 0
10159 static HOST_WIDE_INT
10160 get_jump_table_size (rtx insn)
10162 /* ADDR_VECs only take room if read-only data goes into the text section. */
10164 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10166 rtx body = PATTERN (insn);
10167 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10168 HOST_WIDE_INT size;
10169 HOST_WIDE_INT modesize;
10171 modesize = GET_MODE_SIZE (GET_MODE (body));
10172 size = modesize * XVECLEN (body, elt);
10176 /* Round up size of TBB table to a halfword boundary. */
10177 size = (size + 1) & ~(HOST_WIDE_INT)1;
10180 /* No padding necessary for TBH. */
10183 /* Add two bytes for alignment on Thumb. */
10188 gcc_unreachable ();
10196 /* Move a minipool fix MP from its current location to before MAX_MP.
10197 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10198 constraints may need updating. */
10200 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10201 HOST_WIDE_INT max_address)
10203 /* The code below assumes these are different. */
10204 gcc_assert (mp != max_mp);
10206 if (max_mp == NULL)
10208 if (max_address < mp->max_address)
10209 mp->max_address = max_address;
10213 if (max_address > max_mp->max_address - mp->fix_size)
10214 mp->max_address = max_mp->max_address - mp->fix_size;
10216 mp->max_address = max_address;
10218 /* Unlink MP from its current position. Since max_mp is non-null,
10219 mp->prev must be non-null. */
10220 mp->prev->next = mp->next;
10221 if (mp->next != NULL)
10222 mp->next->prev = mp->prev;
10224 minipool_vector_tail = mp->prev;
10226 /* Re-insert it before MAX_MP. */
10228 mp->prev = max_mp->prev;
10231 if (mp->prev != NULL)
10232 mp->prev->next = mp;
10234 minipool_vector_head = mp;
10237 /* Save the new entry. */
10240 /* Scan over the preceding entries and adjust their addresses as required. */
10242 while (mp->prev != NULL
10243 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10245 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10252 /* Add a constant to the minipool for a forward reference. Returns the
10253 node added or NULL if the constant will not fit in this pool. */
10255 add_minipool_forward_ref (Mfix *fix)
10257 /* If set, max_mp is the first pool_entry that has a lower
10258 constraint than the one we are trying to add. */
10259 Mnode * max_mp = NULL;
10260 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10263 /* If the minipool starts before the end of FIX->INSN then this FIX
10264 cannot be placed into the current pool. Furthermore, adding the
10265 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
10267 if (minipool_vector_head &&
10268 (fix->address + get_attr_length (fix->insn)
10269 >= minipool_vector_head->max_address - fix->fix_size))
10272 /* Scan the pool to see if a constant with the same value has
10273 already been added. While we are doing this, also note the
10274 location where we must insert the constant if it doesn't already exist. */
10276 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10278 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10279 && fix->mode == mp->mode
10280 && (GET_CODE (fix->value) != CODE_LABEL
10281 || (CODE_LABEL_NUMBER (fix->value)
10282 == CODE_LABEL_NUMBER (mp->value)))
10283 && rtx_equal_p (fix->value, mp->value))
10285 /* More than one fix references this entry. */
10287 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10290 /* Note the insertion point if necessary. */
10292 && mp->max_address > max_address)
10295 /* If we are inserting an 8-byte aligned quantity and
10296 we have not already found an insertion point, then
10297 make sure that all such 8-byte aligned quantities are
10298 placed at the start of the pool. */
10299 if (ARM_DOUBLEWORD_ALIGN
10301 && fix->fix_size >= 8
10302 && mp->fix_size < 8)
10305 max_address = mp->max_address;
10309 /* The value is not currently in the minipool, so we need to create
10310 a new entry for it. If MAX_MP is NULL, the entry will be put on
10311 the end of the list since the placement is less constrained than
10312 any existing entry. Otherwise, we insert the new fix before
10313 MAX_MP and, if necessary, adjust the constraints on the other entries. */
10316 mp->fix_size = fix->fix_size;
10317 mp->mode = fix->mode;
10318 mp->value = fix->value;
10320 /* Not yet required for a backwards ref. */
10321 mp->min_address = -65536;
10323 if (max_mp == NULL)
10325 mp->max_address = max_address;
10327 mp->prev = minipool_vector_tail;
10329 if (mp->prev == NULL)
10331 minipool_vector_head = mp;
10332 minipool_vector_label = gen_label_rtx ();
10335 mp->prev->next = mp;
10337 minipool_vector_tail = mp;
10341 if (max_address > max_mp->max_address - mp->fix_size)
10342 mp->max_address = max_mp->max_address - mp->fix_size;
10344 mp->max_address = max_address;
10347 mp->prev = max_mp->prev;
10349 if (mp->prev != NULL)
10350 mp->prev->next = mp;
10352 minipool_vector_head = mp;
10355 /* Save the new entry. */
10358 /* Scan over the preceding entries and adjust their addresses as required. */
10360 while (mp->prev != NULL
10361 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10363 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10371 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10372 HOST_WIDE_INT min_address)
10374 HOST_WIDE_INT offset;
10376 /* The code below assumes these are different. */
10377 gcc_assert (mp != min_mp);
10379 if (min_mp == NULL)
10381 if (min_address > mp->min_address)
10382 mp->min_address = min_address;
10386 /* We will adjust this below if it is too loose. */
10387 mp->min_address = min_address;
10389 /* Unlink MP from its current position. Since min_mp is non-null,
10390 mp->next must be non-null. */
10391 mp->next->prev = mp->prev;
10392 if (mp->prev != NULL)
10393 mp->prev->next = mp->next;
10395 minipool_vector_head = mp->next;
10397 /* Reinsert it after MIN_MP. */
10399 mp->next = min_mp->next;
10401 if (mp->next != NULL)
10402 mp->next->prev = mp;
10404 minipool_vector_tail = mp;
10410 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10412 mp->offset = offset;
10413 if (mp->refcount > 0)
10414 offset += mp->fix_size;
10416 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10417 mp->next->min_address = mp->min_address + mp->fix_size;
10423 /* Add a constant to the minipool for a backward reference. Returns the
10424 node added or NULL if the constant will not fit in this pool.
10426 Note that the code for insertion for a backwards reference can be
10427 somewhat confusing because the calculated offsets for each fix do
10428 not take into account the size of the pool (which is still under construction.) */
10431 add_minipool_backward_ref (Mfix *fix)
10433 /* If set, min_mp is the last pool_entry that has a lower constraint
10434 than the one we are trying to add. */
10435 Mnode *min_mp = NULL;
10436 /* This can be negative, since it is only a constraint. */
10437 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10440 /* If we can't reach the current pool from this insn, or if we can't
10441 insert this entry at the end of the pool without pushing other
10442 fixes out of range, then we don't try. This ensures that we
10443 can't fail later on. */
10444 if (min_address >= minipool_barrier->address
10445 || (minipool_vector_tail->min_address + fix->fix_size
10446 >= minipool_barrier->address))
10449 /* Scan the pool to see if a constant with the same value has
10450 already been added. While we are doing this, also note the
10451 location where we must insert the constant if it doesn't already exist. */
10453 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10455 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10456 && fix->mode == mp->mode
10457 && (GET_CODE (fix->value) != CODE_LABEL
10458 || (CODE_LABEL_NUMBER (fix->value)
10459 == CODE_LABEL_NUMBER (mp->value)))
10460 && rtx_equal_p (fix->value, mp->value)
10461 /* Check that there is enough slack to move this entry to the
10462 end of the table (this is conservative). */
10463 && (mp->max_address
10464 > (minipool_barrier->address
10465 + minipool_vector_tail->offset
10466 + minipool_vector_tail->fix_size)))
10469 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10472 if (min_mp != NULL)
10473 mp->min_address += fix->fix_size;
10476 /* Note the insertion point if necessary. */
10477 if (mp->min_address < min_address)
10479 /* For now, we do not allow the insertion of nodes requiring 8-byte
10480 alignment anywhere but at the start of the pool. */
10481 if (ARM_DOUBLEWORD_ALIGN
10482 && fix->fix_size >= 8 && mp->fix_size < 8)
10487 else if (mp->max_address
10488 < minipool_barrier->address + mp->offset + fix->fix_size)
10490 /* Inserting before this entry would push the fix beyond
10491 its maximum address (which can happen if we have
10492 re-located a forwards fix); force the new fix to come after it. */
10494 if (ARM_DOUBLEWORD_ALIGN
10495 && fix->fix_size >= 8 && mp->fix_size < 8)
10500 min_address = mp->min_address + fix->fix_size;
10503 /* Do not insert a non-8-byte aligned quantity before 8-byte
10504 aligned quantities. */
10505 else if (ARM_DOUBLEWORD_ALIGN
10506 && fix->fix_size < 8
10507 && mp->fix_size >= 8)
10510 min_address = mp->min_address + fix->fix_size;
10515 /* We need to create a new entry. */
10517 mp->fix_size = fix->fix_size;
10518 mp->mode = fix->mode;
10519 mp->value = fix->value;
10521 mp->max_address = minipool_barrier->address + 65536;
10523 mp->min_address = min_address;
10525 if (min_mp == NULL)
10528 mp->next = minipool_vector_head;
10530 if (mp->next == NULL)
10532 minipool_vector_tail = mp;
10533 minipool_vector_label = gen_label_rtx ();
10536 mp->next->prev = mp;
10538 minipool_vector_head = mp;
10542 mp->next = min_mp->next;
10546 if (mp->next != NULL)
10547 mp->next->prev = mp;
10549 minipool_vector_tail = mp;
10552 /* Save the new entry. */
10560 /* Scan over the following entries and adjust their offsets. */
10561 while (mp->next != NULL)
10563 if (mp->next->min_address < mp->min_address + mp->fix_size)
10564 mp->next->min_address = mp->min_address + mp->fix_size;
10567 mp->next->offset = mp->offset + mp->fix_size;
10569 mp->next->offset = mp->offset;
10578 assign_minipool_offsets (Mfix *barrier)
10580 HOST_WIDE_INT offset = 0;
10583 minipool_barrier = barrier;
10585 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10587 mp->offset = offset;
10589 if (mp->refcount > 0)
10590 offset += mp->fix_size;
10594 /* Output the literal table */
10596 dump_minipool (rtx scan)
10602 if (ARM_DOUBLEWORD_ALIGN)
10603 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10604 if (mp->refcount > 0 && mp->fix_size >= 8)
10611 fprintf (dump_file,
10612 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
10613 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
10615 scan = emit_label_after (gen_label_rtx (), scan);
10616 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
10617 scan = emit_label_after (minipool_vector_label, scan);
10619 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
10621 if (mp->refcount > 0)
10625 fprintf (dump_file,
10626 ";; Offset %u, min %ld, max %ld ",
10627 (unsigned) mp->offset, (unsigned long) mp->min_address,
10628 (unsigned long) mp->max_address);
10629 arm_print_value (dump_file, mp->value);
10630 fputc ('\n', dump_file);
10633 switch (mp->fix_size)
10635 #ifdef HAVE_consttable_1
10637 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
10641 #ifdef HAVE_consttable_2
10643 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
10647 #ifdef HAVE_consttable_4
10649 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
10653 #ifdef HAVE_consttable_8
10655 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
10659 #ifdef HAVE_consttable_16
10661 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
10666 gcc_unreachable ();
10674 minipool_vector_head = minipool_vector_tail = NULL;
10675 scan = emit_insn_after (gen_consttable_end (), scan);
10676 scan = emit_barrier_after (scan);
10679 /* Return the cost of forcibly inserting a barrier after INSN. */
10681 arm_barrier_cost (rtx insn)
10683 /* Basing the location of the pool on the loop depth is preferable,
10684 but at the moment, the basic block information seems to be
10685 corrupt by this stage of the compilation. */
10686 int base_cost = 50;
10687 rtx next = next_nonnote_insn (insn);
10689 if (next != NULL && GET_CODE (next) == CODE_LABEL)
10692 switch (GET_CODE (insn))
10695 /* It will always be better to place the table before the label, rather than after it. */
10704 return base_cost - 10;
10707 return base_cost + 10;
10711 /* Find the best place in the insn stream in the range
10712 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
10713 Create the barrier by inserting a jump and add a new fix entry for it. */
10716 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
10718 HOST_WIDE_INT count = 0;
10720 rtx from = fix->insn;
10721 /* The instruction after which we will insert the jump. */
10722 rtx selected = NULL;
10724 /* The address at which the jump instruction will be placed. */
10725 HOST_WIDE_INT selected_address;
10727 HOST_WIDE_INT max_count = max_address - fix->address;
10728 rtx label = gen_label_rtx ();
10730 selected_cost = arm_barrier_cost (from);
10731 selected_address = fix->address;
10733 while (from && count < max_count)
10738 /* This code shouldn't have been called if there was a natural barrier within range. */
10740 gcc_assert (GET_CODE (from) != BARRIER);
10742 /* Count the length of this insn. */
10743 count += get_attr_length (from);
10745 /* If there is a jump table, add its length. */
10746 tmp = is_jump_table (from);
10749 count += get_jump_table_size (tmp);
10751 /* Jump tables aren't in a basic block, so base the cost on
10752 the dispatch insn. If we select this location, we will
10753 still put the pool after the table. */
10754 new_cost = arm_barrier_cost (from);
10756 if (count < max_count
10757 && (!selected || new_cost <= selected_cost))
10760 selected_cost = new_cost;
10761 selected_address = fix->address + count;
10764 /* Continue after the dispatch table. */
10765 from = NEXT_INSN (tmp);
10769 new_cost = arm_barrier_cost (from);
10771 if (count < max_count
10772 && (!selected || new_cost <= selected_cost))
10775 selected_cost = new_cost;
10776 selected_address = fix->address + count;
10779 from = NEXT_INSN (from);
10782 /* Make sure that we found a place to insert the jump. */
10783 gcc_assert (selected);
10785 /* Create a new JUMP_INSN that branches around a barrier. */
10786 from = emit_jump_insn_after (gen_jump (label), selected);
10787 JUMP_LABEL (from) = label;
10788 barrier = emit_barrier_after (from);
10789 emit_label_after (label, barrier);
10791 /* Create a minipool barrier entry for the new barrier. */
10792 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
10793 new_fix->insn = barrier;
10794 new_fix->address = selected_address;
10795 new_fix->next = fix->next;
10796 fix->next = new_fix;
10801 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
10804 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
10806 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
10809 fix->address = address;
10812 if (minipool_fix_head != NULL)
10813 minipool_fix_tail->next = fix;
10815 minipool_fix_head = fix;
10817 minipool_fix_tail = fix;
10820 /* Record INSN, which will need fixing up to load a value from the
10821 minipool. ADDRESS is the offset of the insn from the start of the
10822 function; LOC is a pointer to the part of the insn which requires
10823 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
10826 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
10827 enum machine_mode mode, rtx value)
10829 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
10832 fix->address = address;
10835 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
10836 fix->value = value;
10837 fix->forwards = get_attr_pool_range (insn);
10838 fix->backwards = get_attr_neg_pool_range (insn);
10839 fix->minipool = NULL;
10841 /* If an insn doesn't have a range defined for it, then it isn't
10842 expecting to be reworked by this code. Better to stop now than
10843 to generate duff assembly code. */
10844 gcc_assert (fix->forwards || fix->backwards);
10846 /* If an entry requires 8-byte alignment then assume all constant pools
10847 require 4 bytes of padding. Trying to do this later on a per-pool
10848 basis is awkward because existing pool entries have to be modified. */
10849 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
10854 fprintf (dump_file,
10855 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
10856 GET_MODE_NAME (mode),
10857 INSN_UID (insn), (unsigned long) address,
10858 -1 * (long)fix->backwards, (long)fix->forwards);
10859 arm_print_value (dump_file, fix->value);
10860 fprintf (dump_file, "\n");
10863 /* Add it to the chain of fixes. */
10866 if (minipool_fix_head != NULL)
10867 minipool_fix_tail->next = fix;
10869 minipool_fix_head = fix;
10871 minipool_fix_tail = fix;
10874 /* Return the cost of synthesizing a 64-bit constant VAL inline.
10875 Returns the number of insns needed, or 99 if we don't know how to generate it. */
10878 arm_const_double_inline_cost (rtx val)
10880 rtx lowpart, highpart;
10881 enum machine_mode mode;
10883 mode = GET_MODE (val);
10885 if (mode == VOIDmode)
10888 gcc_assert (GET_MODE_SIZE (mode) == 8);
10890 lowpart = gen_lowpart (SImode, val);
10891 highpart = gen_highpart_mode (SImode, mode, val);
10893 gcc_assert (GET_CODE (lowpart) == CONST_INT);
10894 gcc_assert (GET_CODE (highpart) == CONST_INT);
10896 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
10897 NULL_RTX, NULL_RTX, 0, 0)
10898 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
10899 NULL_RTX, NULL_RTX, 0, 0));
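/* Example (illustrative): for the 64-bit constant 0x100000001 each SImode
   half is 1, a single MOV, so the cost is 2; an awkward constant can cost
   up to 4 insns per half.  */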
10902 /* Return true if it is worthwhile to split a 64-bit constant into two
10903 32-bit operations. This is the case if optimizing for size, or
10904 if we have load delay slots, or if one 32-bit part can be done with
10905 a single data operation. */
10907 arm_const_double_by_parts (rtx val)
10909 enum machine_mode mode = GET_MODE (val);
10912 if (optimize_size || arm_ld_sched)
10915 if (mode == VOIDmode)
10918 part = gen_highpart_mode (SImode, mode, val);
10920 gcc_assert (GET_CODE (part) == CONST_INT);
10922 if (const_ok_for_arm (INTVAL (part))
10923 || const_ok_for_arm (~INTVAL (part)))
10926 part = gen_lowpart (SImode, val);
10928 gcc_assert (GET_CODE (part) == CONST_INT);
10930 if (const_ok_for_arm (INTVAL (part))
10931 || const_ok_for_arm (~INTVAL (part)))
10937 /* Scan INSN and note any of its operands that need fixing.
10938 If DO_PUSHES is false we do not actually push any of the fixups
10939 needed. The function returns TRUE if any fixups were needed/pushed.
10940 This is used by arm_memory_load_p() which needs to know about loads
10941 of constants that will be converted into minipool loads. */
10943 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
10945 bool result = false;
10948 extract_insn (insn);
10950 if (!constrain_operands (1))
10951 fatal_insn_not_found (insn);
10953 if (recog_data.n_alternatives == 0)
10956 /* Fill in recog_op_alt with information about the constraints of this insn. */
10958 preprocess_constraints ();
10960 for (opno = 0; opno < recog_data.n_operands; opno++)
10962 /* Things we need to fix can only occur in inputs. */
10963 if (recog_data.operand_type[opno] != OP_IN)
10966 /* If this alternative is a memory reference, then any mention
10967 of constants in this alternative is really to fool reload
10968 into allowing us to accept one there. We need to fix them up
10969 now so that we output the right code. */
10970 if (recog_op_alt[opno][which_alternative].memory_ok)
10972 rtx op = recog_data.operand[opno];
10974 if (CONSTANT_P (op))
10977 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
10978 recog_data.operand_mode[opno], op);
10981 else if (GET_CODE (op) == MEM
10982 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
10983 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
10987 rtx cop = avoid_constant_pool_reference (op);
10989 /* Casting the address of something to a mode narrower
10990 than a word can cause avoid_constant_pool_reference()
10991 to return the pool reference itself. That's no good to
10992 us here. Let's just hope that we can use the
10993 constant pool value directly. */
10995 cop = get_pool_constant (XEXP (op, 0));
10997 push_minipool_fix (insn, address,
10998 recog_data.operand_loc[opno],
10999 recog_data.operand_mode[opno], cop);
11010 /* GCC puts the pool in the wrong place for ARM, since we can only
11011 load addresses a limited distance around the pc. We do some
11012 special munging to move the constant pool values to the correct
11013 point in the code. */
11018 HOST_WIDE_INT address = 0;
11021 minipool_fix_head = minipool_fix_tail = NULL;
11023 /* The first insn must always be a note, or the code below won't
11024 scan it properly. */
11025 insn = get_insns ();
11026 gcc_assert (GET_CODE (insn) == NOTE);
11029 /* Scan all the insns and record the operands that will need fixing. */
11030 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11032 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11033 && (arm_cirrus_insn_p (insn)
11034 || GET_CODE (insn) == JUMP_INSN
11035 || arm_memory_load_p (insn)))
11036 cirrus_reorg (insn);
11038 if (GET_CODE (insn) == BARRIER)
11039 push_minipool_barrier (insn, address);
11040 else if (INSN_P (insn))
11044 note_invalid_constants (insn, address, true);
11045 address += get_attr_length (insn);
11047 /* If the insn is a vector jump, add the size of the table
11048 and skip the table. */
11049 if ((table = is_jump_table (insn)) != NULL)
11051 address += get_jump_table_size (table);
11057 fix = minipool_fix_head;
11059 /* Now scan the fixups and perform the required changes. */
11064 Mfix * last_added_fix;
11065 Mfix * last_barrier = NULL;
11068 /* Skip any further barriers before the next fix. */
11069 while (fix && GET_CODE (fix->insn) == BARRIER)
11072 /* No more fixes. */
11076 last_added_fix = NULL;
11078 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11080 if (GET_CODE (ftmp->insn) == BARRIER)
11082 if (ftmp->address >= minipool_vector_head->max_address)
11085 last_barrier = ftmp;
11087 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11090 last_added_fix = ftmp; /* Keep track of the last fix added. */
11093 /* If we found a barrier, drop back to that; any fixes that we
11094 could have reached but come after the barrier will now go in
11095 the next mini-pool. */
11096 if (last_barrier != NULL)
11098 /* Reduce the refcount for those fixes that won't go into this pool after all. */
11100 for (fdel = last_barrier->next;
11101 fdel && fdel != ftmp;
11104 fdel->minipool->refcount--;
11105 fdel->minipool = NULL;
11108 ftmp = last_barrier;
11112 /* ftmp is the first fix that we can't fit into this pool and
11113 there are no natural barriers that we could use. Insert a
11114 new barrier in the code somewhere between the previous
11115 fix and this one, and arrange to jump around it. */
11116 HOST_WIDE_INT max_address;
11118 /* The last item on the list of fixes must be a barrier, so
11119 we can never run off the end of the list of fixes without
11120 last_barrier being set. */
11123 max_address = minipool_vector_head->max_address;
11124 /* Check that there isn't another fix that is in range that
11125 we couldn't fit into this pool because the pool was
11126 already too large: we need to put the pool before such an
11127 instruction. The pool itself may come just after the
11128 fix because create_fix_barrier also allows space for a
11129 jump instruction. */
11130 if (ftmp->address < max_address)
11131 max_address = ftmp->address + 1;
11133 last_barrier = create_fix_barrier (last_added_fix, max_address);
11136 assign_minipool_offsets (last_barrier);
11140 if (GET_CODE (ftmp->insn) != BARRIER
11141 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11148 /* Scan over the fixes we have identified for this pool, fixing them
11149 up and adding the constants to the pool itself. */
11150 for (this_fix = fix; this_fix && ftmp != this_fix;
11151 this_fix = this_fix->next)
11152 if (GET_CODE (this_fix->insn) != BARRIER)
11155 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11156 minipool_vector_label),
11157 this_fix->minipool->offset);
11158 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11161 dump_minipool (last_barrier->insn);
11165 /* From now on we must synthesize any constants that we can't handle
11166 directly. This can happen if the RTL gets split during final
11167 instruction generation. */
11168 after_arm_reorg = 1;
11170 /* Free the minipool memory. */
11171 obstack_free (&minipool_obstack, minipool_startobj);
11174 /* Routines to output assembly language. */
11176 /* If the rtx is the correct value then return the string of the number.
11177 In this way we can ensure that valid double constants are generated even
11178 when cross compiling. */
11180 fp_immediate_constant (rtx x)
11185 if (!fp_consts_inited)
11188 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11189 for (i = 0; i < 8; i++)
11190 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11191 return strings_fp[i];
11193 gcc_unreachable ();
11196 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11197 static const char *
11198 fp_const_from_val (REAL_VALUE_TYPE *r)
11202 if (!fp_consts_inited)
11205 for (i = 0; i < 8; i++)
11206 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11207 return strings_fp[i];
11209 gcc_unreachable ();
11212 /* Output the operands of a LDM/STM instruction to STREAM.
11213 MASK is the ARM register set mask of which only bits 0-15 are important.
11214 REG is the base register, either the frame pointer or the stack pointer;
11215 INSTR is the possibly suffixed load or store instruction.
11216 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11219 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11220 unsigned long mask, int rfe)
11223 bool not_first = FALSE;
11225 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11226 fputc ('\t', stream);
11227 asm_fprintf (stream, instr, reg);
11228 fputc ('{', stream);
11230 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11231 if (mask & (1 << i))
11234 fprintf (stream, ", ");
11236 asm_fprintf (stream, "%r", i);
11241 fprintf (stream, "}^\n");
11243 fprintf (stream, "}\n");
11247 /* Output a FLDMD instruction to STREAM.
11248 BASE is the register containing the address.
11249 REG and COUNT specify the register range.
11250 Extra registers may be added to avoid hardware bugs.
11252 We output FLDMD even for ARMv5 VFP implementations. Although
11253 FLDMD is technically not supported until ARMv6, it is believed
11254 that all VFP implementations support its use in this context. */
11257 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11261 /* Workaround ARM10 VFPr1 bug. */
11262 if (count == 2 && !arm_arch6)
11269 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11270 load into multiple parts if we have to handle more than 16 registers. */
11273 vfp_output_fldmd (stream, base, reg, 16);
11274 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11278 fputc ('\t', stream);
11279 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11281 for (i = reg; i < reg + count; i++)
11284 fputs (", ", stream);
11285 asm_fprintf (stream, "d%d", i);
11287 fputs ("}\n", stream);
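/* For instance (illustrative): with BASE the stack pointer, REG == 8 and
   COUNT == 3 this prints
	fldmfdd	sp!, {d8, d9, d10}  */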
11292 /* Output the assembly for a store multiple. */
11295 vfp_output_fstmd (rtx * operands)
11302 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11303 p = strlen (pattern);
11305 gcc_assert (GET_CODE (operands[1]) == REG);
11307 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11308 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11310 p += sprintf (&pattern[p], ", d%d", base + i);
11312 strcpy (&pattern[p], "}");
11314 output_asm_insn (pattern, operands);
11319 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
11320 number of bytes pushed. */
11323 vfp_emit_fstmd (int base_reg, int count)
11330 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11331 register pairs are stored by a store multiple insn. We avoid this
11332 by pushing an extra pair. */
11333 if (count == 2 && !arm_arch6)
11335 if (base_reg == LAST_VFP_REGNUM - 3)
11340 /* FSTMD may not store more than 16 doubleword registers at once. Split
11341 larger stores into multiple parts (up to a maximum of two, in practice). */
11346 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
11348 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11349 saved += vfp_emit_fstmd (base_reg, 16);
11353 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11354 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11356 reg = gen_rtx_REG (DFmode, base_reg);
11359 XVECEXP (par, 0, 0)
11360 = gen_rtx_SET (VOIDmode,
11361 gen_frame_mem (BLKmode,
11362 gen_rtx_PRE_DEC (BLKmode,
11363 stack_pointer_rtx)),
11364 gen_rtx_UNSPEC (BLKmode,
11365 gen_rtvec (1, reg),
11366 UNSPEC_PUSH_MULT));
11368 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11369 plus_constant (stack_pointer_rtx, -(count * 8)));
11370 RTX_FRAME_RELATED_P (tmp) = 1;
11371 XVECEXP (dwarf, 0, 0) = tmp;
11373 tmp = gen_rtx_SET (VOIDmode,
11374 gen_frame_mem (DFmode, stack_pointer_rtx),
11376 RTX_FRAME_RELATED_P (tmp) = 1;
11377 XVECEXP (dwarf, 0, 1) = tmp;
11379 for (i = 1; i < count; i++)
11381 reg = gen_rtx_REG (DFmode, base_reg);
11383 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11385 tmp = gen_rtx_SET (VOIDmode,
11386 gen_frame_mem (DFmode,
11387 plus_constant (stack_pointer_rtx,
11390 RTX_FRAME_RELATED_P (tmp) = 1;
11391 XVECEXP (dwarf, 0, i + 1) = tmp;
11394 par = emit_insn (par);
11395 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11396 RTX_FRAME_RELATED_P (par) = 1;
11401 /* Emit a call instruction with pattern PAT. ADDR is the address of
11402 the call target. */
11405 arm_emit_call_insn (rtx pat, rtx addr)
11409 insn = emit_call_insn (pat);
11411 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11412 If the call might use such an entry, add a use of the PIC register
11413 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11414 if (TARGET_VXWORKS_RTP
11416 && GET_CODE (addr) == SYMBOL_REF
11417 && (SYMBOL_REF_DECL (addr)
11418 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11419 : !SYMBOL_REF_LOCAL_P (addr)))
11421 require_pic_register ();
11422 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11426 /* Output a 'call' insn. */
11428 output_call (rtx *operands)
11430 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11432 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11433 if (REGNO (operands[0]) == LR_REGNUM)
11435 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11436 output_asm_insn ("mov%?\t%0, %|lr", operands);
11439 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11441 if (TARGET_INTERWORK || arm_arch4t)
11442 output_asm_insn ("bx%?\t%0", operands);
11444 output_asm_insn ("mov%?\t%|pc, %0", operands);
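/* Illustratively, for a call through r2 on a pre-v4t, non-interworking
   target the sequence above is
	mov	lr, pc
	mov	pc, r2
   while v4t and interworking targets use "bx r2" for the final step.  */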
11449 /* Output a 'call' insn that is a reference in memory. */
11451 output_call_mem (rtx *operands)
11453 if (TARGET_INTERWORK && !arm_arch5)
11455 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11456 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11457 output_asm_insn ("bx%?\t%|ip", operands);
11459 else if (regno_use_in (LR_REGNUM, operands[0]))
11461 /* LR is used in the memory address. We load the address in the
11462 first instruction. It's safe to use IP as the target of the
11463 load since the call will kill it anyway. */
11464 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11466 output_asm_insn ("blx%?\t%|ip", operands);
11469 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11471 output_asm_insn ("bx%?\t%|ip", operands);
11473 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11478 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11479 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11486 /* Output a move from arm registers to an fpa register.
11487 OPERANDS[0] is an fpa register.
11488 OPERANDS[1] is the first register of an arm register pair. */
11490 output_mov_long_double_fpa_from_arm (rtx *operands)
11492 int arm_reg0 = REGNO (operands[1]);
11495 gcc_assert (arm_reg0 != IP_REGNUM);
11497 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11498 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11499 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11501 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11502 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
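/* I.e. for OPERANDS[0] = f0 and OPERANDS[1] = r0 this emits

	stmfd	sp!, {r0, r1, r2}
	ldfe	f0, [sp], #12

   bouncing the value through the stack, as there is no direct
   core-to-FPA register transfer of this width.  */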
11507 /* Output a move from an fpa register to arm registers.
11508 OPERANDS[0] is the first register of an arm register pair.
11509 OPERANDS[1] is an fpa register. */
11511 output_mov_long_double_arm_from_fpa (rtx *operands)
11513 int arm_reg0 = REGNO (operands[0]);
11516 gcc_assert (arm_reg0 != IP_REGNUM);
11518 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11519 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11520 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11522 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11523 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11527 /* Output a move from arm registers to arm registers of a long double
11528 OPERANDS[0] is the destination.
11529 OPERANDS[1] is the source. */
11531 output_mov_long_double_arm_from_arm (rtx *operands)
11533 /* We have to be careful here because the two might overlap. */
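/* E.g. moving r1-r3 into r0-r2 must copy upwards (mov r0, r1;
   mov r1, r2; mov r2, r3) so that each source register is read
   before it is overwritten; moving r0-r2 into r1-r3 must copy
   downwards for the same reason.  */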
11534 int dest_start = REGNO (operands[0]);
11535 int src_start = REGNO (operands[1]);
11539 if (dest_start < src_start)
11541 for (i = 0; i < 3; i++)
11543 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11544 ops[1] = gen_rtx_REG (SImode, src_start + i);
11545 output_asm_insn ("mov%?\t%0, %1", ops);
11550 for (i = 2; i >= 0; i--)
11552 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11553 ops[1] = gen_rtx_REG (SImode, src_start + i);
11554 output_asm_insn ("mov%?\t%0, %1", ops);
11562 /* Emit a MOVW/MOVT pair. */
11563 void arm_emit_movpair (rtx dest, rtx src)
11565 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
11566 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
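/* On a target with the v6T2 movw/movt instructions this becomes, e.g.

	movw	rd, #:lower16:sym
	movt	rd, #:upper16:sym

   where the movw half (the HIGH set above) must be emitted first,
   since movw also clears the upper 16 bits of the destination.  */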
11570 /* Output a move from arm registers to an fpa register.
11571 OPERANDS[0] is an fpa register.
11572 OPERANDS[1] is the first register of an arm register pair. */
11574 output_mov_double_fpa_from_arm (rtx *operands)
11576 int arm_reg0 = REGNO (operands[1]);
11579 gcc_assert (arm_reg0 != IP_REGNUM);
11581 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11582 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11583 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
11584 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
11588 /* Output a move from an fpa register to arm registers.
11589 OPERANDS[0] is the first register of an arm register pair.
11590 OPERANDS[1] is an fpa register. */
11592 output_mov_double_arm_from_fpa (rtx *operands)
11594 int arm_reg0 = REGNO (operands[0]);
11597 gcc_assert (arm_reg0 != IP_REGNUM);
11599 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11600 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11601 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
11602 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
11606 /* Output a move between double words.
11607 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
11608 or MEM<-REG and all MEMs must be offsettable addresses. */
11610 output_move_double (rtx *operands)
11612 enum rtx_code code0 = GET_CODE (operands[0]);
11613 enum rtx_code code1 = GET_CODE (operands[1]);
11618 unsigned int reg0 = REGNO (operands[0]);
11620 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
11622 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
11624 switch (GET_CODE (XEXP (operands[1], 0)))
11628 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
11629 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
11631 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11635 gcc_assert (TARGET_LDRD);
11636 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
11641 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
11643 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
11648 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
11650 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
11654 gcc_assert (TARGET_LDRD);
11655 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
11660 /* Autoincrement addressing modes should never have overlapping
11661 base and destination registers, and overlapping index registers
11662 are already prohibited, so this doesn't need to worry about
11663 fix_cm3_ldrd. */
11664 otherops[0] = operands[0];
11665 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
11666 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
11668 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
11670 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
11672 /* Registers overlap so split out the increment. */
11673 output_asm_insn ("add%?\t%1, %1, %2", otherops);
11674 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
11678 /* Use a single insn if we can.
11679 FIXME: IWMMXT allows offsets larger than ldrd can
11680 handle, fix these up with a pair of ldr. */
11682 || GET_CODE (otherops[2]) != CONST_INT
11683 || (INTVAL (otherops[2]) > -256
11684 && INTVAL (otherops[2]) < 256))
11685 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
11688 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11689 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
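/* E.g. a pre-modify offset of #1024 is outside ldrd's -255..255
   immediate range, so the access is emitted as

	ldr	rd, [rn, #1024]!
	ldr	rdH, [rn, #4]

   where rdH denotes the second register of the destination pair.  */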
11695 /* Use a single insn if we can.
11696 FIXME: IWMMXT allows offsets larger than ldrd can handle,
11697 fix these up with a pair of ldr. */
11699 || GET_CODE (otherops[2]) != CONST_INT
11700 || (INTVAL (otherops[2]) > -256
11701 && INTVAL (otherops[2]) < 256))
11702 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
11705 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11706 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
11713 /* We might be able to use ldrd %0, %1 here. However the range is
11714 different to ldr/adr, and it is broken on some ARMv7-M
11715 implementations. */
11716 /* Use the second register of the pair to avoid problematic
11717 conditional execution. */
11718 otherops[1] = operands[1];
11719 output_asm_insn ("adr%?\t%0, %1", otherops);
11720 operands[1] = otherops[0];
11722 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
11724 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
11727 /* ??? This needs checking for thumb2. */
11729 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
11730 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
11732 otherops[0] = operands[0];
11733 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
11734 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
11736 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
11738 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
11740 switch ((int) INTVAL (otherops[2]))
11743 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
11748 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
11753 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
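/* I.e. without ldrd, only the three offsets that line up with an LDM
   addressing mode are handled directly: [rn, #-8] -> ldmdb rn, {rl, rh},
   [rn, #-4] -> ldmda rn, {rl, rh}, and [rn, #4] -> ldmib rn, {rl, rh};
   any other offset falls through to the add/sub sequence below.  */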
11757 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
11758 operands[1] = otherops[0];
11760 && (GET_CODE (otherops[2]) == REG
11762 || (GET_CODE (otherops[2]) == CONST_INT
11763 && INTVAL (otherops[2]) > -256
11764 && INTVAL (otherops[2]) < 256)))
11766 if (reg_overlap_mentioned_p (operands[0],
11770 /* Swap base and index registers over to
11771 avoid a conflict. */
11773 otherops[1] = otherops[2];
11776 /* If both registers conflict, it will usually
11777 have been fixed by a splitter. */
11778 if (reg_overlap_mentioned_p (operands[0], otherops[2])
11779 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
11781 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11782 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
11786 otherops[0] = operands[0];
11787 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
11792 if (GET_CODE (otherops[2]) == CONST_INT)
11794 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
11795 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
11797 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11800 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11803 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
11806 return "ldr%(d%)\t%0, [%1]";
11808 return "ldm%(ia%)\t%1, %M0";
11812 otherops[1] = adjust_address (operands[1], SImode, 4);
11813 /* Take care of overlapping base/data reg. */
11814 if (reg_mentioned_p (operands[0], operands[1]))
11816 output_asm_insn ("ldr%?\t%0, %1", otherops);
11817 output_asm_insn ("ldr%?\t%0, %1", operands);
11821 output_asm_insn ("ldr%?\t%0, %1", operands);
11822 output_asm_insn ("ldr%?\t%0, %1", otherops);
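/* E.g. for a load of r0:r1 from [r0] the high word is loaded first:

	ldr	r1, [r0, #4]
	ldr	r0, [r0]

   so the base register is still valid for the second load.  */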
11829 /* Constraints should ensure this. */
11830 gcc_assert (code0 == MEM && code1 == REG);
11831 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
11833 switch (GET_CODE (XEXP (operands[0], 0)))
11837 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
11839 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
11843 gcc_assert (TARGET_LDRD);
11844 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
11849 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
11851 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
11856 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
11858 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
11862 gcc_assert (TARGET_LDRD);
11863 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
11868 otherops[0] = operands[1];
11869 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
11870 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
11872 /* IWMMXT allows offsets larger than ldrd can handle,
11873 fix these up with a pair of ldr. */
11875 && GET_CODE (otherops[2]) == CONST_INT
11876 && (INTVAL (otherops[2]) <= -256
11877 || INTVAL (otherops[2]) >= 256))
11879 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
11881 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11882 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11886 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11887 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
11890 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
11891 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
11893 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
11897 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
11898 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
11900 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
11903 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
11909 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
11915 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
11920 && (GET_CODE (otherops[2]) == REG
11922 || (GET_CODE (otherops[2]) == CONST_INT
11923 && INTVAL (otherops[2]) > -256
11924 && INTVAL (otherops[2]) < 256)))
11926 otherops[0] = operands[1];
11927 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
11928 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
11934 otherops[0] = adjust_address (operands[0], SImode, 4);
11935 otherops[1] = operands[1];
11936 output_asm_insn ("str%?\t%1, %0", operands);
11937 output_asm_insn ("str%?\t%H1, %0", otherops);
11944 /* Output a move, load or store for quad-word vectors in ARM registers. Only
11945 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
11948 output_move_quad (rtx *operands)
11950 if (REG_P (operands[0]))
11952 /* Load, or reg->reg move. */
11954 if (MEM_P (operands[1]))
11956 switch (GET_CODE (XEXP (operands[1], 0)))
11959 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11964 output_asm_insn ("adr%?\t%0, %1", operands);
11965 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
11969 gcc_unreachable ();
11977 gcc_assert (REG_P (operands[1]));
11979 dest = REGNO (operands[0]);
11980 src = REGNO (operands[1]);
11982 /* This seems pretty dumb, but hopefully GCC won't try to do it
11983 very often. */
11984 if (dest < src)
11985 for (i = 0; i < 4; i++)
11987 ops[0] = gen_rtx_REG (SImode, dest + i);
11988 ops[1] = gen_rtx_REG (SImode, src + i);
11989 output_asm_insn ("mov%?\t%0, %1", ops);
11992 for (i = 3; i >= 0; i--)
11994 ops[0] = gen_rtx_REG (SImode, dest + i);
11995 ops[1] = gen_rtx_REG (SImode, src + i);
11996 output_asm_insn ("mov%?\t%0, %1", ops);
12002 gcc_assert (MEM_P (operands[0]));
12003 gcc_assert (REG_P (operands[1]));
12004 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12006 switch (GET_CODE (XEXP (operands[0], 0)))
12009 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12013 gcc_unreachable ();
12020 /* Output a VFP load or store instruction. */
12023 output_move_vfp (rtx *operands)
12025 rtx reg, mem, addr, ops[2];
12026 int load = REG_P (operands[0]);
12027 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12028 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12031 enum machine_mode mode;
12033 reg = operands[!load];
12034 mem = operands[load];
12036 mode = GET_MODE (reg);
12038 gcc_assert (REG_P (reg));
12039 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12040 gcc_assert (mode == SFmode
12044 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12045 gcc_assert (MEM_P (mem));
12047 addr = XEXP (mem, 0);
12049 switch (GET_CODE (addr))
12052 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12053 ops[0] = XEXP (addr, 0);
12058 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12059 ops[0] = XEXP (addr, 0);
12064 templ = "f%s%c%%?\t%%%s0, %%1%s";
12070 sprintf (buff, templ,
12071 load ? "ld" : "st",
12074 integer_p ? "\t%@ int" : "");
12075 output_asm_insn (buff, ops);
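/* E.g. a double-precision load from a post-incremented base comes out
   roughly as "fldmiad r4!, {d5}", while a plain single-precision store
   is "fsts s5, [r4]", with "@ int" appended as a comment when the
   value is really an integer living in a VFP register.  */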
12080 /* Output a Neon quad-word load or store, or a load or store for
12081 larger structure modes.
12083 WARNING: The ordering of elements is weird in big-endian mode,
12084 because we use VSTM, as required by the EABI. GCC RTL defines
12085 element ordering based on in-memory order. This can differ
12086 from the architectural ordering of elements within a NEON register.
12087 The intrinsics defined in arm_neon.h use the NEON register element
12088 ordering, not the GCC RTL element ordering.
12090 For example, the in-memory ordering of a big-endian quadword
12091 vector with 16-bit elements when stored from register pair {d0,d1}
12092 will be (lowest address first, d0[N] is NEON register element N):
12094 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12096 When necessary, quadword registers (dN, dN+1) are moved to ARM
12097 registers from rN in the order:
12099 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12101 This means that STM/LDM can be used on vectors in ARM registers, and
12102 the same memory layout will result as if VSTM/VLDM were used. */
12105 output_move_neon (rtx *operands)
12107 rtx reg, mem, addr, ops[2];
12108 int regno, load = REG_P (operands[0]);
12111 enum machine_mode mode;
12113 reg = operands[!load];
12114 mem = operands[load];
12116 mode = GET_MODE (reg);
12118 gcc_assert (REG_P (reg));
12119 regno = REGNO (reg);
12120 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12121 || NEON_REGNO_OK_FOR_QUAD (regno));
12122 gcc_assert (VALID_NEON_DREG_MODE (mode)
12123 || VALID_NEON_QREG_MODE (mode)
12124 || VALID_NEON_STRUCT_MODE (mode));
12125 gcc_assert (MEM_P (mem));
12127 addr = XEXP (mem, 0);
12129 /* Strip off const from addresses like (const (plus (...))). */
12130 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12131 addr = XEXP (addr, 0);
12133 switch (GET_CODE (addr))
12136 templ = "v%smia%%?\t%%0!, %%h1";
12137 ops[0] = XEXP (addr, 0);
12142 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12143 templ = "v%smdb%%?\t%%0!, %%h1";
12144 ops[0] = XEXP (addr, 0);
12149 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12150 gcc_unreachable ();
12155 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12158 for (i = 0; i < nregs; i++)
12160 /* We're only using DImode here because it's a convenient size. */
12161 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12162 ops[1] = adjust_address (mem, SImode, 8 * i);
12163 if (reg_overlap_mentioned_p (ops[0], mem))
12165 gcc_assert (overlap == -1);
12170 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12171 output_asm_insn (buff, ops);
12176 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12177 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12178 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12179 output_asm_insn (buff, ops);
12186 templ = "v%smia%%?\t%%m0, %%h1";
12191 sprintf (buff, templ, load ? "ld" : "st");
12192 output_asm_insn (buff, ops);
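/* E.g. loading a quad vector q0 from (mem (reg r2)) gives roughly
   "vldmia r2, {d0-d1}", while a post-incremented base gives
   "vldmia r2!, {d0-d1}" instead.  */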
12197 /* Output an ADD r, s, #n where n may be too big for one instruction.
12198 If adding zero to one register, output nothing. */
12200 output_add_immediate (rtx *operands)
12202 HOST_WIDE_INT n = INTVAL (operands[2]);
12204 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12207 output_multi_immediate (operands,
12208 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12211 output_multi_immediate (operands,
12212 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12219 /* Output a multiple immediate operation.
12220 OPERANDS is the vector of operands referred to in the output patterns.
12221 INSTR1 is the output pattern to use for the first constant.
12222 INSTR2 is the output pattern to use for subsequent constants.
12223 IMMED_OP is the index of the constant slot in OPERANDS.
12224 N is the constant value. */
12225 static const char *
12226 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12227 int immed_op, HOST_WIDE_INT n)
12229 #if HOST_BITS_PER_WIDE_INT > 32
12235 /* Quick and easy output. */
12236 operands[immed_op] = const0_rtx;
12237 output_asm_insn (instr1, operands);
12242 const char * instr = instr1;
12244 /* Note that n is never zero here (which would give no output). */
12245 for (i = 0; i < 32; i += 2)
12249 operands[immed_op] = GEN_INT (n & (255 << i));
12250 output_asm_insn (instr, operands);
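/* A worked example: adding #0x10003 cannot be done with one
   8-bit-rotated immediate, so the loop above emits

	add	rd, rn, #3
	add	rd, rd, #65536

   one instruction per non-zero byte (at an even rotation) of N.  */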
12260 /* Return the name of a shifter operation. */
12261 static const char *
12262 arm_shift_nmem (enum rtx_code code)
12267 return ARM_LSL_NAME;
12283 /* Return the appropriate ARM instruction for the operation code.
12284 The returned result should not be overwritten. OP is the rtx of the
12285 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12288 arithmetic_instr (rtx op, int shift_first_arg)
12290 switch (GET_CODE (op))
12296 return shift_first_arg ? "rsb" : "sub";
12311 return arm_shift_nmem (GET_CODE (op));
12314 gcc_unreachable ();
12318 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12319 for the operation code. The returned result should not be overwritten.
12320 OP is the rtx code of the shift.
12321 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
12323 static const char *
12324 shift_op (rtx op, HOST_WIDE_INT *amountp)
12327 enum rtx_code code = GET_CODE (op);
12329 switch (GET_CODE (XEXP (op, 1)))
12337 *amountp = INTVAL (XEXP (op, 1));
12341 gcc_unreachable ();
12347 gcc_assert (*amountp != -1);
12348 *amountp = 32 - *amountp;
12351 /* Fall through. */
12357 mnem = arm_shift_nmem (code);
12361 /* We never have to worry about the amount being other than a
12362 power of 2, since this case can never be reloaded from a reg. */
12363 gcc_assert (*amountp != -1);
12364 *amountp = int_log2 (*amountp);
12365 return ARM_LSL_NAME;
12368 gcc_unreachable ();
12371 if (*amountp != -1)
12373 /* This is not 100% correct, but follows from the desire to merge
12374 multiplication by a power of 2 with the recognizer for a
12375 shift. >=32 is not a valid shift for "lsl", so we must try and
12376 output a shift that produces the correct arithmetical result.
12377 Using lsr #32 is identical except for the fact that the carry bit
12378 is not set correctly if we set the flags; but we never use the
12379 carry bit from such an operation, so we can ignore that. */
12380 if (code == ROTATERT)
12381 /* Rotate is just modulo 32. */
12382 *amountp &= 31;
12383 else if (*amountp != (*amountp & 31))
12385 if (code == ASHIFT)
12390 /* Shifts of 0 are no-ops. */
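/* E.g. (mult x 8) reaches here via the MULT case above and is printed
   as "lsl #3", while a rotate amount of 33 is reduced modulo 32 and
   printed as "ror #1".  */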
12398 /* Obtain the shift from the POWER of two. */
12400 static HOST_WIDE_INT
12401 int_log2 (HOST_WIDE_INT power)
12403 HOST_WIDE_INT shift = 0;
12405 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12407 gcc_assert (shift <= 31);
12414 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12415 because /bin/as is horribly restrictive. The judgement about
12416 whether or not each character is 'printable' (and can be output as
12417 is) or not (and must be printed with an octal escape) must be made
12418 with reference to the *host* character set -- the situation is
12419 similar to that discussed in the comments above pp_c_char in
12420 c-pretty-print.c. */
12422 #define MAX_ASCII_LEN 51
12425 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12428 int len_so_far = 0;
12430 fputs ("\t.ascii\t\"", stream);
12432 for (i = 0; i < len; i++)
12436 if (len_so_far >= MAX_ASCII_LEN)
12438 fputs ("\"\n\t.ascii\t\"", stream);
12444 if (c == '\\' || c == '\"')
12446 putc ('\\', stream);
12454 fprintf (stream, "\\%03o", c);
12459 fputs ("\"\n", stream);
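/* E.g. the bytes 'h', 'i', '"', '\n' come out as

	.ascii	"hi\"\012"

   with a fresh .ascii directive started whenever MAX_ASCII_LEN
   characters have been emitted on one line.  */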
12462 /* Compute the register save mask for registers 0 through 12
12463 inclusive. This code is used by arm_compute_save_reg_mask. */
12465 static unsigned long
12466 arm_compute_save_reg0_reg12_mask (void)
12468 unsigned long func_type = arm_current_func_type ();
12469 unsigned long save_reg_mask = 0;
12472 if (IS_INTERRUPT (func_type))
12474 unsigned int max_reg;
12475 /* Interrupt functions must not corrupt any registers,
12476 even call clobbered ones. If this is a leaf function
12477 we can just examine the registers used by the RTL, but
12478 otherwise we have to assume that whatever function is
12479 called might clobber anything, and so we have to save
12480 all the call-clobbered registers as well. */
12481 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12482 /* FIQ handlers have registers r8 - r12 banked, so
12483 we only need to check r0 - r7, Normal ISRs only
12484 bank r14 and r15, so we must check up to r12.
12485 r13 is the stack pointer which is always preserved,
12486 so we do not need to consider it here. */
12491 for (reg = 0; reg <= max_reg; reg++)
12492 if (df_regs_ever_live_p (reg)
12493 || (! current_function_is_leaf && call_used_regs[reg]))
12494 save_reg_mask |= (1 << reg);
12496 /* Also save the pic base register if necessary. */
12498 && !TARGET_SINGLE_PIC_BASE
12499 && arm_pic_register != INVALID_REGNUM
12500 && crtl->uses_pic_offset_table)
12501 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12505 /* In the normal case we only need to save those registers
12506 which are call saved and which are used by this function. */
12507 for (reg = 0; reg <= 11; reg++)
12508 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12509 save_reg_mask |= (1 << reg);
12511 /* Handle the frame pointer as a special case. */
12512 if (frame_pointer_needed)
12513 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12515 /* If we aren't loading the PIC register,
12516 don't stack it even though it may be live. */
12518 && !TARGET_SINGLE_PIC_BASE
12519 && arm_pic_register != INVALID_REGNUM
12520 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12521 || crtl->uses_pic_offset_table))
12522 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12524 /* The prologue will copy SP into R0, so save it. */
12525 if (IS_STACKALIGN (func_type))
12526 save_reg_mask |= 1;
12529 /* Save registers so the exception handler can modify them. */
12530 if (crtl->calls_eh_return)
12536 reg = EH_RETURN_DATA_REGNO (i);
12537 if (reg == INVALID_REGNUM)
12539 save_reg_mask |= 1 << reg;
12543 return save_reg_mask;
12547 /* Compute the number of bytes used to store the static chain register on the
12548 stack, above the stack frame. We need to know this accurately to get the
12549 alignment of the rest of the stack frame correct. */
12551 static int
12552 arm_compute_static_chain_stack_bytes (void)
12553 unsigned long func_type = arm_current_func_type ();
12554 int static_chain_stack_bytes = 0;
12556 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
12557 && IS_NESTED (func_type)
12558 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
12559 static_chain_stack_bytes = 4;
12561 return static_chain_stack_bytes;
12565 /* Compute a bit mask of which registers need to be
12566 saved on the stack for the current function.
12567 This is used by arm_get_frame_offsets, which may add extra registers. */
12569 static unsigned long
12570 arm_compute_save_reg_mask (void)
12572 unsigned int save_reg_mask = 0;
12573 unsigned long func_type = arm_current_func_type ();
12576 if (IS_NAKED (func_type))
12577 /* This should never really happen. */
12578 return 0;
12580 /* If we are creating a stack frame, then we must save the frame pointer,
12581 IP (which will hold the old stack pointer), LR and the PC. */
12582 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12583 save_reg_mask |=
12584 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
12585 | (1 << IP_REGNUM)
12586 | (1 << LR_REGNUM)
12587 | (1 << PC_REGNUM);
12589 /* Volatile functions do not return, so there
12590 is no need to save any other registers. */
12591 if (IS_VOLATILE (func_type))
12592 return save_reg_mask;
12594 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
12596 /* Decide if we need to save the link register.
12597 Interrupt routines have their own banked link register,
12598 so they never need to save it.
12599 Otherwise if we do not use the link register we do not need to save
12600 it. If we are pushing other registers onto the stack however, we
12601 can save an instruction in the epilogue by pushing the link register
12602 now and then popping it back into the PC. This incurs extra memory
12603 accesses though, so we only do it when optimizing for size, and only
12604 if we know that we will not need a fancy return sequence. */
12605 if (df_regs_ever_live_p (LR_REGNUM)
12606 || (save_reg_mask
12607 && optimize_size
12608 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12609 && !crtl->calls_eh_return))
12610 save_reg_mask |= 1 << LR_REGNUM;
12612 if (cfun->machine->lr_save_eliminated)
12613 save_reg_mask &= ~ (1 << LR_REGNUM);
12615 if (TARGET_REALLY_IWMMXT
12616 && ((bit_count (save_reg_mask)
12617 + ARM_NUM_INTS (crtl->args.pretend_args_size +
12618 arm_compute_static_chain_stack_bytes ())
12621 /* The total number of registers that are going to be pushed
12622 onto the stack is odd. We need to ensure that the stack
12623 is 64-bit aligned before we start to save iWMMXt registers,
12624 and also before we start to create locals. (A local variable
12625 might be a double or long long which we will load/store using
12626 an iWMMXt instruction). Therefore we need to push another
12627 ARM register, so that the stack will be 64-bit aligned. We
12628 try to avoid using the arg registers (r0 -r3) as they might be
12629 used to pass values in a tail call. */
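/* E.g. if only {r4, r5, lr} were due to be pushed (three registers,
   12 bytes) and there are no pretend args, r6 is added to the mask so
   that 16 bytes are pushed and the 64-bit alignment needed by iWMMXt
   loads and stores is preserved.  */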
12630 for (reg = 4; reg <= 12; reg++)
12631 if ((save_reg_mask & (1 << reg)) == 0)
12635 save_reg_mask |= (1 << reg);
12638 cfun->machine->sibcall_blocked = 1;
12639 save_reg_mask |= (1 << 3);
12643 /* We may need to push an additional register for use initializing the
12644 PIC base register. */
12645 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
12646 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
12648 reg = thumb_find_work_register (1 << 4);
12649 if (!call_used_regs[reg])
12650 save_reg_mask |= (1 << reg);
12653 return save_reg_mask;
12657 /* Compute a bit mask of which registers need to be
12658 saved on the stack for the current function. */
12659 static unsigned long
12660 thumb1_compute_save_reg_mask (void)
12662 unsigned long mask;
12666 for (reg = 0; reg < 12; reg++)
12667 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12671 && !TARGET_SINGLE_PIC_BASE
12672 && arm_pic_register != INVALID_REGNUM
12673 && crtl->uses_pic_offset_table)
12674 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12676 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
12677 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
12678 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12680 /* LR will also be pushed if any lo regs are pushed. */
12681 if (mask & 0xff || thumb_force_lr_save ())
12682 mask |= (1 << LR_REGNUM);
12684 /* Make sure we have a low work register if we need one.
12685 We will need one if we are going to push a high register,
12686 but we are not currently intending to push a low register. */
12687 if ((mask & 0xff) == 0
12688 && ((mask & 0x0f00) || TARGET_BACKTRACE))
12690 /* Use thumb_find_work_register to choose which register
12691 we will use. If the register is live then we will
12692 have to push it. Use LAST_LO_REGNUM as our fallback
12693 choice for the register to select. */
12694 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
12695 /* Make sure the register returned by thumb_find_work_register is
12696 not part of the return value. */
12697 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
12698 reg = LAST_LO_REGNUM;
12700 if (! call_used_regs[reg])
12704 /* The 504 below is 8 bytes less than 512 because there are two possible
12705 alignment words. We can't tell here if they will be present or not, so we
12706 have to play it safe and assume that they are. */
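/* I.e. with up to 8 bytes of alignment padding the adjustment could
   reach 512 bytes, which no longer fits the immediate field of a
   single Thumb-1 sp-relative add/sub (limited to 508 bytes), so a
   spare low register must be available to build the decrement.  */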
12707 if ((CALLER_INTERWORKING_SLOT_SIZE +
12708 ROUND_UP_WORD (get_frame_size ()) +
12709 crtl->outgoing_args_size) >= 504)
12711 /* This is the same as the code in thumb1_expand_prologue() which
12712 determines which register to use for stack decrement. */
12713 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
12714 if (mask & (1 << reg))
12717 if (reg > LAST_LO_REGNUM)
12719 /* Make sure we have a register available for stack decrement. */
12720 mask |= 1 << LAST_LO_REGNUM;
12728 /* Return the number of bytes required to save VFP registers. */
12730 arm_get_vfp_saved_size (void)
12732 unsigned int regno;
12737 /* Space for saved VFP registers. */
12738 if (TARGET_HARD_FLOAT && TARGET_VFP)
12741 for (regno = FIRST_VFP_REGNUM;
12742 regno < LAST_VFP_REGNUM;
12745 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
12746 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
12750 /* Workaround ARM10 VFPr1 bug. */
12751 if (count == 2 && !arm_arch6)
12753 saved += count * 8;
12762 if (count == 2 && !arm_arch6)
12764 saved += count * 8;
12771 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
12772 everything bar the final return instruction. */
12774 output_return_instruction (rtx operand, int really_return, int reverse)
12776 char conditional[10];
12779 unsigned long live_regs_mask;
12780 unsigned long func_type;
12781 arm_stack_offsets *offsets;
12783 func_type = arm_current_func_type ();
12785 if (IS_NAKED (func_type))
12788 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
12790 /* If this function was declared non-returning, and we have
12791 found a tail call, then we have to trust that the called
12792 function won't return. */
12797 /* Otherwise, trap an attempted return by aborting. */
12799 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
12800 : "abort");
12801 assemble_external_libcall (ops[1]);
12802 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
12808 gcc_assert (!cfun->calls_alloca || really_return);
12810 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
12812 cfun->machine->return_used_this_function = 1;
12814 offsets = arm_get_frame_offsets ();
12815 live_regs_mask = offsets->saved_regs_mask;
12817 if (live_regs_mask)
12819 const char * return_reg;
12821 /* If we do not have any special requirements for function exit
12822 (e.g. interworking) then we can load the return address
12823 directly into the PC. Otherwise we must load it into LR. */
12824 if (really_return
12825 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
12826 return_reg = reg_names[PC_REGNUM];
12828 return_reg = reg_names[LR_REGNUM];
12830 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
12832 /* There are three possible reasons for the IP register
12833 being saved. 1) a stack frame was created, in which case
12834 IP contains the old stack pointer, or 2) an ISR routine
12835 corrupted it, or 3) it was saved to align the stack on
12836 iWMMXt. In case 1, restore IP into SP, otherwise just
12837 restore IP. */
12838 if (frame_pointer_needed)
12840 live_regs_mask &= ~ (1 << IP_REGNUM);
12841 live_regs_mask |= (1 << SP_REGNUM);
12844 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
12847 /* On some ARM architectures it is faster to use LDR rather than
12848 LDM to load a single register. On other architectures, the
12849 cost is the same. In 26 bit mode, or for exception handlers,
12850 we have to use LDM to load the PC so that the CPSR is also
12851 restored. */
12852 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
12853 if (live_regs_mask == (1U << reg))
12856 if (reg <= LAST_ARM_REGNUM
12857 && (reg != LR_REGNUM
12859 || ! IS_INTERRUPT (func_type)))
12861 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
12862 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
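/* E.g. a normal non-interworking function that only pushed lr can
   restore and return with the single instruction

	ldr	pc, [sp], #4

   instead of an ldm.  */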
12869 /* Generate the load multiple instruction to restore the
12870 registers. Note we can get here, even if
12871 frame_pointer_needed is true, but only if sp already
12872 points to the base of the saved core registers. */
12873 if (live_regs_mask & (1 << SP_REGNUM))
12875 unsigned HOST_WIDE_INT stack_adjust;
12877 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
12878 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
12880 if (stack_adjust && arm_arch5 && TARGET_ARM)
12881 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
12884 /* If we can't use ldmib (SA110 bug),
12885 then try to pop r3 instead. */
12887 live_regs_mask |= 1 << 3;
12888 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
12892 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
12894 p = instr + strlen (instr);
12896 for (reg = 0; reg <= SP_REGNUM; reg++)
12897 if (live_regs_mask & (1 << reg))
12899 int l = strlen (reg_names[reg]);
12905 memcpy (p, ", ", 2);
12909 memcpy (p, "%|", 2);
12910 memcpy (p + 2, reg_names[reg], l);
12914 if (live_regs_mask & (1 << LR_REGNUM))
12916 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
12917 /* If returning from an interrupt, restore the CPSR. */
12918 if (IS_INTERRUPT (func_type))
12925 output_asm_insn (instr, & operand);
12927 /* See if we need to generate an extra instruction to
12928 perform the actual function return. */
12929 if (really_return
12930 && func_type != ARM_FT_INTERWORKED
12931 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
12933 /* The return has already been handled
12934 by loading the LR into the PC. */
12941 switch ((int) ARM_FUNC_TYPE (func_type))
12945 /* ??? This is wrong for unified assembly syntax. */
12946 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
12949 case ARM_FT_INTERWORKED:
12950 sprintf (instr, "bx%s\t%%|lr", conditional);
12953 case ARM_FT_EXCEPTION:
12954 /* ??? This is wrong for unified assembly syntax. */
12955 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
12959 /* Use bx if it's available. */
12960 if (arm_arch5 || arm_arch4t)
12961 sprintf (instr, "bx%s\t%%|lr", conditional);
12963 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
12967 output_asm_insn (instr, & operand);
12973 /* Write the function name into the code section, directly preceding
12974 the function prologue.
12976 Code will be output similar to this:
12978 .ascii "arm_poke_function_name", 0
12981 .word 0xff000000 + (t1 - t0)
12982 arm_poke_function_name
12984 stmfd sp!, {fp, ip, lr, pc}
12987 When performing a stack backtrace, code can inspect the value
12988 of 'pc' stored at 'fp' + 0. If the trace function then looks
12989 at location pc - 12 and the top 8 bits are set, then we know
12990 that there is a function name embedded immediately preceding this
12991 location, whose length is given by (pc[-3] & ~0xff000000).
12993 We assume that pc is declared as a pointer to an unsigned long.
12995 It is of no benefit to output the function name if we are assembling
12996 a leaf function. These function types will not contain a stack
12997 backtrace structure, therefore it is not possible to determine the
12998 function name. */
12999 void
13000 arm_poke_function_name (FILE *stream, const char *name)
13002 unsigned long alignlength;
13003 unsigned long length;
13006 length = strlen (name) + 1;
13007 alignlength = ROUND_UP_WORD (length);
13009 ASM_OUTPUT_ASCII (stream, name, length);
13010 ASM_OUTPUT_ALIGN (stream, 2);
13011 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13012 assemble_aligned_integer (UNITS_PER_WORD, x);
13015 /* Place some comments into the assembler stream
13016 describing the current function. */
13018 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13020 unsigned long func_type;
13024 thumb1_output_function_prologue (f, frame_size);
13028 /* Sanity check. */
13029 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13031 func_type = arm_current_func_type ();
13033 switch ((int) ARM_FUNC_TYPE (func_type))
13036 case ARM_FT_NORMAL:
13038 case ARM_FT_INTERWORKED:
13039 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13042 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13045 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13047 case ARM_FT_EXCEPTION:
13048 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13052 if (IS_NAKED (func_type))
13053 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13055 if (IS_VOLATILE (func_type))
13056 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13058 if (IS_NESTED (func_type))
13059 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13060 if (IS_STACKALIGN (func_type))
13061 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13063 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13065 crtl->args.pretend_args_size, frame_size);
13067 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13068 frame_pointer_needed,
13069 cfun->machine->uses_anonymous_args);
13071 if (cfun->machine->lr_save_eliminated)
13072 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13074 if (crtl->calls_eh_return)
13075 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13080 arm_output_epilogue (rtx sibling)
13083 unsigned long saved_regs_mask;
13084 unsigned long func_type;
13085 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13086 frame that is $fp + 4 for a non-variadic function. */
13087 int floats_offset = 0;
13089 FILE * f = asm_out_file;
13090 unsigned int lrm_count = 0;
13091 int really_return = (sibling == NULL);
13093 arm_stack_offsets *offsets;
13095 /* If we have already generated the return instruction
13096 then it is futile to generate anything else. */
13097 if (use_return_insn (FALSE, sibling)
13098 && (cfun->machine->return_used_this_function != 0))
13101 func_type = arm_current_func_type ();
13103 if (IS_NAKED (func_type))
13104 /* Naked functions don't have epilogues. */
13107 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13111 /* A volatile function should never return. Call abort. */
13112 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13113 assemble_external_libcall (op);
13114 output_asm_insn ("bl\t%a0", &op);
13119 /* If we are throwing an exception, then we really must be doing a
13120 return, so we can't tail-call. */
13121 gcc_assert (!crtl->calls_eh_return || really_return);
13123 offsets = arm_get_frame_offsets ();
13124 saved_regs_mask = offsets->saved_regs_mask;
13127 lrm_count = bit_count (saved_regs_mask);
13129 floats_offset = offsets->saved_args;
13130 /* Compute how far away the floats will be. */
13131 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13132 if (saved_regs_mask & (1 << reg))
13133 floats_offset += 4;
13135 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13137 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13138 int vfp_offset = offsets->frame;
13140 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13142 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13143 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13145 floats_offset += 12;
13146 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13147 reg, FP_REGNUM, floats_offset - vfp_offset);
13152 start_reg = LAST_FPA_REGNUM;
13154 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13156 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13158 floats_offset += 12;
13160 /* We can't unstack more than four registers at once. */
13161 if (start_reg - reg == 3)
13163 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13164 reg, FP_REGNUM, floats_offset - vfp_offset);
13165 start_reg = reg - 1;
13170 if (reg != start_reg)
13171 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13172 reg + 1, start_reg - reg,
13173 FP_REGNUM, floats_offset - vfp_offset);
13174 start_reg = reg - 1;
13178 /* Just in case the last register checked also needs unstacking. */
13179 if (reg != start_reg)
13180 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13181 reg + 1, start_reg - reg,
13182 FP_REGNUM, floats_offset - vfp_offset);
13185 if (TARGET_HARD_FLOAT && TARGET_VFP)
13189 /* The fldmd insns do not have base+offset addressing
13190 modes, so we use IP to hold the address. */
13191 saved_size = arm_get_vfp_saved_size ();
13193 if (saved_size > 0)
13195 floats_offset += saved_size;
13196 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13197 FP_REGNUM, floats_offset - vfp_offset);
13199 start_reg = FIRST_VFP_REGNUM;
13200 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13202 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13203 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13205 if (start_reg != reg)
13206 vfp_output_fldmd (f, IP_REGNUM,
13207 (start_reg - FIRST_VFP_REGNUM) / 2,
13208 (reg - start_reg) / 2);
13209 start_reg = reg + 2;
13212 if (start_reg != reg)
13213 vfp_output_fldmd (f, IP_REGNUM,
13214 (start_reg - FIRST_VFP_REGNUM) / 2,
13215 (reg - start_reg) / 2);
13220 /* The frame pointer is guaranteed to be non-double-word aligned.
13221 This is because it is set to (old_stack_pointer - 4) and the
13222 old_stack_pointer was double word aligned. Thus the offset to
13223 the iWMMXt registers to be loaded must also be non-double-word
13224 sized, so that the resultant address *is* double-word aligned.
13225 We can ignore floats_offset since that was already included in
13226 the live_regs_mask. */
13227 lrm_count += (lrm_count % 2 ? 2 : 1);
13229 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13230 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13232 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13233 reg, FP_REGNUM, lrm_count * 4);
13238 /* saved_regs_mask should contain the IP, which at the time of stack
13239 frame generation actually contains the old stack pointer. So a
13240 quick way to unwind the stack is just pop the IP register directly
13241 into the stack pointer. */
13242 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13243 saved_regs_mask &= ~ (1 << IP_REGNUM);
13244 saved_regs_mask |= (1 << SP_REGNUM);
13246 /* There are two registers left in saved_regs_mask - LR and PC. We
13247 only need to restore the LR register (the return address), but to
13248 save time we can load it directly into the PC, unless we need a
13249 special function exit sequence, or we are not really returning. */
13251 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13252 && !crtl->calls_eh_return)
13253 /* Delete the LR from the register mask, so that the LR on
13254 the stack is loaded into the PC in the register mask. */
13255 saved_regs_mask &= ~ (1 << LR_REGNUM);
13257 saved_regs_mask &= ~ (1 << PC_REGNUM);
13259 /* We must use SP as the base register, because SP is one of the
13260 registers being restored. If an interrupt or page fault
13261 happens in the ldm instruction, the SP might or might not
13262 have been restored. That would be bad, as then SP will no
13263 longer indicate the safe area of stack, and we can get stack
13264 corruption. Using SP as the base register means that it will
13265 be reset correctly to the original value, should an interrupt
13266 occur. If the stack pointer already points at the right
13267 place, then omit the subtraction. */
13268 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13269 || cfun->calls_alloca)
13270 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13271 4 * bit_count (saved_regs_mask));
13272 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13274 if (IS_INTERRUPT (func_type))
13275 /* Interrupt handlers will have pushed the
13276 IP onto the stack, so restore it now. */
13277 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13281 /* This branch is executed for ARM mode (non-apcs frames) and
13282 Thumb-2 mode. Frame layout is essentially the same for those
13283 cases, except that in ARM mode frame pointer points to the
13284 first saved register, while in Thumb-2 mode the frame pointer points
13285 to the last saved register.
13287 It is possible to make frame pointer point to last saved
13288 register in both cases, and remove some conditionals below.
13289 That means that fp setup in prologue would be just "mov fp, sp"
13290 and sp restore in epilogue would be just "mov sp, fp", whereas
13291 now we have to use add/sub in those cases. However, the value
13292 of that would be marginal, as both mov and add/sub are 32-bit
13293 in ARM mode, and it would require extra conditionals
13294 in arm_expand_prologue to distinguish the ARM-apcs-frame case
13295 (where the frame pointer is required to point at the first register)
13296 from ARM-non-apcs-frame. Therefore, such a change is postponed
13297 until a real need arises. */
13298 unsigned HOST_WIDE_INT amount;
13300 /* Restore stack pointer if necessary. */
13301 if (TARGET_ARM && frame_pointer_needed)
13303 operands[0] = stack_pointer_rtx;
13304 operands[1] = hard_frame_pointer_rtx;
13306 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13307 output_add_immediate (operands);
13311 if (frame_pointer_needed)
13313 /* For Thumb-2 restore sp from the frame pointer.
13314 Operand restrictions mean we have to increment FP, then copy
13315 it to SP. */
13316 amount = offsets->locals_base - offsets->saved_regs;
13317 operands[0] = hard_frame_pointer_rtx;
13321 unsigned long count;
13322 operands[0] = stack_pointer_rtx;
13323 amount = offsets->outgoing_args - offsets->saved_regs;
13324 /* pop call clobbered registers if it avoids a
13325 separate stack adjustment. */
13326 count = offsets->saved_regs - offsets->saved_args;
13329 && !crtl->calls_eh_return
13330 && bit_count (saved_regs_mask) * 4 == count
13331 && !IS_INTERRUPT (func_type)
13332 && !crtl->tail_call_emit)
13334 unsigned long mask;
13335 mask = (1 << (arm_size_return_regs () / 4)) - 1;
13337 mask &= ~saved_regs_mask;
13339 while (bit_count (mask) * 4 > amount)
13341 while ((mask & (1 << reg)) == 0)
13343 mask &= ~(1 << reg);
13345 if (bit_count (mask) * 4 == amount) {
13346 amount = 0;
13347 saved_regs_mask |= mask;
13354 operands[1] = operands[0];
13355 operands[2] = GEN_INT (amount);
13356 output_add_immediate (operands);
13358 if (frame_pointer_needed)
13359 asm_fprintf (f, "\tmov\t%r, %r\n",
13360 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13363 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13365 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13366 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13367 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13372 start_reg = FIRST_FPA_REGNUM;
13374 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13376 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13378 if (reg - start_reg == 3)
13380 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13381 start_reg, SP_REGNUM);
13382 start_reg = reg + 1;
13387 if (reg != start_reg)
13388 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13389 start_reg, reg - start_reg,
13392 start_reg = reg + 1;
13396 /* Just in case the last register checked also needs unstacking. */
13397 if (reg != start_reg)
13398 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13399 start_reg, reg - start_reg, SP_REGNUM);
13402 if (TARGET_HARD_FLOAT && TARGET_VFP)
13404 start_reg = FIRST_VFP_REGNUM;
13405 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13407 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13408 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13410 if (start_reg != reg)
13411 vfp_output_fldmd (f, SP_REGNUM,
13412 (start_reg - FIRST_VFP_REGNUM) / 2,
13413 (reg - start_reg) / 2);
13414 start_reg = reg + 2;
13417 if (start_reg != reg)
13418 vfp_output_fldmd (f, SP_REGNUM,
13419 (start_reg - FIRST_VFP_REGNUM) / 2,
13420 (reg - start_reg) / 2);
13423 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13424 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13425 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13427 /* If we can, restore the LR into the PC. */
13428 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13429 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13430 && !IS_STACKALIGN (func_type)
13432 && crtl->args.pretend_args_size == 0
13433 && saved_regs_mask & (1 << LR_REGNUM)
13434 && !crtl->calls_eh_return)
13436 saved_regs_mask &= ~ (1 << LR_REGNUM);
13437 saved_regs_mask |= (1 << PC_REGNUM);
13438 rfe = IS_INTERRUPT (func_type);
13443 /* Load the registers off the stack. If we only have one register
13444 to load use the LDR instruction - it is faster. For Thumb-2
13445 always use pop and the assembler will pick the best instruction. */
13446 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13447 && !IS_INTERRUPT (func_type))
13449 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13451 else if (saved_regs_mask)
13453 if (saved_regs_mask & (1 << SP_REGNUM))
13454 /* Note - write back to the stack register is not enabled
13455 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13456 in the list of registers and if we add writeback the
13457 instruction becomes UNPREDICTABLE. */
13458 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13460 else if (TARGET_ARM)
13461 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13464 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13467 if (crtl->args.pretend_args_size)
13469 /* Unwind the pre-pushed regs. */
13470 operands[0] = operands[1] = stack_pointer_rtx;
13471 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13472 output_add_immediate (operands);
13476 /* We may have already restored PC directly from the stack. */
13477 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13480 /* Stack adjustment for exception handler. */
13481 if (crtl->calls_eh_return)
13482 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13483 ARM_EH_STACKADJ_REGNUM);
13485 /* Generate the return instruction. */
13486 switch ((int) ARM_FUNC_TYPE (func_type))
13490 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13493 case ARM_FT_EXCEPTION:
13494 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13497 case ARM_FT_INTERWORKED:
13498 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13502 if (IS_STACKALIGN (func_type))
13504 /* See comment in arm_expand_prologue. */
13505 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13507 if (arm_arch5 || arm_arch4t)
13508 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13510 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13518 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13519 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13521 arm_stack_offsets *offsets;
13527 /* Emit any call-via-reg trampolines that are needed for v4t support
13528 of call_reg and call_value_reg type insns. */
13529 for (regno = 0; regno < LR_REGNUM; regno++)
13531 rtx label = cfun->machine->call_via[regno];
13535 switch_to_section (function_section (current_function_decl));
13536 targetm.asm_out.internal_label (asm_out_file, "L",
13537 CODE_LABEL_NUMBER (label));
13538 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13542 /* ??? Probably not safe to set this here, since it assumes that a
13543 function will be emitted as assembly immediately after we generate
13544 RTL for it. This does not happen for inline functions. */
13545 cfun->machine->return_used_this_function = 0;
13547 else /* TARGET_32BIT */
13549 /* We need to take into account any stack-frame rounding. */
13550 offsets = arm_get_frame_offsets ();
13552 gcc_assert (!use_return_insn (FALSE, NULL)
13553 || (cfun->machine->return_used_this_function != 0)
13554 || offsets->saved_regs == offsets->outgoing_args
13555 || frame_pointer_needed);
13557 /* Reset the ARM-specific per-function variables. */
13558 after_arm_reorg = 0;
13562 /* Generate and emit an insn that we will recognize as a push_multi.
13563 Unfortunately, since this insn does not reflect very well the actual
13564 semantics of the operation, we need to annotate the insn for the benefit
13565 of DWARF2 frame unwind information. */
13567 emit_multi_reg_push (unsigned long mask)
13570 int num_dwarf_regs;
13574 int dwarf_par_index;
13577 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13578 if (mask & (1 << i))
13581 gcc_assert (num_regs && num_regs <= 16);
13583 /* We don't record the PC in the dwarf frame information. */
13584 num_dwarf_regs = num_regs;
13585 if (mask & (1 << PC_REGNUM))
13588 /* For the body of the insn we are going to generate an UNSPEC in
13589 parallel with several USEs. This allows the insn to be recognized
13590 by the push_multi pattern in the arm.md file. The insn looks
13591 something like this:
13594 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
13595 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
13596 (use (reg:SI 11 fp))
13597 (use (reg:SI 12 ip))
13598 (use (reg:SI 14 lr))
13599 (use (reg:SI 15 pc))
13602 For the frame note however, we try to be more explicit and actually
13603 show each register being stored into the stack frame, plus a (single)
13604 decrement of the stack pointer. We do it this way in order to be
13605 friendly to the stack unwinding code, which only wants to see a single
13606 stack decrement per instruction. The RTL we generate for the note looks
13607 something like this:
13610 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
13611 (set (mem:SI (reg:SI sp)) (reg:SI r4))
13612 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
13613 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
13614 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
13617 This sequence is used both by the code to support stack unwinding for
13618 exceptions handlers and the code to generate dwarf2 frame debugging. */
13620 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
13621 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
13622 dwarf_par_index = 1;
13624 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13626 if (mask & (1 << i))
13628 reg = gen_rtx_REG (SImode, i);
13630 XVECEXP (par, 0, 0)
13631 = gen_rtx_SET (VOIDmode,
13632 gen_frame_mem (BLKmode,
13633 gen_rtx_PRE_DEC (BLKmode,
13634 stack_pointer_rtx)),
13635 gen_rtx_UNSPEC (BLKmode,
13636 gen_rtvec (1, reg),
13637 UNSPEC_PUSH_MULT));
13639 if (i != PC_REGNUM)
13641 tmp = gen_rtx_SET (VOIDmode,
13642 gen_frame_mem (SImode, stack_pointer_rtx),
13644 RTX_FRAME_RELATED_P (tmp) = 1;
13645 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
13653 for (j = 1, i++; j < num_regs; i++)
13655 if (mask & (1 << i))
13657 reg = gen_rtx_REG (SImode, i);
13659 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
13661 if (i != PC_REGNUM)
13664 = gen_rtx_SET (VOIDmode,
13665 gen_frame_mem (SImode,
13666 plus_constant (stack_pointer_rtx,
13669 RTX_FRAME_RELATED_P (tmp) = 1;
13670 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
13677 par = emit_insn (par);
13679 tmp = gen_rtx_SET (VOIDmode,
13681 plus_constant (stack_pointer_rtx, -4 * num_regs));
13682 RTX_FRAME_RELATED_P (tmp) = 1;
13683 XVECEXP (dwarf, 0, 0) = tmp;
13685 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13690 /* Calculate the size of the return value that is passed in registers. */
13692 arm_size_return_regs (void)
13694 enum machine_mode mode;
13696 if (crtl->return_rtx != 0)
13697 mode = GET_MODE (crtl->return_rtx);
13699 mode = DECL_MODE (DECL_RESULT (current_function_decl));
13701 return GET_MODE_SIZE (mode);
/* Emit an insn, recognizable as a push_multi, that stores COUNT FPA
   registers starting at BASE_REG onto the stack, and return it.  */
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (XFmode, base_reg++);

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem (BLKmode,
				  gen_rtx_PRE_DEC (BLKmode,
						   stack_pointer_rtx)),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));
  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (XFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (XFmode, base_reg++);
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (XFmode,
					plus_constant (stack_pointer_rtx,
						       i * 12)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -12 * count));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
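/* For instance, emit_sfm (FIRST_FPA_REGNUM, 4) builds a PARALLEL matched
   by push_multi and a note recording one SP decrement of 48 together with
   four XFmode (12-byte) register stores, mirroring the layout used by
   emit_multi_reg_push above.  */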
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     However, we do not need to know about leaf functions once reload
     has completed, and the frame size cannot be changed after that
     time, so we can safely use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
		   arm_compute_static_chain_stack_bytes();

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      if (! IS_VOLATILE (func_type))
	{
	  /* Space for saved FPA registers.  */
	  for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 12;

	  /* Space for saved VFP registers.  */
	  if (TARGET_HARD_FLOAT && TARGET_VFP)
	    saved += arm_get_vfp_saved_size ();
	}
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs = offsets->saved_args + saved +
			arm_compute_static_chain_stack_bytes();
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (!crtl->tail_call_emit
	      && arm_size_return_regs () <= 12)
	    reg = 3;
	  else
	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
	      {
		if ((offsets->saved_regs_mask & (1 << i)) == 0)
		  {
		    reg = i;
		    break;
		  }
	      }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
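/* As a worked example, assuming CALLER_INTERWORKING_SLOT_SIZE is 0 and no
   static chain: an ARM function that saves {r4, lr}, has no pretend args,
   12 bytes of locals and no outgoing args gets saved_args = 0,
   saved_regs = 8, soft_frame = 8 and locals_base = 20;  outgoing_args is
   then rounded from 20 up to 24 under ARM_DOUBLEWORD_ALIGN so that SP
   stays doubleword aligned.  */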
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
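/* Continuing the worked example above: eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM gives 24 - (0 + 4) = 20, while FRAME_POINTER_REGNUM
   to STACK_POINTER_REGNUM gives 24 - 8 = 16, both positive because the
   stack grows downwards.  */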
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  /* Save any floating point call-saved registers used by this
     function.  */
  if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
    {
      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	  {
	    insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
	    insn = gen_rtx_MEM (XFmode, insn);
	    insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
	    RTX_FRAME_RELATED_P (insn) = 1;
	    saved_size += 12;
	  }
    }
  else
    {
      start_reg = LAST_FPA_REGNUM;

      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	{
	  if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	    {
	      if (start_reg - reg == 3)
		{
		  insn = emit_sfm (reg, 4);
		  RTX_FRAME_RELATED_P (insn) = 1;
		  saved_size += 48;
		  start_reg = reg - 1;
		}
	    }
	  else
	    {
	      if (start_reg != reg)
		{
		  insn = emit_sfm (reg + 1, start_reg - reg);
		  RTX_FRAME_RELATED_P (insn) = 1;
		  saved_size += (start_reg - reg) * 12;
		}

	      start_reg = reg - 1;
	    }
	}

      if (start_reg != reg)
	{
	  insn = emit_sfm (reg + 1, start_reg - reg);
	  saved_size += (start_reg - reg) * 12;
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }

  return saved_size;
}
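/* As an illustration of the VFP loop above: if the S registers forming
   d8-d11 are live and call-saved but d12 is dead, the dead pair ends the
   run and a single vfp_emit_fstmd call stores the four D registers
   (4 * 8 = 32 bytes);  scanning then resumes at d13.  */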
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			   plus_constant (stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx dwarf;
      rtx r0;
      rtx r1;
      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);
      /* Use a real rtvec rather than NULL_RTVEC so the rest of the
	 compiler won't choke.  */
      dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
      dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
      insn = gen_movsi (r0, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      emit_insn (insn);
      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if the IP register is clobbered
     when creating the frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer, however, corrupts the IP
	     register, so we must push it first.  */
	  insn = emit_multi_reg_push (1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}
      else if (IS_NESTED (func_type))
	{
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this we need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register.
	       2. A slot on the stack above the frame.  (This only
		  works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
		  onto the stack.

	     Note - we only need to tell the dwarf2 backend about the SP
	     adjustment in the second variant; the static chain register
	     doesn't need to be unwound, as it doesn't contain a value
	     inherited from the caller.  */

	  if (df_regs_ever_live_p (3) == false)
	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx dwarf;

	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	      saved_regs += 4;

	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		insn = emit_multi_reg_push
		  ((0xf0 >> (args_to_push / 4)) & 0xf);
	      else
		insn = emit_insn
		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (- args_to_push)));

	      RTX_FRAME_RELATED_P (insn) = 1;

	      saved_pretend_args = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;

	      /* Now reuse r3 to preserve IP.  */
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	    }
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (stack_pointer_rtx, fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is
     APCS), subtracting four from LR now will mean that the function
     return can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (lr, -4));
    }

  if (live_regs_mask)
    {
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;

	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}
      insn = emit_multi_reg_push (live_regs_mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (IS_NESTED (func_type))
	    {
	      /* Recover the static chain register.  */
	      if (!df_regs_ever_live_p (3)
		  || saved_pretend_args)
		insn = gen_rtx_REG (SImode, 3);
	      else /* if (crtl->args.pretend_args_size == 0) */
		{
		  insn = plus_constant (hard_frame_pointer_rtx, 4);
		  insn = gen_frame_mem (SImode, insn);
		}
	      emit_set_insn (ip_rtx, insn);
	      /* Add a USE to stop propagate_one_insn() from barfing.  */
	      emit_insn (gen_prologue_use (ip_rtx));
	    }
	}
      else
	{
	  insn = GEN_INT (saved_regs - 4);
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
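/* For reference, the classic APCS frame prologue generated by the code
   above for a simple nested-frame function looks like:

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

   where the saved IP value records the caller's stack pointer in the
   frame backtrace structure.  */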
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc('s', stream);
	}
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;
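    /* For example, an output template such as "add%?\t%0, %1, %2" in arm.md
       uses '?' so that, while the ccfsm is conditionalizing code under
       ARM_EQ, the opcode is emitted as "addeq".  */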
    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text
       consists of further digits which we don't want to be part of the
       operand number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	r = REAL_VALUE_NEGATE (r);
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	default:
	  gcc_unreachable ();
	}
      return;

    case 'B':
      if (GET_CODE (x) == CONST_INT)
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    /* Truncate Cirrus shift counts.  */
    case 's':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
	  return;
	}
      arm_print_operand (stream, x, 0);
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	if (!shift_operator (x, SImode))
	  {
	    output_operand_lossage ("invalid shift operand");
	    break;
	  }

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.  */
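      /* Concretely, for a DImode value held in {r0, r1} on a little-endian
	 (!WORDS_BIG_ENDIAN) target, %Q prints r0 (least significant word),
	 %R prints r1 (most significant word), and %H prints r1, the higher
	 register number, whatever the endianness.  */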
    case 'Q':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   GET_CODE (XEXP (x, 0)) == REG
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;
    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;
    /* Cirrus registers can be accessed in a variety of ways:
	 single floating point (f)
	 double floating point (d)
	 32bit integer         (fx)
	 64bit integer         (dx).  */
    case 'W':			/* Cirrus register in F mode.  */
    case 'X':			/* Cirrus register in D mode.  */
    case 'Y':			/* Cirrus register in FX mode.  */
    case 'Z':			/* Cirrus register in DX mode.  */
      gcc_assert (GET_CODE (x) == REG
		  && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);

      fprintf (stream, "mv%s%s",
	       code == 'W' ? "f"
	       : code == 'X' ? "d"
	       : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);

      return;

    /* Print cirrus register in the mode specified by the register's mode.  */
    case 'V':
      {
	int mode = GET_MODE (x);

	if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "mv%s%s",
		 mode == DFmode ? "d"
		 : mode == SImode ? "fx"
		 : mode == DImode ? "dx"
		 : "f", reg_names[REGNO (x)] + 2);
      }
      return;

    case 'U':
      if (GET_CODE (x) != REG
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (GET_CODE (x) != CONST_INT
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fprintf (stream, wc_reg_names [INTVAL (x)]);
	}
      return;
    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	int mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_CODE (x) != REG
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	int mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;
    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;
    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	gcc_assert (GET_CODE (x) == MEM);
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r]", REGNO (addr));
	if (postinc)
	  fputs("!", stream);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	int mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;
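    /* For instance, s3 (regno FIRST_VFP_REGNUM + 3) prints as "d1[2]":
       it is the odd half of d1, which holds halfword elements 2 and 3 of
       that D register.  */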
    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  if (TARGET_NEON)
	    {
	      char fpstr[20];
	      real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			       sizeof (fpstr), 0, 1);
	      fprintf (stream, "#%s", fpstr);
	    }
	  else
	    fprintf (stream, "#%s", fp_immediate_constant (x));
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  enum machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (TARGET_VXWORKS_RTP
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    REAL_VALUE_TYPE rval;

	    REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

	    assemble_real
	      (rval, GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
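/* For example, under -fPIC a constant-table word referencing a non-local
   symbol is emitted as

	.word	sym(GOT)

   while a local symbol gets the cheaper form

	.word	sym(GOTOFF)

   (VxWorks RTP targets always use the GOT form, as noted above).  */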
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
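/* For example, on an AAPCS target a constructor with priority 65 lands in
   the section ".init_array.00065";  the fixed-width %.5u suffix keeps the
   sections in sorted order for the linker.  */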
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
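/* For example, the fsm transforms

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   deleting the branch and predicating the skipped instruction.  */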
/* Returns the index of the ARM condition code string in
   `arm_condition_codes'.  COMPARISON should be an rtx like
   `(eq (...) (...))'.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      gcc_assert (comp_code == EQ || comp_code == NE);

      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      return code;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: gcc_unreachable ();
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: gcc_unreachable ();
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: gcc_unreachable ();
	}

    case CCFPEmode:
    case CCFPmode:
      /* These encodings assume that AC=1 in the FPA system control
	 byte.  This allows us to handle all cases except UNEQ and
	 LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: gcc_unreachable ();
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: gcc_unreachable ();
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: gcc_unreachable ();
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: gcc_unreachable ();
	}

    default: gcc_unreachable ();
    }
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx insn)
{
  rtx first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (GET_CODE (insn) == JUMP_INSN)
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Allow up to 4 conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > 4)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (GET_CODE(insn) == JUMP_INSN)
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another
     one.  */
  int seeking_return = 0;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == BARRIER)
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (GET_CODE (start_insn) == CODE_LABEL
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (GET_CODE (body) == RETURN)
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == BARRIER)
	    start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == CODE_LABEL
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (GET_CODE (insn) != JUMP_INSN)
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx this_insn = start_insn, label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
	seeking_return = 1;
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
	{
	  seeking_return = 1;
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && GET_CODE (this_insn) == BARRIER)
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == RETURN
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == RETURN
		       && seeking_return)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;

	      /* A conditional cirrus instruction must be followed by
		 a non Cirrus instruction.  However, since we
		 conditionalize instructions in this function and by
		 the time we get here we can't add instructions
		 (nops), because shorten_branches() has already been
		 called, we will disable conditionalizing Cirrus
		 instructions to be safe.  */
	      if (GET_CODE (scanbody) != USE
		  && GET_CODE (scanbody) != CLOBBER
		  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (GET_CODE (this_insn) != BARRIER
				  && GET_CODE (this_insn) != CODE_LABEL));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
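/* For example, with arm_current_cc == ARM_EQ, a mask length of 2 and both
   mask bits set, BUFF becomes "tt" and "itt	eq" is printed ahead of the
   first conditional instruction of the block.  */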
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
      && IS_CIRRUS_REGNUM (regno))
    /* We have outlawed SI values in Cirrus registers because they
       reside in the lower 32 bits, but SF values reside in the
       upper 32 bits.  This causes gcc all sorts of grief.  We can't
       even split the registers into pairs because Cirrus SI values
       get sign extended to 64bits-- aldyh.  */
    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have the NEON extensions for
	 loading/storing them, too.  */
      if (mode == HFmode)
	return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
	   && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  /* The only registers left are the FPA registers
     which we only allow to hold FP values.  */
  return (TARGET_HARD_FLOAT && TARGET_FPA
	  && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && regno >= FIRST_FPA_REGNUM
	  && regno <= LAST_FPA_REGNUM);
}
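/* For example, with TARGET_LDRD a DImode value may be placed in {r0, r1}
   (even base register) but not in {r1, r2}, keeping ldrd/strd usable;  and
   CImode, at six doublewords, would need 12 core registers, so the
   ARM_NUM_REGS (mode) <= 4 test rejects it for the general registers.  */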
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_CIRRUS_REGNUM (regno))
    return CIRRUS_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return FPA_REGS;
}
15912 /* Handle a special case when computing the offset
15913 of an argument from the frame pointer. */
15915 arm_debugger_arg_offset (int value, rtx addr)
15919 /* We are only interested if dbxout_parms() failed to compute the offset. */
15923 /* We can only cope with the case where the address is held in a register. */
15924 if (GET_CODE (addr) != REG)
15927 /* If we are using the frame pointer to point at the argument, then
15928 an offset of 0 is correct. */
15929 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
15932 /* If we are using the stack pointer to point at the
15933 argument, then an offset of 0 is correct. */
15934 /* ??? Check this is consistent with thumb2 frame layout. */
15935 if ((TARGET_THUMB || !frame_pointer_needed)
15936 && REGNO (addr) == SP_REGNUM)
15939 /* Oh dear. The argument is pointed to by a register rather
15940 than being held in a register, or being stored at a known
15941 offset from the frame pointer. Since GDB only understands
15942 those two kinds of argument we must translate the address
15943 held in the register into an offset from the frame pointer.
15944 We do this by searching through the insns for the function
15945 looking to see where this register gets its value. If the
15946 register is initialized from the frame pointer plus an offset
15947 then we are in luck and we can continue, otherwise we give up.
15949 This code is exercised by producing debugging information
15950 for a function with arguments like this:
15952 double func (double a, double b, int c, double d) {return d;}
15954 Without this code the stab for parameter 'd' will be set to
15955 an offset of 0 from the frame pointer, rather than 8. */
15957 /* The if() statement says:
15959 If the insn is a normal instruction
15960 and if the insn is setting the value in a register
15961 and if the register being set is the register holding the address of the argument
15962 and if the address is computed by an addition
15963 that involves adding to a register
15964 which is the frame pointer
15965 a constant integer
15967 then... */
15969 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15971 if ( GET_CODE (insn) == INSN
15972 && GET_CODE (PATTERN (insn)) == SET
15973 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
15974 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
15975 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
15976 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
15977 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
15980 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
15989 warning (0, "unable to compute real location of stacked parameter");
15990 value = 8; /* XXX magic hack */
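/* Editorial sketch: the insn the loop above searches for has this RTL
   shape -- a copy of "frame pointer plus constant" into the register
   holding the argument's address:

     (insn ... (set (reg:SI rN)
                    (plus:SI (reg:SI fp) (const_int 8))))

   When found, the const_int (8 here) becomes the offset reported to
   the debugger.  */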
15996 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
15997 do \
15998 { \
15999 if ((MASK) & insn_flags) \
16000 add_builtin_function ((NAME), (TYPE), (CODE), \
16001 BUILT_IN_MD, NULL, NULL_TREE); \
16002 } \
16003 while (0)
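/* Editorial example of the macro above: a call such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb",
                   v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB);

   registers the builtin only when the target's insn_flags include
   FL_IWMMXT, so unsupported builtins never become visible.  */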
16005 struct builtin_description
16007 const unsigned int mask;
16008 const enum insn_code icode;
16009 const char * const name;
16010 const enum arm_builtins code;
16011 const enum rtx_code comparison;
16012 const unsigned int flag;
16015 static const struct builtin_description bdesc_2arg[] =
16017 #define IWMMXT_BUILTIN(code, string, builtin) \
16018 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16019 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16021 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16022 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16023 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16024 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16025 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16026 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16027 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16028 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16029 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16030 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16031 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16032 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16033 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16034 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16035 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16036 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16037 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16038 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16039 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16040 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16041 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16042 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16043 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16044 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16045 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16046 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16047 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16048 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16049 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16050 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16051 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16052 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16053 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16054 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16055 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16056 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16057 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16058 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16059 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16060 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16061 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16062 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16063 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16064 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16065 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16066 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16067 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16068 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16069 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16070 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16071 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16072 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16073 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16074 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16075 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16076 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16077 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16078 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16080 #define IWMMXT_BUILTIN2(code, builtin) \
16081 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16083 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16084 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16085 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16086 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16087 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16088 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16089 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16090 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16091 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16092 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16093 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16094 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16095 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16096 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16097 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16098 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16099 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16100 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16101 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16102 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16103 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16104 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16105 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16106 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16107 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16108 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16109 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16110 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16111 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16112 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16113 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16114 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
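/* Editorial sketch of how the two-operand builtins above surface in
   user code (vector types spelled with GCC's vector_size extension;
   this fragment is illustrative, not part of the compiler):  */
#if 0
typedef char v8qi __attribute__ ((vector_size (8)));

v8qi
add_bytes (v8qi a, v8qi b)
{
  return __builtin_arm_waddb (a, b);	/* iWMMXt WADDB.  */
}
#endif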
16117 static const struct builtin_description bdesc_1arg[] =
16119 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16120 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16121 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16122 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16123 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16124 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16125 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16126 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16127 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16128 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16129 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16130 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16131 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16132 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16133 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16134 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16135 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16136 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16139 /* Set up all the iWMMXt builtins. This is
16140 not called if TARGET_IWMMXT is zero. */
16143 arm_init_iwmmxt_builtins (void)
16145 const struct builtin_description * d;
16147 tree endlink = void_list_node;
16149 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16150 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16151 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16153 tree int_ftype_int
16154 = build_function_type (integer_type_node,
16155 tree_cons (NULL_TREE, integer_type_node, endlink));
16156 tree v8qi_ftype_v8qi_v8qi_int
16157 = build_function_type (V8QI_type_node,
16158 tree_cons (NULL_TREE, V8QI_type_node,
16159 tree_cons (NULL_TREE, V8QI_type_node,
16160 tree_cons (NULL_TREE,
16163 tree v4hi_ftype_v4hi_int
16164 = build_function_type (V4HI_type_node,
16165 tree_cons (NULL_TREE, V4HI_type_node,
16166 tree_cons (NULL_TREE, integer_type_node,
16168 tree v2si_ftype_v2si_int
16169 = build_function_type (V2SI_type_node,
16170 tree_cons (NULL_TREE, V2SI_type_node,
16171 tree_cons (NULL_TREE, integer_type_node,
16173 tree v2si_ftype_di_di
16174 = build_function_type (V2SI_type_node,
16175 tree_cons (NULL_TREE, long_long_integer_type_node,
16176 tree_cons (NULL_TREE, long_long_integer_type_node,
16178 tree di_ftype_di_int
16179 = build_function_type (long_long_integer_type_node,
16180 tree_cons (NULL_TREE, long_long_integer_type_node,
16181 tree_cons (NULL_TREE, integer_type_node,
16183 tree di_ftype_di_int_int
16184 = build_function_type (long_long_integer_type_node,
16185 tree_cons (NULL_TREE, long_long_integer_type_node,
16186 tree_cons (NULL_TREE, integer_type_node,
16187 tree_cons (NULL_TREE,
16190 tree int_ftype_v8qi
16191 = build_function_type (integer_type_node,
16192 tree_cons (NULL_TREE, V8QI_type_node,
16194 tree int_ftype_v4hi
16195 = build_function_type (integer_type_node,
16196 tree_cons (NULL_TREE, V4HI_type_node,
16198 tree int_ftype_v2si
16199 = build_function_type (integer_type_node,
16200 tree_cons (NULL_TREE, V2SI_type_node,
16202 tree int_ftype_v8qi_int
16203 = build_function_type (integer_type_node,
16204 tree_cons (NULL_TREE, V8QI_type_node,
16205 tree_cons (NULL_TREE, integer_type_node,
16207 tree int_ftype_v4hi_int
16208 = build_function_type (integer_type_node,
16209 tree_cons (NULL_TREE, V4HI_type_node,
16210 tree_cons (NULL_TREE, integer_type_node,
16212 tree int_ftype_v2si_int
16213 = build_function_type (integer_type_node,
16214 tree_cons (NULL_TREE, V2SI_type_node,
16215 tree_cons (NULL_TREE, integer_type_node,
16217 tree v8qi_ftype_v8qi_int_int
16218 = build_function_type (V8QI_type_node,
16219 tree_cons (NULL_TREE, V8QI_type_node,
16220 tree_cons (NULL_TREE, integer_type_node,
16221 tree_cons (NULL_TREE,
16224 tree v4hi_ftype_v4hi_int_int
16225 = build_function_type (V4HI_type_node,
16226 tree_cons (NULL_TREE, V4HI_type_node,
16227 tree_cons (NULL_TREE, integer_type_node,
16228 tree_cons (NULL_TREE,
16231 tree v2si_ftype_v2si_int_int
16232 = build_function_type (V2SI_type_node,
16233 tree_cons (NULL_TREE, V2SI_type_node,
16234 tree_cons (NULL_TREE, integer_type_node,
16235 tree_cons (NULL_TREE,
16238 /* Miscellaneous. */
16239 tree v8qi_ftype_v4hi_v4hi
16240 = build_function_type (V8QI_type_node,
16241 tree_cons (NULL_TREE, V4HI_type_node,
16242 tree_cons (NULL_TREE, V4HI_type_node,
16244 tree v4hi_ftype_v2si_v2si
16245 = build_function_type (V4HI_type_node,
16246 tree_cons (NULL_TREE, V2SI_type_node,
16247 tree_cons (NULL_TREE, V2SI_type_node,
16249 tree v2si_ftype_v4hi_v4hi
16250 = build_function_type (V2SI_type_node,
16251 tree_cons (NULL_TREE, V4HI_type_node,
16252 tree_cons (NULL_TREE, V4HI_type_node,
16254 tree v2si_ftype_v8qi_v8qi
16255 = build_function_type (V2SI_type_node,
16256 tree_cons (NULL_TREE, V8QI_type_node,
16257 tree_cons (NULL_TREE, V8QI_type_node,
16259 tree v4hi_ftype_v4hi_di
16260 = build_function_type (V4HI_type_node,
16261 tree_cons (NULL_TREE, V4HI_type_node,
16262 tree_cons (NULL_TREE,
16263 long_long_integer_type_node,
16265 tree v2si_ftype_v2si_di
16266 = build_function_type (V2SI_type_node,
16267 tree_cons (NULL_TREE, V2SI_type_node,
16268 tree_cons (NULL_TREE,
16269 long_long_integer_type_node,
16271 tree void_ftype_int_int
16272 = build_function_type (void_type_node,
16273 tree_cons (NULL_TREE, integer_type_node,
16274 tree_cons (NULL_TREE, integer_type_node,
16276 tree di_ftype_void
16277 = build_function_type (long_long_unsigned_type_node, endlink);
16278 tree di_ftype_v8qi
16279 = build_function_type (long_long_integer_type_node,
16280 tree_cons (NULL_TREE, V8QI_type_node,
16282 tree di_ftype_v4hi
16283 = build_function_type (long_long_integer_type_node,
16284 tree_cons (NULL_TREE, V4HI_type_node,
16286 tree di_ftype_v2si
16287 = build_function_type (long_long_integer_type_node,
16288 tree_cons (NULL_TREE, V2SI_type_node,
16290 tree v2si_ftype_v4hi
16291 = build_function_type (V2SI_type_node,
16292 tree_cons (NULL_TREE, V4HI_type_node,
16294 tree v4hi_ftype_v8qi
16295 = build_function_type (V4HI_type_node,
16296 tree_cons (NULL_TREE, V8QI_type_node,
16299 tree di_ftype_di_v4hi_v4hi
16300 = build_function_type (long_long_unsigned_type_node,
16301 tree_cons (NULL_TREE,
16302 long_long_unsigned_type_node,
16303 tree_cons (NULL_TREE, V4HI_type_node,
16304 tree_cons (NULL_TREE,
16308 tree di_ftype_v4hi_v4hi
16309 = build_function_type (long_long_unsigned_type_node,
16310 tree_cons (NULL_TREE, V4HI_type_node,
16311 tree_cons (NULL_TREE, V4HI_type_node,
16314 /* Normal vector binops. */
16315 tree v8qi_ftype_v8qi_v8qi
16316 = build_function_type (V8QI_type_node,
16317 tree_cons (NULL_TREE, V8QI_type_node,
16318 tree_cons (NULL_TREE, V8QI_type_node,
16320 tree v4hi_ftype_v4hi_v4hi
16321 = build_function_type (V4HI_type_node,
16322 tree_cons (NULL_TREE, V4HI_type_node,
16323 tree_cons (NULL_TREE, V4HI_type_node,
16325 tree v2si_ftype_v2si_v2si
16326 = build_function_type (V2SI_type_node,
16327 tree_cons (NULL_TREE, V2SI_type_node,
16328 tree_cons (NULL_TREE, V2SI_type_node,
16330 tree di_ftype_di_di
16331 = build_function_type (long_long_unsigned_type_node,
16332 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16333 tree_cons (NULL_TREE,
16334 long_long_unsigned_type_node,
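/* Editorial note: each "..._ftype_..." node built above is simply the
   tree form of a C prototype.  For instance, v8qi_ftype_v8qi_v8qi
   corresponds to "v8qi f (v8qi, v8qi)" and di_ftype_di_int to
   "long long f (long long, int)"; the endlink terminator marks the
   argument list as fixed (not varargs).  */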
16337 /* Add all builtins that are more or less simple operations on two
16338 operands. */
16339 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16341 /* Use one of the operands; the target can have a different mode for
16342 mask-generating compares. */
16343 enum machine_mode mode;
16349 mode = insn_data[d->icode].operand[1].mode;
16354 type = v8qi_ftype_v8qi_v8qi;
16357 type = v4hi_ftype_v4hi_v4hi;
16360 type = v2si_ftype_v2si_v2si;
16363 type = di_ftype_di_di;
16367 gcc_unreachable ();
16370 def_mbuiltin (d->mask, d->name, type, d->code);
16373 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
16374 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16375 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16376 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16378 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16379 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16380 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16381 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16382 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16383 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16385 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16386 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16387 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16388 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16389 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16390 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16392 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16393 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16394 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16395 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16396 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16397 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16399 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16400 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16401 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16402 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16403 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16404 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16406 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16408 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16409 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16410 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16411 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16414 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16415 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16416 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16417 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16418 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16419 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16420 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16421 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16424 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16425 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16428 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16429 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16431 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16433 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16435 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16438 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
16449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
16451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
16452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
16453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
16454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
16456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
16457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
16458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
16459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
16460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
16461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
16462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
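/* Editorial sketch: the paired shift entries above give each shift a
   register-count form (DI operand) and an immediate form, e.g.:  */
#if 0
typedef short v4hi __attribute__ ((vector_size (8)));

v4hi shift_var (v4hi x, long long n) { return __builtin_arm_wsllh (x, n); }
v4hi shift_imm (v4hi x)              { return __builtin_arm_wsllhi (x, 3); }
#endif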
16466 arm_init_tls_builtins (void)
16470 ftype = build_function_type (ptr_type_node, void_list_node);
16471 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
16472 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
16474 TREE_NOTHROW (decl) = 1;
16475 TREE_READONLY (decl) = 1;
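/* Editorial sketch: user code can then read the thread register via

     void *tp = __builtin_thread_pointer ();

   which is nothrow and readonly per the flags set above, so repeated
   calls can be CSEd.  */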
16478 enum neon_builtin_type_bits {
16494 #define v8qi_UP T_V8QI
16495 #define v4hi_UP T_V4HI
16496 #define v2si_UP T_V2SI
16497 #define v2sf_UP T_V2SF
16499 #define v16qi_UP T_V16QI
16500 #define v8hi_UP T_V8HI
16501 #define v4si_UP T_V4SI
16502 #define v4sf_UP T_V4SF
16503 #define v2di_UP T_V2DI
16508 #define UP(X) X##_UP
16543 NEON_LOADSTRUCTLANE,
16545 NEON_STORESTRUCTLANE,
16554 const neon_itype itype;
16556 const enum insn_code codes[T_MAX];
16557 const unsigned int num_vars;
16558 unsigned int base_fcode;
16559 } neon_builtin_datum;
16561 #define CF(N,X) CODE_FOR_neon_##N##X
16563 #define VAR1(T, N, A) \
16564 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
16565 #define VAR2(T, N, A, B) \
16566 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
16567 #define VAR3(T, N, A, B, C) \
16568 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
16569 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
16570 #define VAR4(T, N, A, B, C, D) \
16571 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
16572 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
16573 #define VAR5(T, N, A, B, C, D, E) \
16574 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
16575 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
16576 #define VAR6(T, N, A, B, C, D, E, F) \
16577 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
16578 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
16579 #define VAR7(T, N, A, B, C, D, E, F, G) \
16580 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
16581 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16583 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
16584 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16586 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16587 CF (N, G), CF (N, H) }, 8, 0
16588 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
16589 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16590 | UP (H) | UP (I), \
16591 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16592 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
16593 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
16594 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16595 | UP (H) | UP (I) | UP (J), \
16596 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16597 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
16599 /* The mode entries in the following table correspond to the "key" type of the
16600 instruction variant, i.e. equivalent to that which would be specified after
16601 the assembler mnemonic, which usually refers to the last vector operand.
16602 (Signed, unsigned and polynomial types are not distinguished, however;
16603 they are all mapped onto the same mode for a given element size.)  The modes
16604 listed per instruction should be the same as those defined for that
16605 instruction's pattern in neon.md.
16606 WARNING: Variants should be listed in the same increasing order as
16607 neon_builtin_type_bits. */
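/* Editorial example: VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) expands,
   via the UP and CF helpers, to the initializer

     "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
     { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
       CODE_FOR_neon_vaddlv2si }, 3, 0

   i.e. one table entry describing three instruction variants.  */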
16609 static neon_builtin_datum neon_builtin_data[] =
16611 { VAR10 (BINOP, vadd,
16612 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16613 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
16614 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
16615 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16616 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16617 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
16618 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16619 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16620 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
16621 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16622 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
16623 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
16624 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
16625 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
16626 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
16627 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
16628 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
16629 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
16630 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
16631 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
16632 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
16633 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
16634 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16635 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16636 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16637 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
16638 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
16639 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
16640 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16641 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16642 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16643 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
16644 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16645 { VAR10 (BINOP, vsub,
16646 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16647 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
16648 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
16649 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16650 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16651 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
16652 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16653 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16654 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16655 { VAR2 (BINOP, vcage, v2sf, v4sf) },
16656 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
16657 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16658 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16659 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
16660 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16661 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
16662 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16663 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16664 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
16665 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16666 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16667 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
16668 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
16669 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
16670 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
16671 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16672 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16673 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16674 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16675 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16676 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16677 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16678 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16679 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
16680 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
16681 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
16682 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16683 /* FIXME: vget_lane supports more variants than this! */
16684 { VAR10 (GETLANE, vget_lane,
16685 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16686 { VAR10 (SETLANE, vset_lane,
16687 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16688 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
16689 { VAR10 (DUP, vdup_n,
16690 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16691 { VAR10 (DUPLANE, vdup_lane,
16692 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16693 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
16694 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
16695 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
16696 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
16697 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
16698 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
16699 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
16700 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16701 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16702 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
16703 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
16704 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16705 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
16706 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
16707 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16708 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16709 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
16710 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
16711 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16712 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
16713 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
16714 { VAR10 (BINOP, vext,
16715 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16716 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16717 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
16718 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
16719 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
16720 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
16721 { VAR10 (SELECT, vbsl,
16722 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16723 { VAR1 (VTBL, vtbl1, v8qi) },
16724 { VAR1 (VTBL, vtbl2, v8qi) },
16725 { VAR1 (VTBL, vtbl3, v8qi) },
16726 { VAR1 (VTBL, vtbl4, v8qi) },
16727 { VAR1 (VTBX, vtbx1, v8qi) },
16728 { VAR1 (VTBX, vtbx2, v8qi) },
16729 { VAR1 (VTBX, vtbx3, v8qi) },
16730 { VAR1 (VTBX, vtbx4, v8qi) },
16731 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16732 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16733 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16734 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
16735 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
16736 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
16737 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
16738 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
16739 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
16740 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
16741 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
16742 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
16743 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
16744 { VAR10 (LOAD1, vld1,
16745 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16746 { VAR10 (LOAD1LANE, vld1_lane,
16747 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16748 { VAR10 (LOAD1, vld1_dup,
16749 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16750 { VAR10 (STORE1, vst1,
16751 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16752 { VAR10 (STORE1LANE, vst1_lane,
16753 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16754 { VAR9 (LOADSTRUCT,
16755 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16756 { VAR7 (LOADSTRUCTLANE, vld2_lane,
16757 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16758 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
16759 { VAR9 (STORESTRUCT, vst2,
16760 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16761 { VAR7 (STORESTRUCTLANE, vst2_lane,
16762 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16763 { VAR9 (LOADSTRUCT,
16764 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16765 { VAR7 (LOADSTRUCTLANE, vld3_lane,
16766 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16767 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
16768 { VAR9 (STORESTRUCT, vst3,
16769 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16770 { VAR7 (STORESTRUCTLANE, vst3_lane,
16771 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16772 { VAR9 (LOADSTRUCT, vld4,
16773 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16774 { VAR7 (LOADSTRUCTLANE, vld4_lane,
16775 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16776 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
16777 { VAR9 (STORESTRUCT, vst4,
16778 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16779 { VAR7 (STORESTRUCTLANE, vst4_lane,
16780 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16781 { VAR10 (LOGICBINOP, vand,
16782 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16783 { VAR10 (LOGICBINOP, vorr,
16784 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16785 { VAR10 (BINOP, veor,
16786 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16787 { VAR10 (LOGICBINOP, vbic,
16788 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16789 { VAR10 (LOGICBINOP, vorn,
16790 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
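/* Editorial note: combined with the sprintf in arm_init_neon_builtins
   below, each entry above yields one builtin per listed variant, named
   "__builtin_neon_<name><mode>"; the vaddl entry, for example, becomes
   __builtin_neon_vaddlv8qi, __builtin_neon_vaddlv4hi and
   __builtin_neon_vaddlv2si.  */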
16806 arm_init_neon_builtins (void)
16808 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
16810 tree neon_intQI_type_node;
16811 tree neon_intHI_type_node;
16812 tree neon_polyQI_type_node;
16813 tree neon_polyHI_type_node;
16814 tree neon_intSI_type_node;
16815 tree neon_intDI_type_node;
16816 tree neon_float_type_node;
16818 tree intQI_pointer_node;
16819 tree intHI_pointer_node;
16820 tree intSI_pointer_node;
16821 tree intDI_pointer_node;
16822 tree float_pointer_node;
16824 tree const_intQI_node;
16825 tree const_intHI_node;
16826 tree const_intSI_node;
16827 tree const_intDI_node;
16828 tree const_float_node;
16830 tree const_intQI_pointer_node;
16831 tree const_intHI_pointer_node;
16832 tree const_intSI_pointer_node;
16833 tree const_intDI_pointer_node;
16834 tree const_float_pointer_node;
16836 tree V8QI_type_node;
16837 tree V4HI_type_node;
16838 tree V2SI_type_node;
16839 tree V2SF_type_node;
16840 tree V16QI_type_node;
16841 tree V8HI_type_node;
16842 tree V4SI_type_node;
16843 tree V4SF_type_node;
16844 tree V2DI_type_node;
16846 tree intUQI_type_node;
16847 tree intUHI_type_node;
16848 tree intUSI_type_node;
16849 tree intUDI_type_node;
16851 tree intEI_type_node;
16852 tree intOI_type_node;
16853 tree intCI_type_node;
16854 tree intXI_type_node;
16856 tree V8QI_pointer_node;
16857 tree V4HI_pointer_node;
16858 tree V2SI_pointer_node;
16859 tree V2SF_pointer_node;
16860 tree V16QI_pointer_node;
16861 tree V8HI_pointer_node;
16862 tree V4SI_pointer_node;
16863 tree V4SF_pointer_node;
16864 tree V2DI_pointer_node;
16866 tree void_ftype_pv8qi_v8qi_v8qi;
16867 tree void_ftype_pv4hi_v4hi_v4hi;
16868 tree void_ftype_pv2si_v2si_v2si;
16869 tree void_ftype_pv2sf_v2sf_v2sf;
16870 tree void_ftype_pdi_di_di;
16871 tree void_ftype_pv16qi_v16qi_v16qi;
16872 tree void_ftype_pv8hi_v8hi_v8hi;
16873 tree void_ftype_pv4si_v4si_v4si;
16874 tree void_ftype_pv4sf_v4sf_v4sf;
16875 tree void_ftype_pv2di_v2di_v2di;
16877 tree reinterp_ftype_dreg[5][5];
16878 tree reinterp_ftype_qreg[5][5];
16879 tree dreg_types[5], qreg_types[5];
16881 /* Create distinguished type nodes for NEON vector element types,
16882 and pointers to values of such types, so we can detect them later. */
16883 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
16884 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
16885 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
16886 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
16887 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
16888 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
16889 neon_float_type_node = make_node (REAL_TYPE);
16890 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
16891 layout_type (neon_float_type_node);
16893 /* Define typedefs which exactly correspond to the modes we are basing vector
16894 types on. If you change these names you'll need to change
16895 the table used by arm_mangle_type too. */
16896 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
16897 "__builtin_neon_qi");
16898 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
16899 "__builtin_neon_hi");
16900 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
16901 "__builtin_neon_si");
16902 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
16903 "__builtin_neon_sf");
16904 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
16905 "__builtin_neon_di");
16906 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
16907 "__builtin_neon_poly8");
16908 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
16909 "__builtin_neon_poly16");
16911 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
16912 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
16913 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
16914 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
16915 float_pointer_node = build_pointer_type (neon_float_type_node);
16917 /* Next create constant-qualified versions of the above types. */
16918 const_intQI_node = build_qualified_type (neon_intQI_type_node,
16920 const_intHI_node = build_qualified_type (neon_intHI_type_node,
16922 const_intSI_node = build_qualified_type (neon_intSI_type_node,
16924 const_intDI_node = build_qualified_type (neon_intDI_type_node,
16926 const_float_node = build_qualified_type (neon_float_type_node,
16929 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
16930 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
16931 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
16932 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
16933 const_float_pointer_node = build_pointer_type (const_float_node);
16935 /* Now create vector types based on our NEON element types. */
16936 /* 64-bit vectors. */
16938 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
16940 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
16942 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
16944 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
16945 /* 128-bit vectors. */
16947 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
16949 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
16951 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
16953 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
16955 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
16957 /* Unsigned integer types for various mode sizes. */
16958 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
16959 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
16960 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
16961 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
16963 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
16964 "__builtin_neon_uqi");
16965 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
16966 "__builtin_neon_uhi");
16967 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
16968 "__builtin_neon_usi");
16969 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
16970 "__builtin_neon_udi");
16972 /* Opaque integer types for structures of vectors. */
16973 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
16974 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
16975 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
16976 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
16978 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
16979 "__builtin_neon_ti");
16980 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
16981 "__builtin_neon_ei");
16982 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
16983 "__builtin_neon_oi");
16984 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
16985 "__builtin_neon_ci");
16986 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
16987 "__builtin_neon_xi");
16989 /* Pointers to vector types. */
16990 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
16991 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
16992 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
16993 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
16994 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
16995 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
16996 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
16997 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
16998 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17000 /* Operations which return results as pairs. */
17001 void_ftype_pv8qi_v8qi_v8qi =
17002 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17003 V8QI_type_node, NULL);
17004 void_ftype_pv4hi_v4hi_v4hi =
17005 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17006 V4HI_type_node, NULL);
17007 void_ftype_pv2si_v2si_v2si =
17008 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17009 V2SI_type_node, NULL);
17010 void_ftype_pv2sf_v2sf_v2sf =
17011 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17012 V2SF_type_node, NULL);
17013 void_ftype_pdi_di_di =
17014 build_function_type_list (void_type_node, intDI_pointer_node,
17015 neon_intDI_type_node, neon_intDI_type_node, NULL);
17016 void_ftype_pv16qi_v16qi_v16qi =
17017 build_function_type_list (void_type_node, V16QI_pointer_node,
17018 V16QI_type_node, V16QI_type_node, NULL);
17019 void_ftype_pv8hi_v8hi_v8hi =
17020 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17021 V8HI_type_node, NULL);
17022 void_ftype_pv4si_v4si_v4si =
17023 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17024 V4SI_type_node, NULL);
17025 void_ftype_pv4sf_v4sf_v4sf =
17026 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17027 V4SF_type_node, NULL);
17028 void_ftype_pv2di_v2di_v2di =
17029 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17030 V2DI_type_node, NULL);
17032 dreg_types[0] = V8QI_type_node;
17033 dreg_types[1] = V4HI_type_node;
17034 dreg_types[2] = V2SI_type_node;
17035 dreg_types[3] = V2SF_type_node;
17036 dreg_types[4] = neon_intDI_type_node;
17038 qreg_types[0] = V16QI_type_node;
17039 qreg_types[1] = V8HI_type_node;
17040 qreg_types[2] = V4SI_type_node;
17041 qreg_types[3] = V4SF_type_node;
17042 qreg_types[4] = V2DI_type_node;
17044 for (i = 0; i < 5; i++)
17047 for (j = 0; j < 5; j++)
17049 reinterp_ftype_dreg[i][j]
17050 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17051 reinterp_ftype_qreg[i][j]
17052 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
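/* Editorial example: after the loops above, reinterp_ftype_dreg[1][0]
   is the type of a function taking a V8QI vector and returning a V4HI
   vector -- the signature of vreinterpretv4hi applied to a v8qi
   operand.  */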
17056 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17058 neon_builtin_datum *d = &neon_builtin_data[i];
17059 unsigned int j, codeidx = 0;
17061 d->base_fcode = fcode;
17063 for (j = 0; j < T_MAX; j++)
17065 const char* const modenames[] = {
17066 "v8qi", "v4hi", "v2si", "v2sf", "di",
17067 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17071 enum insn_code icode;
17072 int is_load = 0, is_store = 0;
17074 if ((d->bits & (1 << j)) == 0)
17077 icode = d->codes[codeidx++];
17082 case NEON_LOAD1LANE:
17083 case NEON_LOADSTRUCT:
17084 case NEON_LOADSTRUCTLANE:
17086 /* Fall through. */
17088 case NEON_STORE1LANE:
17089 case NEON_STORESTRUCT:
17090 case NEON_STORESTRUCTLANE:
17093 /* Fall through. */
17096 case NEON_LOGICBINOP:
17097 case NEON_SHIFTINSERT:
17104 case NEON_SHIFTIMM:
17105 case NEON_SHIFTACC:
17111 case NEON_LANEMULL:
17112 case NEON_LANEMULH:
17114 case NEON_SCALARMUL:
17115 case NEON_SCALARMULL:
17116 case NEON_SCALARMULH:
17117 case NEON_SCALARMAC:
17123 tree return_type = void_type_node, args = void_list_node;
17125 /* Build a function type directly from the insn_data for this
17126 builtin. The build_function_type() function takes care of
17127 removing duplicates for us. */
17128 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17132 if (is_load && k == 1)
17134 /* Neon load patterns always have the memory operand
17135 (a SImode pointer) in the operand 1 position. We
17136 want a const pointer to the element type in that position. */
17138 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17144 eltype = const_intQI_pointer_node;
17149 eltype = const_intHI_pointer_node;
17154 eltype = const_intSI_pointer_node;
17159 eltype = const_float_pointer_node;
17164 eltype = const_intDI_pointer_node;
17167 default: gcc_unreachable ();
17170 else if (is_store && k == 0)
17172 /* Similarly, Neon store patterns use operand 0 as
17173 the memory location to store to (a SImode pointer).
17174 Use a pointer to the element type of the store in that position. */
17176 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17182 eltype = intQI_pointer_node;
17187 eltype = intHI_pointer_node;
17192 eltype = intSI_pointer_node;
17197 eltype = float_pointer_node;
17202 eltype = intDI_pointer_node;
17205 default: gcc_unreachable ();
17210 switch (insn_data[icode].operand[k].mode)
17212 case VOIDmode: eltype = void_type_node; break;
17214 case QImode: eltype = neon_intQI_type_node; break;
17215 case HImode: eltype = neon_intHI_type_node; break;
17216 case SImode: eltype = neon_intSI_type_node; break;
17217 case SFmode: eltype = neon_float_type_node; break;
17218 case DImode: eltype = neon_intDI_type_node; break;
17219 case TImode: eltype = intTI_type_node; break;
17220 case EImode: eltype = intEI_type_node; break;
17221 case OImode: eltype = intOI_type_node; break;
17222 case CImode: eltype = intCI_type_node; break;
17223 case XImode: eltype = intXI_type_node; break;
17224 /* 64-bit vectors. */
17225 case V8QImode: eltype = V8QI_type_node; break;
17226 case V4HImode: eltype = V4HI_type_node; break;
17227 case V2SImode: eltype = V2SI_type_node; break;
17228 case V2SFmode: eltype = V2SF_type_node; break;
17229 /* 128-bit vectors. */
17230 case V16QImode: eltype = V16QI_type_node; break;
17231 case V8HImode: eltype = V8HI_type_node; break;
17232 case V4SImode: eltype = V4SI_type_node; break;
17233 case V4SFmode: eltype = V4SF_type_node; break;
17234 case V2DImode: eltype = V2DI_type_node; break;
17235 default: gcc_unreachable ();
17239 if (k == 0 && !is_store)
17240 return_type = eltype;
17242 args = tree_cons (NULL_TREE, eltype, args);
17245 ftype = build_function_type (return_type, args);
17249 case NEON_RESULTPAIR:
17251 switch (insn_data[icode].operand[1].mode)
17253 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17254 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17255 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17256 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17257 case DImode: ftype = void_ftype_pdi_di_di; break;
17258 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17259 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17260 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17261 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17262 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17263 default: gcc_unreachable ();
17268 case NEON_REINTERP:
17270 /* We iterate over 5 doubleword types, then 5 quadword
17271 types. */
17272 int rhs = j % 5;
17273 switch (insn_data[icode].operand[0].mode)
17275 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17276 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17277 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17278 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17279 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17280 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17281 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17282 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17283 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17284 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17285 default: gcc_unreachable ();
17291 gcc_unreachable ();
17294 gcc_assert (ftype != NULL);
17296 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17298 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17305 arm_init_fp16_builtins (void)
17307 tree fp16_type = make_node (REAL_TYPE);
17308 TYPE_PRECISION (fp16_type) = 16;
17309 layout_type (fp16_type);
17310 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
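/* Editorial sketch: with the type registered above (and a format
   selected, e.g. -mfp16-format=ieee), user code can write:  */
#if 0
__fp16 h = 1.0;		/* 16-bit storage-only format.  */
float f = h + 2.0f;	/* Arithmetic happens after promotion to float.  */
#endif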
17314 arm_init_builtins (void)
17316 arm_init_tls_builtins ();
17318 if (TARGET_REALLY_IWMMXT)
17319 arm_init_iwmmxt_builtins ();
17322 arm_init_neon_builtins ();
17324 if (arm_fp16_format)
17325 arm_init_fp16_builtins ();
17328 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17330 static const char *
17331 arm_invalid_parameter_type (const_tree t)
17333 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17334 return N_("function parameters cannot have __fp16 type");
17338 /* Implement TARGET_INVALID_RETURN_TYPE. */
17340 static const char *
17341 arm_invalid_return_type (const_tree t)
17343 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17344 return N_("functions cannot return __fp16 type");
17348 /* Implement TARGET_PROMOTED_TYPE. */
17351 arm_promoted_type (const_tree t)
17353 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17354 return float_type_node;
17358 /* Implement TARGET_CONVERT_TO_TYPE.
17359 Specifically, this hook implements the peculiarity of the ARM
17360 half-precision floating-point C semantics that requires conversions between
17361 __fp16 and double to go through an intermediate conversion to float. */
17364 arm_convert_to_type (tree type, tree expr)
17366 tree fromtype = TREE_TYPE (expr);
17367 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17369 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17370 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17371 return convert (type, convert (float_type_node, expr));
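/* Editorial example: under the rule above, a direct __fp16 <-> double
   conversion is split into two steps, so

     double d = h;    with h of type __fp16

   is treated as "double d = (double) (float) h", while conversions
   between __fp16 and float are left as single steps.  */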
17375 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17376 This simply adds HFmode as a supported mode; even though we don't
17377 implement arithmetic on this type directly, it's supported by
17378 optabs conversions, much the way the double-word arithmetic is
17379 special-cased in the default hook. */
17382 arm_scalar_mode_supported_p (enum machine_mode mode)
17384 if (mode == HFmode)
17385 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17387 return default_scalar_mode_supported_p (mode);
17390 /* Errors in the source file can cause expand_expr to return const0_rtx
17391 where we expect a vector. To avoid crashing, use one of the vector
17392 clear instructions. */
17395 safe_vector_operand (rtx x, enum machine_mode mode)
17397 if (x != const0_rtx)
17398 return x;
17399 x = gen_reg_rtx (mode);
17401 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17402 : gen_rtx_SUBREG (DImode, x, 0)));
17406 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17409 arm_expand_binop_builtin (enum insn_code icode,
17410 tree exp, rtx target)
17413 tree arg0 = CALL_EXPR_ARG (exp, 0);
17414 tree arg1 = CALL_EXPR_ARG (exp, 1);
17415 rtx op0 = expand_normal (arg0);
17416 rtx op1 = expand_normal (arg1);
17417 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17418 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17419 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17421 if (VECTOR_MODE_P (mode0))
17422 op0 = safe_vector_operand (op0, mode0);
17423 if (VECTOR_MODE_P (mode1))
17424 op1 = safe_vector_operand (op1, mode1);
17426 if (! target
17427 || GET_MODE (target) != tmode
17428 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17429 target = gen_reg_rtx (tmode);
17431 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
17433 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17434 op0 = copy_to_mode_reg (mode0, op0);
17435 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17436 op1 = copy_to_mode_reg (mode1, op1);
17438 pat = GEN_FCN (icode) (target, op0, op1);
17445 /* Subroutine of arm_expand_builtin to take care of unop insns. */
17448 arm_expand_unop_builtin (enum insn_code icode,
17449 tree exp, rtx target, int do_load)
17452 tree arg0 = CALL_EXPR_ARG (exp, 0);
17453 rtx op0 = expand_normal (arg0);
17454 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17455 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17457 if (! target
17458 || GET_MODE (target) != tmode
17459 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17460 target = gen_reg_rtx (tmode);
17461 if (do_load)
17462 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17463 else
17464 {
17465 if (VECTOR_MODE_P (mode0))
17466 op0 = safe_vector_operand (op0, mode0);
17468 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17469 op0 = copy_to_mode_reg (mode0, op0);
17470 }
17472 pat = GEN_FCN (icode) (target, op0);
17480 neon_builtin_compare (const void *a, const void *b)
17482 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
17483 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
17484 unsigned int soughtcode = key->base_fcode;
17486 if (soughtcode >= memb->base_fcode
17487 && soughtcode < memb->base_fcode + memb->num_vars)
17489 else if (soughtcode < memb->base_fcode)
17495 static enum insn_code
17496 locate_neon_builtin_icode (int fcode, neon_itype *itype)
17498 neon_builtin_datum key, *found;
17501 key.base_fcode = fcode;
17502 found = (neon_builtin_datum *)
17503 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
17504 sizeof (neon_builtin_data[0]), neon_builtin_compare);
17505 gcc_assert (found);
17506 idx = fcode - (int) found->base_fcode;
17507 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
17510 *itype = found->itype;
17512 return found->codes[idx];
17515 typedef enum {
17516 NEON_ARG_COPY_TO_REG,
17517 NEON_ARG_CONSTANT,
17518 NEON_ARG_STOP
17519 } builtin_arg;
17521 #define NEON_MAX_BUILTIN_ARGS 5
17523 /* Expand a Neon builtin. */
17524 static rtx
17525 arm_expand_neon_args (rtx target, int icode, int have_retval,
17526 tree exp, ...)
17528 va_list ap;
17529 rtx pat;
17530 tree arg[NEON_MAX_BUILTIN_ARGS];
17531 rtx op[NEON_MAX_BUILTIN_ARGS];
17532 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17533 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
17538 if (have_retval && (target == 0 || GET_MODE (target) != tmode
17539 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
17540 target = gen_reg_rtx (tmode);
17542 va_start (ap, exp);
17546 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
17548 if (thisarg == NEON_ARG_STOP)
17552 arg[argc] = CALL_EXPR_ARG (exp, argc);
17553 op[argc] = expand_normal (arg[argc]);
17554 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
17558 case NEON_ARG_COPY_TO_REG:
17559 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
17560 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17561 (op[argc], mode[argc]))
17562 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
17565 case NEON_ARG_CONSTANT:
17566 /* FIXME: This error message is somewhat unhelpful. */
17567 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17568 (op[argc], mode[argc]))
17569 error ("argument must be a constant");
17572 case NEON_ARG_STOP:
17573 gcc_unreachable ();
17586 pat = GEN_FCN (icode) (target, op[0]);
17590 pat = GEN_FCN (icode) (target, op[0], op[1]);
17594 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
17598 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
17602 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
17606 gcc_unreachable ();
17612 pat = GEN_FCN (icode) (op[0]);
17616 pat = GEN_FCN (icode) (op[0], op[1]);
17620 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
17624 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
17628 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
17632 gcc_unreachable ();
17643 /* Expand a Neon builtin. These are "special" because they don't have symbolic
17644 constants defined per-instruction or per instruction-variant. Instead, the
17645 required info is looked up in the table neon_builtin_data. */
17647 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
17650 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
17657 return arm_expand_neon_args (target, icode, 1, exp,
17658 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
17662 case NEON_SCALARMUL:
17663 case NEON_SCALARMULL:
17664 case NEON_SCALARMULH:
17665 case NEON_SHIFTINSERT:
17666 case NEON_LOGICBINOP:
17667 return arm_expand_neon_args (target, icode, 1, exp,
17668 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17672 return arm_expand_neon_args (target, icode, 1, exp,
17673 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17674 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17678 case NEON_SHIFTIMM:
17679 return arm_expand_neon_args (target, icode, 1, exp,
17680 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
17684 return arm_expand_neon_args (target, icode, 1, exp,
17685 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17689 case NEON_REINTERP:
17690 return arm_expand_neon_args (target, icode, 1, exp,
17691 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17695 return arm_expand_neon_args (target, icode, 1, exp,
17696 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17698 case NEON_RESULTPAIR:
17699 return arm_expand_neon_args (target, icode, 0, exp,
17700 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17704 case NEON_LANEMULL:
17705 case NEON_LANEMULH:
17706 return arm_expand_neon_args (target, icode, 1, exp,
17707 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17708 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17711 return arm_expand_neon_args (target, icode, 1, exp,
17712 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17713 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
17715 case NEON_SHIFTACC:
17716 return arm_expand_neon_args (target, icode, 1, exp,
17717 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17718 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17720 case NEON_SCALARMAC:
17721 return arm_expand_neon_args (target, icode, 1, exp,
17722 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17723 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17727 return arm_expand_neon_args (target, icode, 1, exp,
17728 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17732 case NEON_LOADSTRUCT:
17733 return arm_expand_neon_args (target, icode, 1, exp,
17734 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17736 case NEON_LOAD1LANE:
17737 case NEON_LOADSTRUCTLANE:
17738 return arm_expand_neon_args (target, icode, 1, exp,
17739 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17743 case NEON_STORESTRUCT:
17744 return arm_expand_neon_args (target, icode, 0, exp,
17745 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17747 case NEON_STORE1LANE:
17748 case NEON_STORESTRUCTLANE:
17749 return arm_expand_neon_args (target, icode, 0, exp,
17750 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17754 gcc_unreachable ();
17757 /* Emit code to reinterpret one Neon type as another, without altering bits. */
17759 neon_reinterpret (rtx dest, rtx src)
17761 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
17764 /* Emit code to place a Neon pair result in memory locations (with equal registers). */
17767 neon_emit_pair_result_insn (enum machine_mode mode,
17768 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
17771 rtx mem = gen_rtx_MEM (mode, destaddr);
17772 rtx tmp1 = gen_reg_rtx (mode);
17773 rtx tmp2 = gen_reg_rtx (mode);
17775 emit_insn (intfn (tmp1, op1, tmp2, op2));
17777 emit_move_insn (mem, tmp1);
17778 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
17779 emit_move_insn (mem, tmp2);
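/* Illustration only, not part of GCC: the pair result above occupies
   two consecutive MODE-sized slots.  For an 8-byte vector mode the
   stores performed are, in effect:

     tmp1 -> [destaddr + 0 .. destaddr + 7]
     tmp2 -> [destaddr + 8 .. destaddr + 15]

   adjust_address supplies the GET_MODE_SIZE (mode) byte offset used
   for the second store.  */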
17782 /* Set up operands for a register copy from src to dest, taking care not to
17783 clobber registers in the process.
17784 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
17785 be called with a large N, so that should be OK. */
17788 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
17790 unsigned int copied = 0, opctr = 0;
17791 unsigned int done = (1 << count) - 1;
17794 while (copied != done)
17796 for (i = 0; i < count; i++)
17800 for (j = 0; good && j < count; j++)
17801 if (i != j && (copied & (1 << j)) == 0
17802 && reg_overlap_mentioned_p (src[j], dest[i]))
17807 operands[opctr++] = dest[i];
17808 operands[opctr++] = src[i];
17814 gcc_assert (opctr == count * 2);
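/* Illustration only, not part of GCC: the same copy-ordering idea on
   plain integers, assuming the dependency graph contains no cycles
   (a chain such as r1 <- r0, r2 <- r1 is handled by emitting
   r2 <- r1 first; a true cycle would need a temporary).  Hypothetical
   helper:

     static void
     ordered_copies (int *dst, const int *src, unsigned int n)
     {
       unsigned int done = 0, i, j;

       while (done != (1u << n) - 1)
         for (i = 0; i < n; i++)
           {
             int safe = (done & (1u << i)) == 0;

             for (j = 0; safe && j < n; j++)
               if (i != j && (done & (1u << j)) == 0 && src[j] == dst[i])
                 safe = 0;          (dst[i] still feeds a pending copy)
             if (safe)
               {
                 dst[i] = src[i];   ("emit" the copy)
                 done |= 1u << i;
               }
           }
     }
*/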
17817 /* Expand an expression EXP that calls a built-in function,
17818 with result going to TARGET if that's convenient
17819 (and in mode MODE if that's convenient).
17820 SUBTARGET may be used as the target for computing one of EXP's operands.
17821 IGNORE is nonzero if the value is to be ignored. */
17824 arm_expand_builtin (tree exp,
17826 rtx subtarget ATTRIBUTE_UNUSED,
17827 enum machine_mode mode ATTRIBUTE_UNUSED,
17828 int ignore ATTRIBUTE_UNUSED)
17830 const struct builtin_description * d;
17831 enum insn_code icode;
17832 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17840 int fcode = DECL_FUNCTION_CODE (fndecl);
17842 enum machine_mode tmode;
17843 enum machine_mode mode0;
17844 enum machine_mode mode1;
17845 enum machine_mode mode2;
17847 if (fcode >= ARM_BUILTIN_NEON_BASE)
17848 return arm_expand_neon_builtin (fcode, exp, target);
17852 case ARM_BUILTIN_TEXTRMSB:
17853 case ARM_BUILTIN_TEXTRMUB:
17854 case ARM_BUILTIN_TEXTRMSH:
17855 case ARM_BUILTIN_TEXTRMUH:
17856 case ARM_BUILTIN_TEXTRMSW:
17857 case ARM_BUILTIN_TEXTRMUW:
17858 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
17859 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
17860 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
17861 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
17862 : CODE_FOR_iwmmxt_textrmw);
17864 arg0 = CALL_EXPR_ARG (exp, 0);
17865 arg1 = CALL_EXPR_ARG (exp, 1);
17866 op0 = expand_normal (arg0);
17867 op1 = expand_normal (arg1);
17868 tmode = insn_data[icode].operand[0].mode;
17869 mode0 = insn_data[icode].operand[1].mode;
17870 mode1 = insn_data[icode].operand[2].mode;
17872 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17873 op0 = copy_to_mode_reg (mode0, op0);
17874 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17876 /* @@@ better error message */
17877 error ("selector must be an immediate");
17878 return gen_reg_rtx (tmode);
17881 if (target == 0 || GET_MODE (target) != tmode
17882 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17883 target = gen_reg_rtx (tmode);
17884 pat = GEN_FCN (icode) (target, op0, op1);
17890 case ARM_BUILTIN_TINSRB:
17891 case ARM_BUILTIN_TINSRH:
17892 case ARM_BUILTIN_TINSRW:
17893 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
17894 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
17895 : CODE_FOR_iwmmxt_tinsrw);
17896 arg0 = CALL_EXPR_ARG (exp, 0);
17897 arg1 = CALL_EXPR_ARG (exp, 1);
17898 arg2 = CALL_EXPR_ARG (exp, 2);
17899 op0 = expand_normal (arg0);
17900 op1 = expand_normal (arg1);
17901 op2 = expand_normal (arg2);
17902 tmode = insn_data[icode].operand[0].mode;
17903 mode0 = insn_data[icode].operand[1].mode;
17904 mode1 = insn_data[icode].operand[2].mode;
17905 mode2 = insn_data[icode].operand[3].mode;
17907 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17908 op0 = copy_to_mode_reg (mode0, op0);
17909 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17910 op1 = copy_to_mode_reg (mode1, op1);
17911 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17913 /* @@@ better error message */
17914 error ("selector must be an immediate");
17918 if (target == 0 || GET_MODE (target) != tmode
17919 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17920 target = gen_reg_rtx (tmode);
17921 pat = GEN_FCN (icode) (target, op0, op1, op2);
17927 case ARM_BUILTIN_SETWCX:
17928 arg0 = CALL_EXPR_ARG (exp, 0);
17929 arg1 = CALL_EXPR_ARG (exp, 1);
17930 op0 = force_reg (SImode, expand_normal (arg0));
17931 op1 = expand_normal (arg1);
17932 emit_insn (gen_iwmmxt_tmcr (op1, op0));
17935 case ARM_BUILTIN_GETWCX:
17936 arg0 = CALL_EXPR_ARG (exp, 0);
17937 op0 = expand_normal (arg0);
17938 target = gen_reg_rtx (SImode);
17939 emit_insn (gen_iwmmxt_tmrc (target, op0));
17942 case ARM_BUILTIN_WSHUFH:
17943 icode = CODE_FOR_iwmmxt_wshufh;
17944 arg0 = CALL_EXPR_ARG (exp, 0);
17945 arg1 = CALL_EXPR_ARG (exp, 1);
17946 op0 = expand_normal (arg0);
17947 op1 = expand_normal (arg1);
17948 tmode = insn_data[icode].operand[0].mode;
17949 mode1 = insn_data[icode].operand[1].mode;
17950 mode2 = insn_data[icode].operand[2].mode;
17952 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17953 op0 = copy_to_mode_reg (mode1, op0);
17954 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17956 /* @@@ better error message */
17957 error ("mask must be an immediate");
17961 if (target == 0 || GET_MODE (target) != tmode
17962 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17963 target = gen_reg_rtx (tmode);
17964 pat = GEN_FCN (icode) (target, op0, op1);
17970 case ARM_BUILTIN_WSADB:
17971 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
17972 case ARM_BUILTIN_WSADH:
17973 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
17974 case ARM_BUILTIN_WSADBZ:
17975 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
17976 case ARM_BUILTIN_WSADHZ:
17977 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
17979 /* Several three-argument builtins. */
17980 case ARM_BUILTIN_WMACS:
17981 case ARM_BUILTIN_WMACU:
17982 case ARM_BUILTIN_WALIGN:
17983 case ARM_BUILTIN_TMIA:
17984 case ARM_BUILTIN_TMIAPH:
17985 case ARM_BUILTIN_TMIATT:
17986 case ARM_BUILTIN_TMIATB:
17987 case ARM_BUILTIN_TMIABT:
17988 case ARM_BUILTIN_TMIABB:
17989 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
17990 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
17991 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
17992 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
17993 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
17994 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
17995 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
17996 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
17997 : CODE_FOR_iwmmxt_walign);
17998 arg0 = CALL_EXPR_ARG (exp, 0);
17999 arg1 = CALL_EXPR_ARG (exp, 1);
18000 arg2 = CALL_EXPR_ARG (exp, 2);
18001 op0 = expand_normal (arg0);
18002 op1 = expand_normal (arg1);
18003 op2 = expand_normal (arg2);
18004 tmode = insn_data[icode].operand[0].mode;
18005 mode0 = insn_data[icode].operand[1].mode;
18006 mode1 = insn_data[icode].operand[2].mode;
18007 mode2 = insn_data[icode].operand[3].mode;
18009 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18010 op0 = copy_to_mode_reg (mode0, op0);
18011 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18012 op1 = copy_to_mode_reg (mode1, op1);
18013 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18014 op2 = copy_to_mode_reg (mode2, op2);
18016 if (target == 0 || GET_MODE (target) != tmode
18017 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18018 target = gen_reg_rtx (tmode);
18019 pat = GEN_FCN (icode) (target, op0, op1, op2);
18025 case ARM_BUILTIN_WZERO:
18026 target = gen_reg_rtx (DImode);
18027 emit_insn (gen_iwmmxt_clrdi (target));
18030 case ARM_BUILTIN_THREAD_POINTER:
18031 return arm_load_tp (target);
18037 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18038 if (d->code == (const enum arm_builtins) fcode)
18039 return arm_expand_binop_builtin (d->icode, exp, target);
18041 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18042 if (d->code == (const enum arm_builtins) fcode)
18043 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18045 /* @@@ Should really do something sensible here. */
18049 /* Return the number (counting from 0) of
18050 the least significant set bit in MASK. */
18053 number_of_first_bit_set (unsigned mask)
18058 (mask & (1 << bit)) == 0;
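/* Illustration only, not part of GCC: the loop simply walks up from
   bit 0, so number_of_first_bit_set (0x28) == 3.  An equivalent using
   a GCC builtin (undefined for a zero mask, as is the loop) would be:

     return __builtin_ctz (mask);
*/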
18065 /* Emit code to push or pop registers to or from the stack. F is the
18066 assembly file. MASK is the registers to push or pop. PUSH is
18067 nonzero if we should push, and zero if we should pop. For debugging
18068 output, if pushing, adjust CFA_OFFSET by the amount of space added
18069 to the stack. REAL_REGS should have the same number of bits set as
18070 MASK, and will be used instead (in the same order) to describe which
18071 registers were saved - this is used to mark the save slots when we
18072 push high registers after moving them to low registers. */
18074 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18075 unsigned long real_regs)
18078 int lo_mask = mask & 0xFF;
18079 int pushed_words = 0;
18083 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18085 /* Special case. Do not generate a POP PC statement here; do it in thumb_exit. */
18087 thumb_exit (f, -1);
18091 if (ARM_EABI_UNWIND_TABLES && push)
18093 fprintf (f, "\t.save\t{");
18094 for (regno = 0; regno < 15; regno++)
18096 if (real_regs & (1 << regno))
18098 if (real_regs & ((1 << regno) -1))
18100 asm_fprintf (f, "%r", regno);
18103 fprintf (f, "}\n");
18106 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18108 /* Look at the low registers first. */
18109 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18113 asm_fprintf (f, "%r", regno);
18115 if ((lo_mask & ~1) != 0)
18122 if (push && (mask & (1 << LR_REGNUM)))
18124 /* Catch pushing the LR. */
18128 asm_fprintf (f, "%r", LR_REGNUM);
18132 else if (!push && (mask & (1 << PC_REGNUM)))
18134 /* Catch popping the PC. */
18135 if (TARGET_INTERWORK || TARGET_BACKTRACE
18136 || crtl->calls_eh_return)
18138 /* The PC is never popped directly; instead
18139 it is popped into r3 and then BX is used. */
18140 fprintf (f, "}\n");
18142 thumb_exit (f, -1);
18151 asm_fprintf (f, "%r", PC_REGNUM);
18155 fprintf (f, "}\n");
18157 if (push && pushed_words && dwarf2out_do_frame ())
18159 char *l = dwarf2out_cfi_label (false);
18160 int pushed_mask = real_regs;
18162 *cfa_offset += pushed_words * 4;
18163 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18166 pushed_words = 0; pushed_mask = real_regs;
18167 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18169 if (pushed_mask & 1)
18170 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
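/* Illustration only, not part of GCC: for MASK = REAL_REGS =
   {r4, r5, lr} and PUSH nonzero, the routine above emits something
   like

	.save	{r4, r5, lr}	(EABI unwind tables only)
	push	{r4, r5, lr}

   and, when dwarf2 frame info is requested, moves the CFA up by 12
   bytes and records one save slot per bit set in REAL_REGS.  */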
18175 /* Generate code to return from a thumb function.
18176 If 'reg_containing_return_addr' is -1, then the return address is
18177 actually on the stack, at the stack pointer. */
18179 thumb_exit (FILE *f, int reg_containing_return_addr)
18181 unsigned regs_available_for_popping;
18182 unsigned regs_to_pop;
18184 unsigned available;
18188 int restore_a4 = FALSE;
18190 /* Compute the registers we need to pop. */
18194 if (reg_containing_return_addr == -1)
18196 regs_to_pop |= 1 << LR_REGNUM;
18200 if (TARGET_BACKTRACE)
18202 /* Restore the (ARM) frame pointer and stack pointer. */
18203 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18207 /* If there is nothing to pop then just emit the BX instruction and return. */
18209 if (pops_needed == 0)
18211 if (crtl->calls_eh_return)
18212 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18214 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18217 /* Otherwise if we are not supporting interworking and we have not created
18218 a backtrace structure and the function was not entered in ARM mode then
18219 just pop the return address straight into the PC. */
18220 else if (!TARGET_INTERWORK
18221 && !TARGET_BACKTRACE
18222 && !is_called_in_ARM_mode (current_function_decl)
18223 && !crtl->calls_eh_return)
18225 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18229 /* Find out how many of the (return) argument registers we can corrupt. */
18230 regs_available_for_popping = 0;
18232 /* If returning via __builtin_eh_return, the bottom three registers
18233 all contain information needed for the return. */
18234 if (crtl->calls_eh_return)
18238 /* We can deduce the registers used from the function's
18239 return value. This is more reliable than examining
18240 df_regs_ever_live_p () because that will be set if the register is
18241 ever used in the function, not just if the register is used
18242 to hold a return value. */
18244 if (crtl->return_rtx != 0)
18245 mode = GET_MODE (crtl->return_rtx);
18247 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18249 size = GET_MODE_SIZE (mode);
18253 /* In a void function we can use any argument register.
18254 In a function that returns a structure on the stack
18255 we can use the second and third argument registers. */
18256 if (mode == VOIDmode)
18257 regs_available_for_popping =
18258 (1 << ARG_REGISTER (1))
18259 | (1 << ARG_REGISTER (2))
18260 | (1 << ARG_REGISTER (3));
18262 regs_available_for_popping =
18263 (1 << ARG_REGISTER (2))
18264 | (1 << ARG_REGISTER (3));
18266 else if (size <= 4)
18267 regs_available_for_popping =
18268 (1 << ARG_REGISTER (2))
18269 | (1 << ARG_REGISTER (3));
18270 else if (size <= 8)
18271 regs_available_for_popping =
18272 (1 << ARG_REGISTER (3));
18275 /* Match registers to be popped with registers into which we pop them. */
18276 for (available = regs_available_for_popping,
18277 required = regs_to_pop;
18278 required != 0 && available != 0;
18279 available &= ~(available & - available),
18280 required &= ~(required & - required))
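/* Illustration only, not part of GCC: X & -X isolates the lowest set
   bit of X, so each trip around the loop above pairs off one required
   register with one available register.  For available == 0xC:

     available & -available == 0x4
     available &= ~0x4       ->  0x8
*/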
18283 /* If we have any popping registers left over, remove them. */
18285 regs_available_for_popping &= ~available;
18287 /* Otherwise if we need another popping register we can use
18288 the fourth argument register. */
18289 else if (pops_needed)
18291 /* If we have not found any free argument registers and
18292 reg a4 contains the return address, we must move it. */
18293 if (regs_available_for_popping == 0
18294 && reg_containing_return_addr == LAST_ARG_REGNUM)
18296 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18297 reg_containing_return_addr = LR_REGNUM;
18299 else if (size > 12)
18301 /* Register a4 is being used to hold part of the return value,
18302 but we have dire need of a free, low register. */
18305 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
18308 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18310 /* The fourth argument register is available. */
18311 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18317 /* Pop as many registers as we can. */
18318 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18319 regs_available_for_popping);
18321 /* Process the registers we popped. */
18322 if (reg_containing_return_addr == -1)
18324 /* The return address was popped into the lowest numbered register. */
18325 regs_to_pop &= ~(1 << LR_REGNUM);
18327 reg_containing_return_addr =
18328 number_of_first_bit_set (regs_available_for_popping);
18330 /* Remove this register from the mask of available registers, so that
18331 the return address will not be corrupted by further pops. */
18332 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18335 /* If we popped other registers then handle them here. */
18336 if (regs_available_for_popping)
18340 /* Work out which register currently contains the frame pointer. */
18341 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18343 /* Move it into the correct place. */
18344 asm_fprintf (f, "\tmov\t%r, %r\n",
18345 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18347 /* (Temporarily) remove it from the mask of popped registers. */
18348 regs_available_for_popping &= ~(1 << frame_pointer);
18349 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18351 if (regs_available_for_popping)
18355 /* We popped the stack pointer as well;
18356 find the register that contains it. */
18357 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18359 /* Move it into the stack register. */
18360 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18362 /* At this point we have popped all necessary registers, so
18363 do not worry about restoring regs_available_for_popping
18364 to its correct value:
18366 assert (pops_needed == 0)
18367 assert (regs_available_for_popping == (1 << frame_pointer))
18368 assert (regs_to_pop == (1 << STACK_POINTER)) */
18372 /* Since we have just moved the popped value into the frame
18373 pointer, the popping register is available for reuse, and
18374 we know that we still have the stack pointer left to pop. */
18375 regs_available_for_popping |= (1 << frame_pointer);
18379 /* If we still have registers left on the stack, but we no longer have
18380 any registers into which we can pop them, then we must move the return
18381 address into the link register and make available the register that contained it. */
18383 if (regs_available_for_popping == 0 && pops_needed > 0)
18385 regs_available_for_popping |= 1 << reg_containing_return_addr;
18387 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18388 reg_containing_return_addr);
18390 reg_containing_return_addr = LR_REGNUM;
18393 /* If we have registers left on the stack then pop some more.
18394 We know that at most we will want to pop FP and SP. */
18395 if (pops_needed > 0)
18400 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18401 regs_available_for_popping);
18403 /* We have popped either FP or SP.
18404 Move whichever one it is into the correct register. */
18405 popped_into = number_of_first_bit_set (regs_available_for_popping);
18406 move_to = number_of_first_bit_set (regs_to_pop);
18408 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18410 regs_to_pop &= ~(1 << move_to);
18415 /* If we still have not popped everything then we must have only
18416 had one register available to us and we are now popping the SP. */
18417 if (pops_needed > 0)
18421 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18422 regs_available_for_popping);
18424 popped_into = number_of_first_bit_set (regs_available_for_popping);
18426 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
18428 /* assert (regs_to_pop == (1 << STACK_POINTER))
18429 assert (pops_needed == 1) */
18433 /* If necessary restore the a4 register. */
18436 if (reg_containing_return_addr != LR_REGNUM)
18438 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18439 reg_containing_return_addr = LR_REGNUM;
18442 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
18445 if (crtl->calls_eh_return)
18446 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18448 /* Return to caller. */
18449 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18454 thumb1_final_prescan_insn (rtx insn)
18456 if (flag_print_asm_name)
18457 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
18458 INSN_ADDRESSES (INSN_UID (insn)));
18462 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
18464 unsigned HOST_WIDE_INT mask = 0xff;
18467 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
18468 if (val == 0) /* XXX */
18471 for (i = 0; i < 25; i++)
18472 if ((val & (mask << i)) == val)
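/* Illustration only, not part of GCC: the test above accepts exactly
   those constants whose set bits fit inside one 8-bit window, e.g.

     0x3FC   == 0xFF << 2   -> shiftable
     0x10100                -> not shiftable (bits 8 and 16 span 9 bits)

   Such values can be materialized as a MOV of an 8-bit immediate
   followed by a left shift.  */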
18478 /* Returns nonzero if the current function contains,
18479 or might contain, a far jump. */
18481 thumb_far_jump_used_p (void)
18485 /* This test is only important for leaf functions. */
18486 /* assert (!leaf_function_p ()); */
18488 /* If we have already decided that far jumps may be used,
18489 do not bother checking again, and always return true even if
18490 it turns out that they are not being used. Once we have made
18491 the decision that far jumps are present (and that hence the link
18492 register will be pushed onto the stack) we cannot go back on it. */
18493 if (cfun->machine->far_jump_used)
18496 /* If this function is not being called from the prologue/epilogue
18497 generation code then it must be being called from the
18498 INITIAL_ELIMINATION_OFFSET macro. */
18499 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
18501 /* In this case we know that we are being asked about the elimination
18502 of the arg pointer register. If that register is not being used,
18503 then there are no arguments on the stack, and we do not have to
18504 worry that a far jump might force the prologue to push the link
18505 register, changing the stack offsets. In this case we can just
18506 return false, since the presence of far jumps in the function will
18507 not affect stack offsets.
18509 If the arg pointer is live (or if it was live, but has now been
18510 eliminated and so set to dead) then we do have to test to see if
18511 the function might contain a far jump. This test can lead to some
18512 false negatives, since before reload is completed, the length of
18513 branch instructions is not known, so gcc defaults to returning their
18514 longest length, which in turn sets the far jump attribute to true.
18516 A false negative will not result in bad code being generated, but it
18517 will result in a needless push and pop of the link register. We
18518 hope that this does not occur too often.
18520 If we need doubleword stack alignment this could affect the other
18521 elimination offsets so we can't risk getting it wrong. */
18522 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
18523 cfun->machine->arg_pointer_live = 1;
18524 else if (!cfun->machine->arg_pointer_live)
18528 /* Check to see if the function contains a branch
18529 insn with the far jump attribute set. */
18530 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18532 if (GET_CODE (insn) == JUMP_INSN
18533 /* Ignore tablejump patterns. */
18534 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18535 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
18536 && get_attr_far_jump (insn) == FAR_JUMP_YES
18539 /* Record the fact that we have decided that
18540 the function does use far jumps. */
18541 cfun->machine->far_jump_used = 1;
18549 /* Return nonzero if FUNC must be entered in ARM mode. */
18551 is_called_in_ARM_mode (tree func)
18553 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
18555 /* Ignore the problem about functions whose address is taken. */
18556 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
18560 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
18566 /* The bits which aren't usefully expanded as rtl. */
18568 thumb_unexpanded_epilogue (void)
18570 arm_stack_offsets *offsets;
18572 unsigned long live_regs_mask = 0;
18573 int high_regs_pushed = 0;
18574 int had_to_push_lr;
18577 if (cfun->machine->return_used_this_function != 0)
18580 if (IS_NAKED (arm_current_func_type ()))
18583 offsets = arm_get_frame_offsets ();
18584 live_regs_mask = offsets->saved_regs_mask;
18585 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
18587 /* We can deduce the registers used from the function's return value.
18588 This is more reliable than examining df_regs_ever_live_p () because that
18589 will be set if the register is ever used in the function, not just if
18590 the register is used to hold a return value. */
18591 size = arm_size_return_regs ();
18593 /* The prolog may have pushed some high registers to use as
18594 work registers. e.g. the testsuite file:
18595 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
18596 compiles to produce:
18597 push {r4, r5, r6, r7, lr}
18601 as part of the prolog. We have to undo that pushing here. */
18603 if (high_regs_pushed)
18605 unsigned long mask = live_regs_mask & 0xff;
18608 /* The available low registers depend on the size of the value we are returning. */
18616 /* Oh dear! We have no low registers into which we can pop high registers! */
18619 internal_error ("no low registers available for popping high registers");
18621 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
18622 if (live_regs_mask & (1 << next_hi_reg))
18625 while (high_regs_pushed)
18627 /* Find lo register(s) into which the high register(s) can be popped. */
18629 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
18631 if (mask & (1 << regno))
18632 high_regs_pushed--;
18633 if (high_regs_pushed == 0)
18637 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
18639 /* Pop the values into the low register(s). */
18640 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
18642 /* Move the value(s) into the high registers. */
18643 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
18645 if (mask & (1 << regno))
18647 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
18650 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
18651 if (live_regs_mask & (1 << next_hi_reg))
18656 live_regs_mask &= ~0x0f00;
18659 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
18660 live_regs_mask &= 0xff;
18662 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
18664 /* Pop the return address into the PC. */
18665 if (had_to_push_lr)
18666 live_regs_mask |= 1 << PC_REGNUM;
18668 /* Either no argument registers were pushed or a backtrace
18669 structure was created which includes an adjusted stack
18670 pointer, so just pop everything. */
18671 if (live_regs_mask)
18672 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
18675 /* We have either just popped the return address into the
18676 PC, or it was kept in LR for the entire function. */
18677 if (!had_to_push_lr)
18678 thumb_exit (asm_out_file, LR_REGNUM);
18682 /* Pop everything but the return address. */
18683 if (live_regs_mask)
18684 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
18687 if (had_to_push_lr)
18691 /* We have no free low regs, so save one. */
18692 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
18696 /* Get the return address into a temporary register. */
18697 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
18698 1 << LAST_ARG_REGNUM);
18702 /* Move the return address to lr. */
18703 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
18705 /* Restore the low register. */
18706 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
18711 regno = LAST_ARG_REGNUM;
18716 /* Remove the argument registers that were pushed onto the stack. */
18717 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
18718 SP_REGNUM, SP_REGNUM,
18719 crtl->args.pretend_args_size);
18721 thumb_exit (asm_out_file, regno);
18727 /* Functions to save and restore machine-specific function data. */
18728 static struct machine_function *
18729 arm_init_machine_status (void)
18731 struct machine_function *machine;
18732 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
18734 #if ARM_FT_UNKNOWN != 0
18735 machine->func_type = ARM_FT_UNKNOWN;
18740 /* Return an RTX indicating where the return address to the
18741 calling function can be found. */
18743 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
18748 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
18751 /* Do anything needed before RTL is emitted for each function. */
18753 arm_init_expanders (void)
18755 /* Arrange to initialize and mark the machine per-function status. */
18756 init_machine_status = arm_init_machine_status;
18758 /* This is to stop the combine pass optimizing away the alignment
18759 adjustment of va_arg. */
18760 /* ??? It is claimed that this should not be necessary. */
18762 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
18766 /* Like arm_compute_initial_elimination_offset. Simpler because there
18767 isn't an ABI specified frame pointer for Thumb. Instead, we set it
18768 to point at the base of the local variables after static stack
18769 space for a function has been allocated. */
18772 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
18774 arm_stack_offsets *offsets;
18776 offsets = arm_get_frame_offsets ();
18780 case ARG_POINTER_REGNUM:
18783 case STACK_POINTER_REGNUM:
18784 return offsets->outgoing_args - offsets->saved_args;
18786 case FRAME_POINTER_REGNUM:
18787 return offsets->soft_frame - offsets->saved_args;
18789 case ARM_HARD_FRAME_POINTER_REGNUM:
18790 return offsets->saved_regs - offsets->saved_args;
18792 case THUMB_HARD_FRAME_POINTER_REGNUM:
18793 return offsets->locals_base - offsets->saved_args;
18796 gcc_unreachable ();
18800 case FRAME_POINTER_REGNUM:
18803 case STACK_POINTER_REGNUM:
18804 return offsets->outgoing_args - offsets->soft_frame;
18806 case ARM_HARD_FRAME_POINTER_REGNUM:
18807 return offsets->saved_regs - offsets->soft_frame;
18809 case THUMB_HARD_FRAME_POINTER_REGNUM:
18810 return offsets->locals_base - offsets->soft_frame;
18813 gcc_unreachable ();
18818 gcc_unreachable ();
18822 /* Generate the rest of a function's prologue. */
18824 thumb1_expand_prologue (void)
18828 HOST_WIDE_INT amount;
18829 arm_stack_offsets *offsets;
18830 unsigned long func_type;
18832 unsigned long live_regs_mask;
18834 func_type = arm_current_func_type ();
18836 /* Naked functions don't have prologues. */
18837 if (IS_NAKED (func_type))
18840 if (IS_INTERRUPT (func_type))
18842 error ("interrupt Service Routines cannot be coded in Thumb mode");
18846 offsets = arm_get_frame_offsets ();
18847 live_regs_mask = offsets->saved_regs_mask;
18848 /* Load the pic register before setting the frame pointer,
18849 so we can use r7 as a temporary work register. */
18850 if (flag_pic && arm_pic_register != INVALID_REGNUM)
18851 arm_load_pic_register (live_regs_mask);
18853 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18854 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
18855 stack_pointer_rtx);
18857 amount = offsets->outgoing_args - offsets->saved_regs;
18862 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18863 GEN_INT (- amount)));
18864 RTX_FRAME_RELATED_P (insn) = 1;
18870 /* The stack decrement is too big for an immediate value in a single
18871 insn. In theory we could issue multiple subtracts, but after
18872 three of them it becomes more space efficient to place the full
18873 value in the constant pool and load into a register. (Also the
18874 ARM debugger really likes to see only one stack decrement per
18875 function). So instead we look for a scratch register into which
18876 we can load the decrement, and then we subtract this from the
18877 stack pointer. Unfortunately on the thumb the only available
18878 scratch registers are the argument registers, and we cannot use
18879 these as they may hold arguments to the function. Instead we
18880 attempt to locate a call preserved register which is used by this
18881 function. If we can find one, then we know that it will have
18882 been pushed at the start of the prologue and so we can corrupt it now. */
18884 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
18885 if (live_regs_mask & (1 << regno))
18888 gcc_assert (regno <= LAST_LO_REGNUM);
18890 reg = gen_rtx_REG (SImode, regno);
18892 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
18894 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
18895 stack_pointer_rtx, reg));
18896 RTX_FRAME_RELATED_P (insn) = 1;
18897 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18898 plus_constant (stack_pointer_rtx,
18900 RTX_FRAME_RELATED_P (dwarf) = 1;
18901 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18905 if (frame_pointer_needed)
18906 thumb_set_frame_pointer (offsets);
18908 /* If we are profiling, make sure no instructions are scheduled before
18909 the call to mcount. Similarly if the user has requested no
18910 scheduling in the prolog. Similarly if we want non-call exceptions
18911 using the EABI unwinder, to prevent faulting instructions from being
18912 swapped with a stack adjustment. */
18913 if (crtl->profile || !TARGET_SCHED_PROLOG
18914 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
18915 emit_insn (gen_blockage ());
18917 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
18918 if (live_regs_mask & 0xff)
18919 cfun->machine->lr_save_eliminated = 0;
18924 thumb1_expand_epilogue (void)
18926 HOST_WIDE_INT amount;
18927 arm_stack_offsets *offsets;
18930 /* Naked functions don't have epilogues. */
18931 if (IS_NAKED (arm_current_func_type ()))
18934 offsets = arm_get_frame_offsets ();
18935 amount = offsets->outgoing_args - offsets->saved_regs;
18937 if (frame_pointer_needed)
18939 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
18940 amount = offsets->locals_base - offsets->saved_regs;
18943 gcc_assert (amount >= 0);
18947 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18948 GEN_INT (amount)));
18951 /* r3 is always free in the epilogue. */
18952 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
18954 emit_insn (gen_movsi (reg, GEN_INT (amount)));
18955 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
18959 /* Emit a USE (stack_pointer_rtx), so that
18960 the stack adjustment will not be deleted. */
18961 emit_insn (gen_prologue_use (stack_pointer_rtx));
18963 if (crtl->profile || !TARGET_SCHED_PROLOG)
18964 emit_insn (gen_blockage ());
18966 /* Emit a clobber for each insn that will be restored in the epilogue,
18967 so that flow2 will get register lifetimes correct. */
18968 for (regno = 0; regno < 13; regno++)
18969 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
18970 emit_clobber (gen_rtx_REG (SImode, regno));
18972 if (! df_regs_ever_live_p (LR_REGNUM))
18973 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
18977 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
18979 arm_stack_offsets *offsets;
18980 unsigned long live_regs_mask = 0;
18981 unsigned long l_mask;
18982 unsigned high_regs_pushed = 0;
18983 int cfa_offset = 0;
18986 if (IS_NAKED (arm_current_func_type ()))
18989 if (is_called_in_ARM_mode (current_function_decl))
18993 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
18994 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
18996 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
18998 /* Generate code sequence to switch us into Thumb mode. */
18999 /* The .code 32 directive has already been emitted by
19000 ASM_DECLARE_FUNCTION_NAME. */
19001 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19002 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19004 /* Generate a label, so that the debugger will notice the
19005 change in instruction sets. This label is also used by
19006 the assembler to bypass the ARM code when this function
19007 is called from a Thumb encoded function elsewhere in the
19008 same file. Hence the definition of STUB_NAME here must
19009 agree with the definition in gas/config/tc-arm.c. */
19011 #define STUB_NAME ".real_start_of"
19013 fprintf (f, "\t.code\t16\n");
19015 if (arm_dllexport_name_p (name))
19016 name = arm_strip_name_encoding (name);
19018 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19019 fprintf (f, "\t.thumb_func\n");
19020 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19023 if (crtl->args.pretend_args_size)
19025 /* Output unwind directive for the stack adjustment. */
19026 if (ARM_EABI_UNWIND_TABLES)
19027 fprintf (f, "\t.pad #%d\n",
19028 crtl->args.pretend_args_size);
19030 if (cfun->machine->uses_anonymous_args)
19034 fprintf (f, "\tpush\t{");
19036 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19038 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19039 regno <= LAST_ARG_REGNUM;
19041 asm_fprintf (f, "%r%s", regno,
19042 regno == LAST_ARG_REGNUM ? "" : ", ");
19044 fprintf (f, "}\n");
19047 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19048 SP_REGNUM, SP_REGNUM,
19049 crtl->args.pretend_args_size);
19051 /* We don't need to record the stores for unwinding (would it
19052 help the debugger any if we did?), but record the change in
19053 the stack pointer. */
19054 if (dwarf2out_do_frame ())
19056 char *l = dwarf2out_cfi_label (false);
19058 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19059 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19063 /* Get the registers we are going to push. */
19064 offsets = arm_get_frame_offsets ();
19065 live_regs_mask = offsets->saved_regs_mask;
19066 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19067 l_mask = live_regs_mask & 0x40ff;
19068 /* Then count how many other high registers will need to be pushed. */
19069 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19071 if (TARGET_BACKTRACE)
19074 unsigned work_register;
19076 /* We have been asked to create a stack backtrace structure.
19077 The code looks like this:
19081 0 sub SP, #16 Reserve space for 4 registers.
19082 2 push {R7} Push low registers.
19083 4 add R7, SP, #20 Get the stack pointer before the push.
19084 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19085 8 mov R7, PC Get hold of the start of this code plus 12.
19086 10 str R7, [SP, #16] Store it.
19087 12 mov R7, FP Get hold of the current frame pointer.
19088 14 str R7, [SP, #4] Store it.
19089 16 mov R7, LR Get hold of the current return address.
19090 18 str R7, [SP, #12] Store it.
19091 20 add R7, SP, #16 Point at the start of the backtrace structure.
19092 22 mov FP, R7 Put this value into the frame pointer. */
19094 work_register = thumb_find_work_register (live_regs_mask);
19096 if (ARM_EABI_UNWIND_TABLES)
19097 asm_fprintf (f, "\t.pad #16\n");
19100 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19101 SP_REGNUM, SP_REGNUM);
19103 if (dwarf2out_do_frame ())
19105 char *l = dwarf2out_cfi_label (false);
19107 cfa_offset = cfa_offset + 16;
19108 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19113 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19114 offset = bit_count (l_mask) * UNITS_PER_WORD;
19119 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19120 offset + 16 + crtl->args.pretend_args_size);
19122 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19125 /* Make sure that the instruction fetching the PC is in the right place
19126 to calculate "start of backtrace creation code + 12". */
19129 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19130 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19132 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19133 ARM_HARD_FRAME_POINTER_REGNUM);
19134 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19139 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19140 ARM_HARD_FRAME_POINTER_REGNUM);
19141 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19143 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19144 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19148 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19149 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19151 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19153 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19154 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19156 /* Optimization: If we are not pushing any low registers but we are going
19157 to push some high registers then delay our first push. This will just
19158 be a push of LR and we can combine it with the push of the first high register. */
19160 else if ((l_mask & 0xff) != 0
19161 || (high_regs_pushed == 0 && l_mask))
19162 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19164 if (high_regs_pushed)
19166 unsigned pushable_regs;
19167 unsigned next_hi_reg;
19169 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19170 if (live_regs_mask & (1 << next_hi_reg))
19173 pushable_regs = l_mask & 0xff;
19175 if (pushable_regs == 0)
19176 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19178 while (high_regs_pushed > 0)
19180 unsigned long real_regs_mask = 0;
19182 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19184 if (pushable_regs & (1 << regno))
19186 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19188 high_regs_pushed --;
19189 real_regs_mask |= (1 << next_hi_reg);
19191 if (high_regs_pushed)
19193 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19195 if (live_regs_mask & (1 << next_hi_reg))
19200 pushable_regs &= ~((1 << regno) - 1);
19206 /* If we had to find a work register and we have not yet
19207 saved the LR then add it to the list of regs to push. */
19208 if (l_mask == (1 << LR_REGNUM))
19210 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19212 real_regs_mask | (1 << LR_REGNUM));
19216 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19221 /* Handle the case of a double word load into a low register from
19222 a computed memory address. The computed address may involve a
19223 register which is overwritten by the load. */
19225 thumb_load_double_from_address (rtx *operands)
19233 gcc_assert (GET_CODE (operands[0]) == REG);
19234 gcc_assert (GET_CODE (operands[1]) == MEM);
19236 /* Get the memory address. */
19237 addr = XEXP (operands[1], 0);
19239 /* Work out how the memory address is computed. */
19240 switch (GET_CODE (addr))
19243 operands[2] = adjust_address (operands[1], SImode, 4);
19245 if (REGNO (operands[0]) == REGNO (addr))
19247 output_asm_insn ("ldr\t%H0, %2", operands);
19248 output_asm_insn ("ldr\t%0, %1", operands);
19252 output_asm_insn ("ldr\t%0, %1", operands);
19253 output_asm_insn ("ldr\t%H0, %2", operands);
19258 /* Compute <address> + 4 for the high order load. */
19259 operands[2] = adjust_address (operands[1], SImode, 4);
19261 output_asm_insn ("ldr\t%0, %1", operands);
19262 output_asm_insn ("ldr\t%H0, %2", operands);
19266 arg1 = XEXP (addr, 0);
19267 arg2 = XEXP (addr, 1);
19269 if (CONSTANT_P (arg1))
19270 base = arg2, offset = arg1;
19272 base = arg1, offset = arg2;
19274 gcc_assert (GET_CODE (base) == REG);
19276 /* Catch the case of <address> = <reg> + <reg> */
19277 if (GET_CODE (offset) == REG)
19279 int reg_offset = REGNO (offset);
19280 int reg_base = REGNO (base);
19281 int reg_dest = REGNO (operands[0]);
19283 /* Add the base and offset registers together into the
19284 higher destination register. */
19285 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19286 reg_dest + 1, reg_base, reg_offset);
19288 /* Load the lower destination register from the address in
19289 the higher destination register. */
19290 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19291 reg_dest, reg_dest + 1);
19293 /* Load the higher destination register from its own address plus 4. */
19295 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19296 reg_dest + 1, reg_dest + 1);
19300 /* Compute <address> + 4 for the high order load. */
19301 operands[2] = adjust_address (operands[1], SImode, 4);
19303 /* If the computed address is held in the low order register
19304 then load the high order register first, otherwise always
19305 load the low order register first. */
19306 if (REGNO (operands[0]) == REGNO (base))
19308 output_asm_insn ("ldr\t%H0, %2", operands);
19309 output_asm_insn ("ldr\t%0, %1", operands);
19313 output_asm_insn ("ldr\t%0, %1", operands);
19314 output_asm_insn ("ldr\t%H0, %2", operands);
19320 /* With no registers to worry about we can just load the value directly. */
19322 operands[2] = adjust_address (operands[1], SImode, 4);
19324 output_asm_insn ("ldr\t%H0, %2", operands);
19325 output_asm_insn ("ldr\t%0, %1", operands);
19329 gcc_unreachable ();
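/* Illustration only, not part of GCC: the load ordering above matters
   when the destination overlaps the address.  With operands[0] == r0
   and addr == r0, the naive order

	ldr	r0, [r0]	@ base clobbered
	ldr	r1, [r0, #4]	@ reads from the wrong address

   is avoided by loading the high word, which never overlaps the base,
   first:

	ldr	r1, [r0, #4]
	ldr	r0, [r0]
*/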
19336 thumb_output_move_mem_multiple (int n, rtx *operands)
19343 if (REGNO (operands[4]) > REGNO (operands[5]))
19346 operands[4] = operands[5];
19349 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19350 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19354 if (REGNO (operands[4]) > REGNO (operands[5]))
19357 operands[4] = operands[5];
19360 if (REGNO (operands[5]) > REGNO (operands[6]))
19363 operands[5] = operands[6];
19366 if (REGNO (operands[4]) > REGNO (operands[5]))
19369 operands[4] = operands[5];
19373 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
19374 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
19378 gcc_unreachable ();
19384 /* Output a call-via instruction for thumb state. */
19386 thumb_call_via_reg (rtx reg)
19388 int regno = REGNO (reg);
19391 gcc_assert (regno < LR_REGNUM);
19393 /* If we are in the normal text section we can use a single instance
19394 per compilation unit. If we are doing function sections, then we need
19395 an entry per section, since we can't rely on reachability. */
19396 if (in_section == text_section)
19398 thumb_call_reg_needed = 1;
19400 if (thumb_call_via_label[regno] == NULL)
19401 thumb_call_via_label[regno] = gen_label_rtx ();
19402 labelp = thumb_call_via_label + regno;
19406 if (cfun->machine->call_via[regno] == NULL)
19407 cfun->machine->call_via[regno] = gen_label_rtx ();
19408 labelp = cfun->machine->call_via + regno;
19411 output_asm_insn ("bl\t%a0", labelp);
19415 /* Routines for generating rtl. */
19417 thumb_expand_movmemqi (rtx *operands)
19419 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
19420 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
19421 HOST_WIDE_INT len = INTVAL (operands[2]);
19422 HOST_WIDE_INT offset = 0;
19426 emit_insn (gen_movmem12b (out, in, out, in));
19432 emit_insn (gen_movmem8b (out, in, out, in));
19438 rtx reg = gen_reg_rtx (SImode);
19439 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
19440 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
19447 rtx reg = gen_reg_rtx (HImode);
19448 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
19449 plus_constant (in, offset))));
19450 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)), reg));
19458 rtx reg = gen_reg_rtx (QImode);
19459 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
19460 plus_constant (in, offset))));
19461 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)), reg));
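/* Illustration only, not part of GCC: the chunking strategy above,
   restated as plain C over byte buffers (12- and 8-byte blocks map to
   the post-incrementing ldmia/stmia patterns, then 4/2/1 byte tails).
   Assumes memcpy from <string.h>:

     static void
     copy_like_movmemqi (unsigned char *out, const unsigned char *in,
			 long len)
     {
       long off = 0;

       while (len >= 12)
	 { memcpy (out + off, in + off, 12); off += 12; len -= 12; }
       if (len >= 8)
	 { memcpy (out + off, in + off, 8); off += 8; len -= 8; }
       if (len >= 4)
	 { memcpy (out + off, in + off, 4); off += 4; len -= 4; }
       if (len >= 2)
	 { memcpy (out + off, in + off, 2); off += 2; len -= 2; }
       if (len >= 1)
	 memcpy (out + off, in + off, 1);
     }
*/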
19467 thumb_reload_out_hi (rtx *operands)
19469 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
19472 /* Handle reading a half-word from memory during reload. */
19474 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
19476 gcc_unreachable ();
19479 /* Return the length of a function name prefix
19480 that starts with the character 'c'. */
19482 arm_get_strip_length (int c)
19486 ARM_NAME_ENCODING_LENGTHS
19491 /* Return a pointer to a function's name with any
19492 and all prefix encodings stripped from it. */
19494 arm_strip_name_encoding (const char *name)
19498 while ((skip = arm_get_strip_length (* name)))
19504 /* If there is a '*' anywhere in the name's prefix, then
19505 emit the stripped name verbatim, otherwise prepend an
19506 underscore if leading underscores are being used. */
19508 arm_asm_output_labelref (FILE *stream, const char *name)
19513 while ((skip = arm_get_strip_length (* name)))
19515 verbatim |= (*name == '*');
19520 fputs (name, stream);
19522 asm_fprintf (stream, "%U%s", name);
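/* Illustration only, not part of GCC: with a user label prefix of
   "_", the routine above emits "foo" as "_foo", while a name whose
   encoded prefix contains '*' has the prefix stripped and the rest
   emitted verbatim, with no underscore prepended.  */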
19526 arm_file_start (void)
19530 if (TARGET_UNIFIED_ASM)
19531 asm_fprintf (asm_out_file, "\t.syntax unified\n");
19535 const char *fpu_name;
19536 if (arm_select[0].string)
19537 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
19538 else if (arm_select[1].string)
19539 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
19541 asm_fprintf (asm_out_file, "\t.cpu %s\n",
19542 all_cores[arm_default_cpu].name);
19544 if (TARGET_SOFT_FLOAT)
19547 fpu_name = "softvfp";
19549 fpu_name = "softfpa";
19553 int set_float_abi_attributes = 0;
19554 switch (arm_fpu_arch)
19559 case FPUTYPE_FPA_EMU2:
19562 case FPUTYPE_FPA_EMU3:
19565 case FPUTYPE_MAVERICK:
19566 fpu_name = "maverick";
19570 set_float_abi_attributes = 1;
19572 case FPUTYPE_VFP3D16:
19573 fpu_name = "vfpv3-d16";
19574 set_float_abi_attributes = 1;
19577 fpu_name = "vfpv3";
19578 set_float_abi_attributes = 1;
19582 set_float_abi_attributes = 1;
19584 case FPUTYPE_NEON_FP16:
19585 fpu_name = "neon-fp16";
19586 set_float_abi_attributes = 1;
19591 if (set_float_abi_attributes)
19593 if (TARGET_HARD_FLOAT)
19594 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
19595 if (TARGET_HARD_FLOAT_ABI)
19596 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
19599 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
19601 /* Some of these attributes only apply when the corresponding features
19602 are used. However we don't have any easy way of figuring this out.
19603 Conservatively record the setting that would have been used. */
19605 /* Tag_ABI_FP_rounding. */
19606 if (flag_rounding_math)
19607 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
19608 if (!flag_unsafe_math_optimizations)
19610 /* Tag_ABI_FP_denormal. */
19611 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
19612 /* Tag_ABI_FP_exceptions. */
19613 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
19615 /* Tag_ABI_FP_user_exceptions. */
19616 if (flag_signaling_nans)
19617 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
19618 /* Tag_ABI_FP_number_model. */
19619 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
19620 flag_finite_math_only ? 1 : 3);
19622 /* Tag_ABI_align8_needed. */
19623 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
19624 /* Tag_ABI_align8_preserved. */
19625 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
19626 /* Tag_ABI_enum_size. */
19627 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
19628 flag_short_enums ? 1 : 2);
19630 /* Tag_ABI_optimization_goals. */
19633 else if (optimize >= 2)
19639 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
19641 /* Tag_ABI_FP_16bit_format. */
19642 if (arm_fp16_format)
19643 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
19644 (int)arm_fp16_format);
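/* Illustration only, not part of GCC: for a typical VFPv3 hard-float
   compilation the directives emitted by this function might look like

	.eabi_attribute 27, 3
	.eabi_attribute 28, 1
	.fpu vfpv3
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2

   followed by Tag_ABI_optimization_goals and, when an fp16 format is
   selected, Tag_ABI_FP_16bit_format; the exact values depend on the
   -mfpu/-mfloat-abi and math flags in effect.  */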
19646 if (arm_lang_output_object_attributes_hook)
19647 arm_lang_output_object_attributes_hook();
19649 default_file_start ();
19653 arm_file_end (void)
19657 if (NEED_INDICATE_EXEC_STACK)
19658 /* Add .note.GNU-stack. */
19659 file_end_indicate_exec_stack ();
19661 if (! thumb_call_reg_needed)
19664 switch_to_section (text_section);
19665 asm_fprintf (asm_out_file, "\t.code 16\n");
19666 ASM_OUTPUT_ALIGN (asm_out_file, 1);
19668 for (regno = 0; regno < LR_REGNUM; regno++)
19670 rtx label = thumb_call_via_label[regno];
19674 targetm.asm_out.internal_label (asm_out_file, "L",
19675 CODE_LABEL_NUMBER (label));
19676 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19682 /* Symbols in the text segment can be accessed without indirecting via the
19683 constant pool; it may take an extra binary operation, but this is still
19684 faster than indirecting via memory. Don't do this when not optimizing,
19685 since we won't be calculating all of the offsets necessary to do this
19689 arm_encode_section_info (tree decl, rtx rtl, int first)
19691 if (optimize > 0 && TREE_CONSTANT (decl))
19692 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
19694 default_encode_section_info (decl, rtl, first);
19696 #endif /* !ARM_PE */
19699 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
19701 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
19702 && !strcmp (prefix, "L"))
19704 arm_ccfsm_state = 0;
19705 arm_target_insn = NULL;
19707 default_internal_label (stream, prefix, labelno);
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in ARM mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
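	  /* Illustrative sketch (added; not in the original sources): for
	     a PIC thunk with delta == 4 on a Thumb-capable target that is
	     not TARGET_THUMB1_ONLY, the strings emitted around here amount
	     to roughly:

		ldr	r12, .LTHUMBFUNCn
	     .LTHUNKPCn:
		add	r12, pc, r12
		add	r0, r0, #4
		bx	r12
	     .LTHUMBFUNCn:
		.word	target - 7 - .LTHUNKPCn

	     so r12 ends up holding target + 1, i.e. the target address
	     with its Thumb bit set.  */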
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
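  /* Worked example (added for illustration): for mi_delta == 0x10304 the
     loop above emits the delta as shifted 8-bit chunks, e.g. for r0:

	add	r0, r0, #772	@ 0x304
	add	r0, r0, #65536	@ 0x10000

     because an ARM data-processing immediate is an 8-bit value rotated
     by an even amount.  */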
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}
bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
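/* Note (added for illustration): the loop above prints elements from the
   highest index downwards, so a V4HImode constant with elements
   {1, 2, 3, 4} (element 0 first) is emitted as 0x0004000300020001.  */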
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */

void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
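/* Example (added for illustration): on a little-endian target the HFmode
   constant 1.0 (half-precision bit pattern 0x3c00) is emitted as the two
   value bytes followed by two bytes of zero padding, so the word-sized
   ldr of the pool entry reads 0x00003c00.  */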
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands [1]) != MEM
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
			    enum machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
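/* Example (added for illustration, assuming the usual ARM values
   NUM_ARG_REGS == 4 and UNITS_PER_WORD == 4): for "int f (int fmt, ...)"
   only r0 carries a named argument, so nregs == 1 and *pretend_size
   becomes 12, making the prologue push r1-r3 where va_arg expects
   to find them.  */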
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);

  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the value being
     shifted, in either case.  */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}
/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */

int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */

int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
	  || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
	  && !reg_overlap_mentioned_p (mul_result, mac_op0)
	  && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
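/* Example (added for illustration): with "mul r1, r2, r3" as PRODUCER and
   "mla r4, r5, r6, r1" as CONSUMER, the accumulator r1 is exactly the mul
   result and neither multiplicand overlaps it, so this returns nonzero
   and the pipeline descriptions may use the mul->mla accumulator
   forwarding bypass.  */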
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (addr, delta);
	}
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */

bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  return false;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
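/* Example (added for illustration): returning 255 for SImode lets the
   middle end fold "x << (n & 255)" to "x << n", matching the way ARM
   register-specified shifts use only the least significant byte of the
   count; returning 0 for DImode promises nothing, as the comment above
   says.  */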
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  /* FIXME: VFPv3 register numbering.  */
  if (IS_VFP_REGNUM (regno))
    return 64 + regno - FIRST_VFP_REGNUM;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
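/* Example (added for illustration): core registers map to themselves
   (r13 -> 13), the first VFP register maps to DWARF register 64, and on
   an AAPCS-based target the first FPA register maps to 96, following the
   EABI DWARF register numbering.  */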
#ifdef TARGET_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
	{
	  fprintf (asm_out_file, "\t.pad #4\n");
	  offset -= 4;
	}
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
	  || GET_CODE (XEXP (e, 0)) != MEM
	  || GET_CODE (XEXP (e, 1)) != REG)
	abort ();

      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
	abort ();

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
	{
	  offset += reg_size;
	  if (GET_CODE (XEXP (e, 0)) != REG
	      || REGNO (XEXP (e, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e, 1)) != CONST_INT
	      || offset != INTVAL (XEXP (e, 1)))
	    abort ();
	}
      else if (i != 1
	       || GET_CODE (e) != REG
	       || REGNO (e) != SP_REGNUM)
	abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
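/* Example (added for illustration): a prologue "push {r4, r5, lr}"
   matched here produces ".save {r4, r5, lr}", and a VFP store-multiple
   of d8 produces ".vsave {d8}"; these directives are how the EABI
   unwinder learns where the callee-saved registers were stored.  */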
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf (asm_out_file, "{d%d}\n",
		     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || GET_CODE (XEXP (e1, 0)) != REG
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (GET_CODE (XEXP (e1, 0)) != REG
		  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   INTVAL (XEXP (e1, 1)));
	    }
	  else if (GET_CODE (e1) == REG)
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && GET_CODE (XEXP (e1, 0)) == REG
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && GET_CODE (XEXP (e1, 1)) == CONST_INT)
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
	{
	  /* Stack pointer save before alignment.  */
	  reg = REGNO (e0);
	  asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
		       reg + 0x90, reg);
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
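/* Examples (added for illustration): a stack adjustment
   "sub sp, sp, #16" is annotated above as ".pad #16", a frame pointer
   set "add fp, sp, #8" as ".setfp fp, sp, #8", and a plain copy
   "mov r4, sp" as ".movsp r4".  */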
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
#endif /* TARGET_UNWIND_INFO */
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
	 put anything on the stack, so hopefully it won't matter.
	 CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
			      SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */

const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT (val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
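/* Note (added): SET_FLAGS selects one of the flag_chars, so the template
   built above ends in "%?", "%." or "%!".  These are ARM-specific
   punctuation codes expanded later by the operand-printing machinery;
   this sketch assumes the usual reading (condition suffix, flag-setting
   form, and preference for the narrow encoding, respectively).  */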
/* Output a Thumb-1 casesi dispatch sequence.  */

const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));
  addr_diff_vec_flags flags;

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  flags = ADDR_DIFF_VEC_FLAGS (diff_vec);

  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */

const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */

static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
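/* Example (added for illustration): by the first entry, the arm_neon.h
   type int8x8_t (a V8QImode vector of __builtin_neon_qi) mangles as
   "15__simd64_int8_t", so "void f (int8x8_t)" becomes
   "_Z1f15__simd64_int8_t".  */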
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;

  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Set default optimization options.  */

void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}