1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "conditions.h"
38 #include "insn-attr.h"
50 #include "integrate.h"
53 #include "target-def.h"
55 #include "langhooks.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
88 static arm_cc get_arm_condition_code (rtx);
89 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
90 static rtx is_jump_table (rtx);
91 static const char *output_multi_immediate (rtx *, const char *, const char *,
93 static const char *shift_op (rtx, HOST_WIDE_INT *);
94 static struct machine_function *arm_init_machine_status (void);
95 static void thumb_exit (FILE *, int);
96 static rtx is_jump_table (rtx);
97 static HOST_WIDE_INT get_jump_table_size (rtx);
98 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
99 static Mnode *add_minipool_forward_ref (Mfix *);
100 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
101 static Mnode *add_minipool_backward_ref (Mfix *);
102 static void assign_minipool_offsets (Mfix *);
103 static void arm_print_value (FILE *, rtx);
104 static void dump_minipool (rtx);
105 static int arm_barrier_cost (rtx);
106 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
107 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
108 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
110 static void arm_reorg (void);
111 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
112 static unsigned long arm_compute_save_reg0_reg12_mask (void);
113 static unsigned long arm_compute_save_reg_mask (void);
114 static unsigned long arm_isr_value (tree);
115 static unsigned long arm_compute_func_type (void);
116 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
118 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
119 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
120 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
122 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
123 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
125 static int arm_comp_type_attributes (const_tree, const_tree);
126 static void arm_set_default_type_attributes (tree);
127 static int arm_adjust_cost (rtx, rtx, rtx, int);
128 static int count_insns_for_constant (HOST_WIDE_INT, int);
129 static int arm_get_strip_length (int);
130 static bool arm_function_ok_for_sibcall (tree, tree);
131 static enum machine_mode arm_promote_function_mode (const_tree,
132 enum machine_mode, int *,
134 static bool arm_return_in_memory (const_tree, const_tree);
135 static rtx arm_function_value (const_tree, const_tree, bool);
136 static rtx arm_libcall_value (enum machine_mode, const_rtx);
138 static void arm_internal_label (FILE *, const char *, unsigned long);
139 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
141 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
142 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
143 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
144 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
145 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
146 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
147 static bool arm_rtx_costs (rtx, int, int, int *, bool);
148 static int arm_address_cost (rtx, bool);
149 static bool arm_memory_load_p (rtx);
150 static bool arm_cirrus_insn_p (rtx);
151 static void cirrus_reorg (rtx);
152 static void arm_init_builtins (void);
153 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
154 static void arm_init_iwmmxt_builtins (void);
155 static rtx safe_vector_operand (rtx, enum machine_mode);
156 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
157 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
158 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
159 static void emit_constant_insn (rtx cond, rtx pattern);
160 static rtx emit_set_insn (rtx, rtx);
161 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
163 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
165 static int aapcs_select_return_coproc (const_tree, const_tree);
167 #ifdef OBJECT_FORMAT_ELF
168 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
169 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
172 static void arm_encode_section_info (tree, rtx, int);
175 static void arm_file_end (void);
176 static void arm_file_start (void);
178 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
180 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
181 enum machine_mode, const_tree, bool);
182 static bool arm_promote_prototypes (const_tree);
183 static bool arm_default_short_enums (void);
184 static bool arm_align_anon_bitfield (void);
185 static bool arm_return_in_msb (const_tree);
186 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
187 static bool arm_return_in_memory (const_tree, const_tree);
188 #ifdef TARGET_UNWIND_INFO
189 static void arm_unwind_emit (FILE *, rtx);
190 static bool arm_output_ttype (rtx);
192 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
194 static tree arm_cxx_guard_type (void);
195 static bool arm_cxx_guard_mask_bit (void);
196 static tree arm_get_cookie_size (tree);
197 static bool arm_cookie_has_size (void);
198 static bool arm_cxx_cdtor_returns_this (void);
199 static bool arm_cxx_key_method_may_be_inline (void);
200 static void arm_cxx_determine_class_data_visibility (tree);
201 static bool arm_cxx_class_data_always_comdat (void);
202 static bool arm_cxx_use_aeabi_atexit (void);
203 static void arm_init_libfuncs (void);
204 static tree arm_build_builtin_va_list (void);
205 static void arm_expand_builtin_va_start (tree, rtx);
206 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
207 static bool arm_handle_option (size_t, const char *, int);
208 static void arm_target_help (void);
209 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
210 static bool arm_cannot_copy_insn_p (rtx);
211 static bool arm_tls_symbol_p (rtx x);
212 static int arm_issue_rate (void);
213 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
214 static bool arm_allocate_stack_slots_for_args (void);
215 static const char *arm_invalid_parameter_type (const_tree t);
216 static const char *arm_invalid_return_type (const_tree t);
217 static tree arm_promoted_type (const_tree t);
218 static tree arm_convert_to_type (tree type, tree expr);
219 static bool arm_scalar_mode_supported_p (enum machine_mode);
220 static bool arm_frame_pointer_required (void);
221 static bool arm_can_eliminate (const int, const int);
222 static void arm_asm_trampoline_template (FILE *);
223 static void arm_trampoline_init (rtx, tree, rtx);
224 static rtx arm_trampoline_adjust_address (rtx);
227 /* Table of machine attributes. */
228 static const struct attribute_spec arm_attribute_table[] =
230 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
231 /* Function calls made to this symbol must be done indirectly, because
232 it may lie outside of the 26 bit addressing range of a normal function
234 { "long_call", 0, 0, false, true, true, NULL },
235 /* Whereas these functions are always known to reside within the 26 bit
237 { "short_call", 0, 0, false, true, true, NULL },
238 /* Specify the procedure call conventions for a function. */
239 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
240 /* Interrupt Service Routines have special prologue and epilogue requirements. */
241 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
242 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
243 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
245 /* ARM/PE has three new attributes:
247 dllexport - for exporting a function/variable that will live in a dll
248 dllimport - for importing a function/variable from a dll
250 Microsoft allows multiple declspecs in one __declspec, separating
251 them with spaces. We do NOT support this. Instead, use __declspec
254 { "dllimport", 0, 0, true, false, false, NULL },
255 { "dllexport", 0, 0, true, false, false, NULL },
256 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
257 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
258 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
259 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
260 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
262 { NULL, 0, 0, false, false, false, NULL }
265 /* Initialize the GCC target structure. */
266 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
267 #undef TARGET_MERGE_DECL_ATTRIBUTES
268 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
271 #undef TARGET_LEGITIMIZE_ADDRESS
272 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
274 #undef TARGET_ATTRIBUTE_TABLE
275 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
277 #undef TARGET_ASM_FILE_START
278 #define TARGET_ASM_FILE_START arm_file_start
279 #undef TARGET_ASM_FILE_END
280 #define TARGET_ASM_FILE_END arm_file_end
282 #undef TARGET_ASM_ALIGNED_SI_OP
283 #define TARGET_ASM_ALIGNED_SI_OP NULL
284 #undef TARGET_ASM_INTEGER
285 #define TARGET_ASM_INTEGER arm_assemble_integer
287 #undef TARGET_ASM_FUNCTION_PROLOGUE
288 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
290 #undef TARGET_ASM_FUNCTION_EPILOGUE
291 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
293 #undef TARGET_DEFAULT_TARGET_FLAGS
294 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
295 #undef TARGET_HANDLE_OPTION
296 #define TARGET_HANDLE_OPTION arm_handle_option
298 #define TARGET_HELP arm_target_help
300 #undef TARGET_COMP_TYPE_ATTRIBUTES
301 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
303 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
304 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
306 #undef TARGET_SCHED_ADJUST_COST
307 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
309 #undef TARGET_ENCODE_SECTION_INFO
311 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
313 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
316 #undef TARGET_STRIP_NAME_ENCODING
317 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
319 #undef TARGET_ASM_INTERNAL_LABEL
320 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
322 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
323 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
325 #undef TARGET_FUNCTION_VALUE
326 #define TARGET_FUNCTION_VALUE arm_function_value
328 #undef TARGET_LIBCALL_VALUE
329 #define TARGET_LIBCALL_VALUE arm_libcall_value
331 #undef TARGET_ASM_OUTPUT_MI_THUNK
332 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
333 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
334 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
336 #undef TARGET_RTX_COSTS
337 #define TARGET_RTX_COSTS arm_rtx_costs
338 #undef TARGET_ADDRESS_COST
339 #define TARGET_ADDRESS_COST arm_address_cost
341 #undef TARGET_SHIFT_TRUNCATION_MASK
342 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
343 #undef TARGET_VECTOR_MODE_SUPPORTED_P
344 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
346 #undef TARGET_MACHINE_DEPENDENT_REORG
347 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
349 #undef TARGET_INIT_BUILTINS
350 #define TARGET_INIT_BUILTINS arm_init_builtins
351 #undef TARGET_EXPAND_BUILTIN
352 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
354 #undef TARGET_INIT_LIBFUNCS
355 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
357 #undef TARGET_PROMOTE_FUNCTION_MODE
358 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
359 #undef TARGET_PROMOTE_PROTOTYPES
360 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
361 #undef TARGET_PASS_BY_REFERENCE
362 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
363 #undef TARGET_ARG_PARTIAL_BYTES
364 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
366 #undef TARGET_SETUP_INCOMING_VARARGS
367 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
369 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
370 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
372 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
373 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
374 #undef TARGET_TRAMPOLINE_INIT
375 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
376 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
377 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
379 #undef TARGET_DEFAULT_SHORT_ENUMS
380 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
382 #undef TARGET_ALIGN_ANON_BITFIELD
383 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
385 #undef TARGET_NARROW_VOLATILE_BITFIELD
386 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
388 #undef TARGET_CXX_GUARD_TYPE
389 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
391 #undef TARGET_CXX_GUARD_MASK_BIT
392 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
394 #undef TARGET_CXX_GET_COOKIE_SIZE
395 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
397 #undef TARGET_CXX_COOKIE_HAS_SIZE
398 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
400 #undef TARGET_CXX_CDTOR_RETURNS_THIS
401 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
403 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
404 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
406 #undef TARGET_CXX_USE_AEABI_ATEXIT
407 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
409 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
410 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
411 arm_cxx_determine_class_data_visibility
413 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
414 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
416 #undef TARGET_RETURN_IN_MSB
417 #define TARGET_RETURN_IN_MSB arm_return_in_msb
419 #undef TARGET_RETURN_IN_MEMORY
420 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
422 #undef TARGET_MUST_PASS_IN_STACK
423 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
425 #ifdef TARGET_UNWIND_INFO
426 #undef TARGET_UNWIND_EMIT
427 #define TARGET_UNWIND_EMIT arm_unwind_emit
429 /* EABI unwinding tables use a different format for the typeinfo tables. */
430 #undef TARGET_ASM_TTYPE
431 #define TARGET_ASM_TTYPE arm_output_ttype
433 #undef TARGET_ARM_EABI_UNWINDER
434 #define TARGET_ARM_EABI_UNWINDER true
435 #endif /* TARGET_UNWIND_INFO */
437 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
438 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
440 #undef TARGET_CANNOT_COPY_INSN_P
441 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
444 #undef TARGET_HAVE_TLS
445 #define TARGET_HAVE_TLS true
448 #undef TARGET_CANNOT_FORCE_CONST_MEM
449 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
451 #undef TARGET_MAX_ANCHOR_OFFSET
452 #define TARGET_MAX_ANCHOR_OFFSET 4095
454 /* The minimum is set such that the total size of the block
455 for a particular anchor is -4088 + 1 + 4095 bytes, which is
456 divisible by eight, ensuring natural spacing of anchors. */
457 #undef TARGET_MIN_ANCHOR_OFFSET
458 #define TARGET_MIN_ANCHOR_OFFSET -4088
460 #undef TARGET_SCHED_ISSUE_RATE
461 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
463 #undef TARGET_MANGLE_TYPE
464 #define TARGET_MANGLE_TYPE arm_mangle_type
466 #undef TARGET_BUILD_BUILTIN_VA_LIST
467 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
468 #undef TARGET_EXPAND_BUILTIN_VA_START
469 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
470 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
471 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
474 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
475 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
478 #undef TARGET_LEGITIMATE_ADDRESS_P
479 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
481 #undef TARGET_INVALID_PARAMETER_TYPE
482 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
484 #undef TARGET_INVALID_RETURN_TYPE
485 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
487 #undef TARGET_PROMOTED_TYPE
488 #define TARGET_PROMOTED_TYPE arm_promoted_type
490 #undef TARGET_CONVERT_TO_TYPE
491 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
493 #undef TARGET_SCALAR_MODE_SUPPORTED_P
494 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
496 #undef TARGET_FRAME_POINTER_REQUIRED
497 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
499 #undef TARGET_CAN_ELIMINATE
500 #define TARGET_CAN_ELIMINATE arm_can_eliminate
502 struct gcc_target targetm = TARGET_INITIALIZER;
504 /* Obstack for minipool constant handling. */
505 static struct obstack minipool_obstack;
506 static char * minipool_startobj;
508 /* The maximum number of insns skipped which
509 will be conditionalised if possible. */
510 static int max_insns_skipped = 5;
512 extern FILE * asm_out_file;
514 /* True if we are currently building a constant table. */
515 int making_const_table;
517 /* The processor for which instructions should be scheduled. */
518 enum processor_type arm_tune = arm_none;
520 /* The default processor used if not overridden by commandline. */
521 static enum processor_type arm_default_cpu = arm_none;
523 /* Which floating point model to use. */
524 enum arm_fp_model arm_fp_model;
526 /* Which floating point hardware is available. */
527 enum fputype arm_fpu_arch;
529 /* Which floating point hardware to schedule for. */
530 enum fputype arm_fpu_tune;
532 /* Whether to use floating point hardware. */
533 enum float_abi_type arm_float_abi;
535 /* Which __fp16 format to use. */
536 enum arm_fp16_format_type arm_fp16_format;
538 /* Which ABI to use. */
539 enum arm_abi_type arm_abi;
541 /* Which thread pointer model to use. */
542 enum arm_tp_type target_thread_pointer = TP_AUTO;
544 /* Used to parse -mstructure_size_boundary command line option. */
545 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
547 /* Used for Thumb call_via trampolines. */
548 rtx thumb_call_via_label[14];
549 static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  Each FL_* bit
   names one capability; masks of these bits (insn_flags / tune_flags
   below) describe what the selected CPU/architecture can do.  */
#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
#define FL_ARCH3M (1 << 1) /* Extended multiply */
#define FL_MODE26 (1 << 2) /* 26-bit mode support */
#define FL_MODE32 (1 << 3) /* 32-bit mode support */
#define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
#define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
#define FL_THUMB (1 << 6) /* Thumb aware */
#define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
#define FL_STRONG (1 << 8) /* StrongARM */
#define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
#define FL_XSCALE (1 << 10) /* XScale */
#define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
#define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
media instructions. */
#define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
#define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
Note: ARM6 & 7 derivatives only. */
#define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
#define FL_THUMB2 (1 << 16) /* Thumb-2. */
#define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
profile. */
#define FL_DIV (1 << 18) /* Hardware divide. */
#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
#define FL_NEON (1 << 20) /* Neon instructions. */

/* NOTE(review): bits 21-28 are unused in this view; FL_IWMMXT sits at
   bit 29.  */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
/* FL_FOR_ARCHn: the full capability mask implied by architecture
   revision n.  Each entry is derived from its predecessor by OR-ing in
   the newly added FL_* bits; the M-profile variants (6M, 7) instead
   mask FL_NOTM back out, and 7A restores it.  */
#define FL_FOR_ARCH2 FL_NOTM
#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J FL_FOR_ARCH6
#define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;
609 /* The following are used in the arm.md file as equivalents to bits
610 in the above two flag variables. */
612 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
615 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
618 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
621 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
624 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
627 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
630 /* Nonzero if this chip supports the ARM 6K extensions. */
633 /* Nonzero if instructions not present in the 'M' profile can be used. */
634 int arm_arch_notm = 0;
636 /* Nonzero if this chip can benefit from load scheduling. */
637 int arm_ld_sched = 0;
639 /* Nonzero if this chip is a StrongARM. */
640 int arm_tune_strongarm = 0;
642 /* Nonzero if this chip is a Cirrus variant. */
643 int arm_arch_cirrus = 0;
645 /* Nonzero if this chip supports Intel Wireless MMX technology. */
646 int arm_arch_iwmmxt = 0;
648 /* Nonzero if this chip is an XScale. */
649 int arm_arch_xscale = 0;
651 /* Nonzero if tuning for XScale */
652 int arm_tune_xscale = 0;
654 /* Nonzero if we want to tune for stores that access the write-buffer.
655 This typically means an ARM6 or ARM7 with MMU or MPU. */
656 int arm_tune_wbuf = 0;
658 /* Nonzero if tuning for Cortex-A9. */
659 int arm_tune_cortex_a9 = 0;
661 /* Nonzero if generating Thumb instructions. */
664 /* Nonzero if we should define __THUMB_INTERWORK__ in the
666 XXX This is a bit of a hack, it's intended to help work around
667 problems in GLD which doesn't understand that armv5t code is
668 interworking clean. */
669 int arm_cpp_interwork = 0;
671 /* Nonzero if chip supports Thumb 2. */
674 /* Nonzero if chip supports integer division instruction. */
677 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
678 must report the mode of the memory reference from PRINT_OPERAND to
679 PRINT_OPERAND_ADDRESS. */
680 enum machine_mode output_memory_reference_mode;
682 /* The register number to be used for the PIC offset register. */
683 unsigned arm_pic_register = INVALID_REGNUM;
685 /* Set to 1 after arm_reorg has started. Reset to start at the start of
686 the next function. */
687 static int after_arm_reorg = 0;
689 /* The maximum number of insns to be used when loading a constant. */
690 static int arm_constant_limit = 3;
692 static enum arm_pcs arm_pcs_default;
694 /* For an explanation of these variables, see final_prescan_insn below. */
696 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
697 enum arm_cond_code arm_current_cc;
699 int arm_target_label;
700 /* The number of conditionally executed insns, including the current insn. */
701 int arm_condexec_count = 0;
702 /* A bitmask specifying the patterns for the IT block.
703 Zero means do not output an IT block before this insn. */
704 int arm_condexec_mask = 0;
705 /* The number of bits used in arm_condexec_mask. */
706 int arm_condexec_masklen = 0;
708 /* The condition codes of the ARM, and the inverse function. */
709 static const char * const arm_condition_codes[] =
711 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
712 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
/* Mnemonic for a left shift: the unified assembler syntax spells it
   "lsl", the old divided syntax spells it "asl".  */
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
/* Convenience wrapper: true iff the two C strings are equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Mask of the low registers (r0-r7, the 0xff base) with the hard frame
   pointer, SP, PC and the PIC register bits cleared.  */
#define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
| (1 << SP_REGNUM) | (1 << PC_REGNUM) \
| (1 << PIC_OFFSET_TABLE_REGNUM)))
722 /* Initialization code. */
726 const char *const name;
727 enum processor_type core;
729 const unsigned long flags;
730 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
733 /* Not all of these give usefully different compilation alternatives,
734 but there is no simple way of generalizing them. */
735 static const struct processors all_cores[] =
738 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
739 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
740 #include "arm-cores.def"
742 {NULL, arm_none, NULL, 0, NULL}
745 static const struct processors all_architectures[] =
747 /* ARM Architectures */
748 /* We don't specify rtx_costs here as it will be figured out
751 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
752 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
753 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
754 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
755 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
756 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
757 implementations that support it, so we will leave it out for now. */
758 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
759 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
760 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
761 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
762 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
763 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
764 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
765 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
766 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
767 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
768 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
769 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
770 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
771 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
772 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
773 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
774 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
775 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
776 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
777 {NULL, arm_none, NULL, 0 , NULL}
780 struct arm_cpu_select
784 const struct processors * processors;
787 /* This is a magic structure. The 'string' field is magically filled in
788 with a pointer to the value specified by the user on the command line
789 assuming that the user has specified such a value. */
791 static struct arm_cpu_select arm_select[] =
793 /* string name processors */
794 { NULL, "-mcpu=", all_cores },
795 { NULL, "-march=", all_architectures },
796 { NULL, "-mtune=", all_cores }
799 /* Defines representing the indexes into the above table. */
800 #define ARM_OPT_SET_CPU 0
801 #define ARM_OPT_SET_ARCH 1
802 #define ARM_OPT_SET_TUNE 2
804 /* The name of the preprocessor macro to define for this architecture. */
806 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
815 /* Available values for -mfpu=. */
817 static const struct fpu_desc all_fpus[] =
819 {"fpa", FPUTYPE_FPA},
820 {"fpe2", FPUTYPE_FPA_EMU2},
821 {"fpe3", FPUTYPE_FPA_EMU2},
822 {"maverick", FPUTYPE_MAVERICK},
823 {"vfp", FPUTYPE_VFP},
824 {"vfp3", FPUTYPE_VFP3},
825 {"vfpv3", FPUTYPE_VFP3},
826 {"vfpv3-d16", FPUTYPE_VFP3D16},
827 {"neon", FPUTYPE_NEON},
828 {"neon-fp16", FPUTYPE_NEON_FP16}
832 /* Floating point models used by the different hardware.
833 See fputype in arm.h. */
835 static const enum arm_fp_model fp_model_for_fpu[] =
837 /* No FP hardware. */
838 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
839 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
840 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
841 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
842 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
843 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
844 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
845 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
846 ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */
847 ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */
854 enum float_abi_type abi_type;
858 /* Available values for -mfloat-abi=. */
860 static const struct float_abi all_float_abis[] =
862 {"soft", ARM_FLOAT_ABI_SOFT},
863 {"softfp", ARM_FLOAT_ABI_SOFTFP},
864 {"hard", ARM_FLOAT_ABI_HARD}
/* Field of the name->value record used below -- presumably part of
   struct fp16_format, whose declaration is not visible here (TODO
   confirm).  */
  enum arm_fp16_format_type fp16_format_type;

/* Available values for -mfp16-format=.  */

/* Maps each -mfp16-format= option name to its ARM_FP16_FORMAT_* code;
   searched linearly by arm_override_options.  */
static const struct fp16_format all_fp16_formats[] =
  {"none",		ARM_FP16_FORMAT_NONE},
  {"ieee",		ARM_FP16_FORMAT_IEEE},
  {"alternative",	ARM_FP16_FORMAT_ALTERNATIVE}
/* Field of the name->value record used below -- presumably part of
   struct abi_name, whose declaration is not visible here (TODO
   confirm).  */
  enum arm_abi_type abi_type;

/* Available values for -mabi=.  */

/* Maps each -mabi= option name to its ARM_ABI_* code; searched
   linearly by arm_override_options.  */
static const struct abi_name arm_all_abis[] =
  {"apcs-gnu",	  ARM_ABI_APCS},
  {"atpcs",	  ARM_ABI_ATPCS},
  {"aapcs",	  ARM_ABI_AAPCS},
  {"iwmmxt",	  ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
903 /* Supported TLS relocations. */
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid -- no legitimization is attempted here.  Returns
   the emitted insn (via emit_insn).  */
emit_set_insn (rtx x, rtx y)
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
/* Return the number of bits set in VALUE.  Uses the classic
   Kernighan trick: each iteration of the (elided) loop clears the
   lowest set bit, so the iteration count equals the popcount.  */
bit_count (unsigned long value)
  unsigned long count = 0;

      value &= value - 1;  /* Clear the least-significant set bit.  */
/* Set up library functions unique to ARM.  Registers the EABI
   (__aeabi_*) runtime helper names for arithmetic, comparison and
   conversion optabs, plus the half-precision conversion helpers when
   an __fp16 format is selected.  */

arm_init_libfuncs (void)
  /* There are no special library functions unless we are using the
     ARM BPABI -- TODO confirm; the guard condition itself is not
     visible in this view.  */

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  /* NE has no dedicated helper; NULL makes the compiler synthesize it
     (e.g. from EQ).  */
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.
     The trailing 'z' in the names denotes round-toward-zero.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered (caller-saved) registers, there is no need to tell
     the compiler explicitly that those registers are clobbered by
     these routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting the SFmode.  */
  switch (arm_fp16_format)
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions: pick the IEEE or "alternative" helper to match
         the selected __fp16 representation.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 : "__gnu_h2f_alternative"));

      /* Arithmetic: NULL forces promotion to SFmode.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons: likewise done via SFmode.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
/* On AAPCS systems, this is the "struct __va_list".  Built lazily by
   arm_build_builtin_va_list below; GTY(()) registers it as a GC root.  */
static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.

   On non-AAPCS targets this is the generic (pointer-based) va_list.
   On AAPCS targets we build the ABI-mandated record type
   "struct __va_list { void *__ap; }" exactly -- the tag and field
   names are part of the ABI (the tag appears in C++ mangled names).  */

arm_build_builtin_va_list (void)
  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     matching the layout built below.

     The C Library ABI further reinforces this definition in \S
     (section number not visible here).

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     get_identifier ("__va_list"),
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 get_identifier ("__ap"),
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */

arm_extract_valist_ptr (tree valist)
  /* Propagate earlier errors rather than building on a bad tree.  */
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list" (see arm_build_builtin_va_list), so dig out its
     single __ap field.  */
  if (TARGET_AAPCS_BASED)
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  Unwraps the AAPCS
   struct (if any) and defers to the standard implementation.  */

arm_expand_builtin_va_start (tree valist, rtx nextarg)
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  Unwraps the AAPCS struct
   (if any) and defers to the standard implementation.  */

arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Implement TARGET_HANDLE_OPTION.  Records the argument of each
   recognized -m option; the real validation happens later in
   arm_override_options.  (The switch/case labels for CODE are not
   visible in this view.)  */

arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)

      /* Index 1 of arm_select is the "-march=" row (see the table).  */
      arm_select[1].string = arg;

      /* Index 0 of arm_select is the "-mcpu=" row.  */
      arm_select[0].string = arg;

    case OPT_mhard_float:
      target_float_abi_name = "hard";

    case OPT_msoft_float:
      target_float_abi_name = "soft";

      /* Index 2 of arm_select is the "-mtune=" row.  */
      arm_select[2].string = arg;
/* Print the lists of known CPU and architecture names (for --target-help
   style output), word-wrapped to the terminal width.  */
arm_target_help (void)
  /* Cached terminal width; computed once per process.  */
  static int columns = 0;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
      GET_ENVIRONMENT (p, "COLUMNS");
	  int value = atoi (p);

      /* Use a reasonable default.  */

  printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;

  /* Print the first name, then append the rest with ", " separators,
     wrapping to a new line when the current one would overflow.  */
  printf (" %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;

	  printf ("\n %s", all_cores[i].name);
	  remaining = columns - (len + 4);

  /* Same wrapping scheme for the architecture names.  */
  printf ("\n\n Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;

  printf (" %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;

	  printf ("\n %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.

   Resolves -mcpu/-march/-mtune into insn_flags/tune_flags, validates
   the ABI, FPU, float-ABI, __fp16, TLS/TP, PIC and structure-layout
   options against each other and against the selected CPU, then sets
   the global arm_* feature booleans used throughout the backend and
   in arm.md.  Called once, before any function is compiled.  */
arm_override_options (void)
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.
     Iterates the arm_select table backwards: -mtune, then -march, then
     -mcpu.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
	  const struct processors * sel;

	  for (sel = ptr->processors; sel->name != NULL; sel++)
	    if (streq (ptr->string, sel->name))
		/* Set the architecture define.  */
		if (i != ARM_OPT_SET_TUNE)
		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

		/* Determine the processor core for which we should
		   tune code-generation.  */
		if (/* -mcpu= is a sensible default.  */
		    i == ARM_OPT_SET_CPU
		    /* -mtune= overrides -mcpu= and -march=.  */
		    || i == ARM_OPT_SET_TUNE)
		  arm_tune = (enum processor_type) (sel - ptr->processors);

		/* Remember the CPU associated with this architecture.
		   If no other option is used to set the CPU type,
		   we'll use this to guess the most suitable tuning
		   options.  */
		if (i == ARM_OPT_SET_ARCH)
		  target_arch_cpu = sel->core;

		if (i == ARM_OPT_SET_CPU)
		  selected_cpu = (enum processor_type) (sel - ptr->processors);

		if (i != ARM_OPT_SET_TUNE)
		    /* If we have been given an architecture and a processor
		       make sure that they are compatible.  We only generate
		       a warning though, and we prefer the CPU over the
		       architecture.  */
		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
		      warning (0, "switch -mcpu=%s conflicts with -march= switch",

		    insn_flags = sel->flags;

	  /* Loop ran off the end of the table: unrecognized name.  */
	  if (sel->name == NULL)
	    error ("bad value (%s) for %s switch", ptr->string, ptr->name);

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
      const struct processors * sel;
      unsigned int sought;

      selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
	  /* Default to ARM6.  */
	  if (selected_cpu == arm_none)
	    selected_cpu = arm6;
      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;

      if (sought != 0 && ((sought & insn_flags) != sought))
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))

	  if (sel->name == NULL)
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		    /* "Best" = supports everything sought and shares the
		       most capability bits with the default CPU.  */
		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
			current_bit_count = count;

	      gcc_assert (best_fit);

	  insn_flags = sel->flags;

      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
	arm_tune = arm_default_cpu;

  /* The processor for which we should tune should now have been
     selected.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;

  /* Resolve -mfp16-format= against the all_fp16_formats table.  */
  if (target_fp16_format_name)
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);

    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  /* Resolve -mabi= against the arm_all_abis table.  */
  if (target_abi_name)
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	  if (streq (arm_all_abis[i].name, target_abi_name))
	      arm_abi = arm_all_abis[i].abi_type;
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);

    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;

  if (TARGET_APCS_FRAME && TARGET_THUMB)
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  /* NOTE: bitwise '&' is safe here because both operands are 0/1.  */
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
  else if (TARGET_THUMB2)
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  /* Translate the legacy -mfpe=N option into the equivalent -mfpu name.  */
  if (target_fpu_name == NULL && target_fpe_name != NULL)
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";

	error ("invalid floating point emulation option: -mfpe=%s",

  if (target_fpu_name != NULL)
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
	  if (streq (all_fpus[i].name, target_fpu_name))
	      arm_fpu_arch = all_fpus[i].fpu;
	      arm_fpu_tune = arm_fpu_arch;
	      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
	error ("invalid floating point option: -mfpu=%s", target_fpu_name);

#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
	arm_fpu_arch = FPUTYPE_VFP;
      NOTE(review): the unterminated comment above suggests these two
      lines were intended to be commented out -- confirm against the
      complete source.  */
      if (arm_arch_cirrus)
	arm_fpu_arch = FPUTYPE_MAVERICK;

	arm_fpu_arch = FPUTYPE_FPA_EMU2;

      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
	arm_fpu_tune = FPUTYPE_FPA;

	arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);

  /* Resolve -mfloat-abi= against the all_float_abis table.  */
  if (target_float_abi_name != NULL)
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	      arm_float_abi = all_float_abis[i].abi_type;
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);

    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fp_model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");

      if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  /* Select the default procedure-call standard variant.  */
  if (TARGET_AAPCS_BASED)
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	arm_pcs_default = ARM_PCS_AAPCS_VFP;

	arm_pcs_default = ARM_PCS_AAPCS;

      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;

	arm_pcs_default = ARM_PCS_ATPCS;

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Resolve the -mtp= thread-pointer access method.  */
  if (target_thread_switch)
      if (strcmp (target_thread_switch, "soft") == 0)
	target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
	target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
	target_thread_pointer = TP_CP15;

	error ("invalid thread pointer option: -mtp=%s", target_thread_switch);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
      if (arm_arch6k && !TARGET_THUMB)
	target_thread_pointer = TP_CP15;

	target_thread_pointer = TP_SOFT;

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
	  || (ARM_DOUBLEWORD_ALIGN && size == 64))
	arm_structure_size_boundary = size;

	warning (0, "structure size boundary can only be set to %s",
		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
      error ("RTP PIC is incompatible with Thumb");

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
      int pic_register = decode_reg_name (arm_pic_register_string);

	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);

	arm_pic_register = pic_register;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.
     (fix_cm3_ldrd == 2 is the "unset" sentinel from the option.)  */
  if (fix_cm3_ldrd == 2)
      if (selected_cpu == cortexm3)

  /* ??? We might want scheduling for thumb2.  */
  if (TARGET_THUMB && flag_schedule_insns)
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;

      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For processors with load scheduling, it never costs more than
         2 cycles to load a constant, and the load scheduler may well
	 reduce that to 1.  */
	arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
         to achieve a good schedule, so it's faster to synthesize
	 constants that can be done in two insns.  */
      if (arm_tune_xscale)
	arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
	max_insns_skipped = 3;

  /* Ideally we would want to use CFI directives to generate
     debug info.  However this also creates the .eh_frame
     section, so disable them until GAS can handle
     this properly.  See PR40521.  */
  if (TARGET_AAPCS_BASED)
    flag_dwarf2_cfi_asm = 0;

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
/* One-time setup of backend-global state: initializes the minipool
   obstack and records its base so it can be recycled between
   functions.  Called from arm_override_options.  */
arm_add_gc_roots (void)
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

/* Fields of the record type used below -- presumably struct
   isr_attribute_arg, whose declaration is not visible here (TODO
   confirm): the attribute-argument string and the ARM_FT_* flags it
   maps to.  */
  const char *const arg;
  const unsigned long return_value;

/* Recognized arguments to the "isr"/"interrupt" attribute, searched
   linearly (first match wins) by arm_isr_value.  */
static const isr_attribute_arg isr_attribute_args [] =
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  /* NOTE(review): the ABORT/abort pair is duplicated below.  Harmless,
     since the lookup returns on the first match, but likely
     unintentional.  */
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  /* Sentinel terminating the search loop.  */
  { NULL,    ARM_FT_NORMAL }
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.
   ARGUMENT is the TREE_VALUE list attached to the "isr"/"interrupt"
   attribute, or NULL_TREE if the attribute had no argument.  */

static unsigned long
arm_isr_value (tree argument)
  const isr_attribute_arg * ptr;

    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
/* Computes the type of the current function.  Returns a mask of
   ARM_FT_* flags derived from the function's attributes ("naked",
   "isr"/"interrupt"), its volatility (noreturn) and the static-chain
   requirement.  Called (and cached) via arm_current_func_type.  */

static unsigned long
arm_compute_func_type (void)
  unsigned long type = ARM_FT_UNKNOWN;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
    /* "interrupt" is accepted as a synonym -- presumably tried when
       "isr" is absent (the guarding condition is not visible here).  */
    a = lookup_attribute ("interrupt", attr);

    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;

    type |= arm_isr_value (TREE_VALUE (a));
/* Returns the type of the current function.  The result is computed
   once per function by arm_compute_func_type and cached in
   cfun->machine->func_type.  */

arm_current_func_type (void)
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
/* Target hook: whether incoming arguments may be spilled to stack slots.
   Naked functions have no prologue, so nothing may touch the stack.  */
1999 arm_allocate_stack_slots_for_args (void)
2001 /* Naked functions should not allocate stack slots for arguments. */
2002 return !IS_NAKED (arm_current_func_type ());
2006 /* Output assembler code for a block containing the constant parts
2007 of a trampoline, leaving space for the variable parts.
2009 On the ARM, (if r8 is the static chain regnum, and remembering that
2010 referencing pc adds an offset of 8) the trampoline looks like:
2013 .word static chain value
2014 .word function's address
2015 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
/* NOTE(review): listing is line-sampled; the TARGET_ARM/TARGET_THUMB
   branch structure around the asm_fprintf calls is partly out of view.  */
2018 arm_asm_trampoline_template (FILE *f)
/* ARM mode: two pc-relative loads fetch the chain value and target.  */
2022 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2023 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2025 else if (TARGET_THUMB2)
2027 /* The Thumb-2 trampoline is similar to the arm implementation.
2028 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2029 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2030 STATIC_CHAIN_REGNUM, PC_REGNUM);
2031 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
/* 16-bit Thumb: spill r0/r1, load chain and target via pc-relative
   loads, then pop r0 and jump through pc.  */
2035 ASM_OUTPUT_ALIGN (f, 2);
2036 fprintf (f, "\t.code\t16\n");
2037 fprintf (f, ".Ltrampoline_start:\n");
2038 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2039 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2040 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2041 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2042 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2043 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
/* Reserve the two variable words (chain value, function address)
   that arm_trampoline_init fills in at run time.  */
2045 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2046 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2049 /* Emit RTL insns to initialize the variable parts of a trampoline. */
/* Copies the template into M_TRAMP, stores CHAIN_VALUE and FNDECL's
   address at the offsets reserved by arm_asm_trampoline_template, then
   flushes the instruction cache over the trampoline's bytes.  */
2052 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2054 rtx fnaddr, mem, a_tramp;
2056 emit_block_move (m_tramp, assemble_trampoline_template (),
2057 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
/* Offsets differ because the 16-bit Thumb stub is longer (8 vs 12).  */
2059 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2060 emit_move_insn (mem, chain_value);
2062 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2063 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2064 emit_move_insn (mem, fnaddr);
/* The trampoline is data written at run time; make it executable by
   invalidating the icache range via __clear_cache.  */
2066 a_tramp = XEXP (m_tramp, 0);
2067 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2068 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2069 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2072 /* Thumb trampolines should be entered in thumb mode, so set
2073 the bottom bit of the address. */
/* Target hook; ORs bit 0 into ADDR (on a line-sampled guard, presumably
   only for Thumb targets -- confirm against the full file).  */
2076 arm_trampoline_adjust_address (rtx addr)
2079 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2080 NULL, 0, OPTAB_LIB_WIDEN);
2084 /* Return 1 if it is possible to return using a single instruction.
2085 If SIBLING is non-null, this is a test for a return before a sibling
2086 call. SIBLING is the call insn, so we can examine its register usage. */
/* NOTE(review): listing is line-sampled; the `return 0'/`return 1'
   lines and several closing braces are not visible here.  */
2089 use_return_insn (int iscond, rtx sibling)
2092 unsigned int func_type;
2093 unsigned long saved_int_regs;
2094 unsigned HOST_WIDE_INT stack_adjust;
2095 arm_stack_offsets *offsets;
2097 /* Never use a return instruction before reload has run. */
2098 if (!reload_completed)
2101 func_type = arm_current_func_type ();
2103 /* Naked, volatile and stack alignment functions need special
2105 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2108 /* So do interrupt functions that use the frame pointer and Thumb
2109 interrupt functions. */
2110 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2113 offsets = arm_get_frame_offsets ();
2114 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2116 /* As do variadic functions. */
2117 if (crtl->args.pretend_args_size
2118 || cfun->machine->uses_anonymous_args
2119 /* Or if the function calls __builtin_eh_return () */
2120 || crtl->calls_eh_return
2121 /* Or if the function calls alloca */
2122 || cfun->calls_alloca
2123 /* Or if there is a stack adjustment. However, if the stack pointer
2124 is saved on the stack, we can use a pre-incrementing stack load. */
2125 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2126 && stack_adjust == 4)))
2129 saved_int_regs = offsets->saved_regs_mask;
2131 /* Unfortunately, the insn
2133 ldmib sp, {..., sp, ...}
2135 triggers a bug on most SA-110 based devices, such that the stack
2136 pointer won't be correctly restored if the instruction takes a
2137 page fault. We work around this problem by popping r3 along with
2138 the other registers, since that is never slower than executing
2139 another instruction.
2141 We test for !arm_arch5 here, because code for any architecture
2142 less than this could potentially be run on one of the buggy
2144 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2146 /* Validate that r3 is a call-clobbered register (always true in
2147 the default abi) ... */
2148 if (!call_used_regs[3])
2151 /* ... that it isn't being used for a return value ... */
2152 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2155 /* ... or for a tail-call argument ... */
2158 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2160 if (find_regno_fusage (sibling, USE, 3))
2164 /* ... and that there are no call-saved registers in r0-r2
2165 (always true in the default ABI). */
2166 if (saved_int_regs & 0x7)
2170 /* Can't be done if interworking with Thumb, and any registers have been
2172 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2175 /* On StrongARM, conditional returns are expensive if they aren't
2176 taken and multiple registers have been stacked. */
2177 if (iscond && arm_tune_strongarm)
2179 /* Conditional return when just the LR is stored is a simple
2180 conditional-load instruction, that's not expensive. */
2181 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2185 && arm_pic_register != INVALID_REGNUM
2186 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2190 /* If there are saved registers but the LR isn't saved, then we need
2191 two instructions for the return. */
2192 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2195 /* Can't be done if any of the FPA regs are pushed,
2196 since this also requires an insn. */
2197 if (TARGET_HARD_FLOAT && TARGET_FPA)
2198 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2199 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2202 /* Likewise VFP regs. */
2203 if (TARGET_HARD_FLOAT && TARGET_VFP)
2204 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2205 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2208 if (TARGET_REALLY_IWMMXT)
2209 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2210 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2216 /* Return TRUE if int I is a valid immediate ARM constant. */
/* An ARM data-processing immediate is an 8-bit value rotated right by
   an even amount; Thumb-2 additionally allows shifted and replicated
   byte patterns.  NOTE(review): listing is line-sampled; the `int
   lowbit' declaration, return statements and the TARGET_* guards
   around the Thumb-2 cases are not visible here.  */
2219 const_ok_for_arm (HOST_WIDE_INT i)
2223 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2224 be all zero, or all one. */
2225 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2226 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2227 != ((~(unsigned HOST_WIDE_INT) 0)
2228 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2231 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2233 /* Fast return for 0 and small values. We must do this for zero, since
2234 the code below can't handle that one case. */
2235 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2238 /* Get the number of trailing zeros. */
2239 lowbit = ffs((int) i) - 1;
2241 /* Only even shifts are allowed in ARM mode so round down to the
2242 nearest even number. */
/* An 8-bit field starting at (even) lowbit covers the whole value.  */
2246 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2251 /* Allow rotated constants in ARM mode. */
2253 && ((i & ~0xc000003f) == 0
2254 || (i & ~0xf000000f) == 0
2255 || (i & ~0xfc000003) == 0))
2262 /* Allow repeated pattern. */
/* Thumb-2 replicated forms: 0x00XY00XY and 0xXYXYXYXY.  */
2265 if (i == v || i == (v | (v << 8)))
2272 /* Return true if I is a valid constant for the operation CODE. */
/* Falls back to checking whether the negated/inverted constant is
   encodable, since e.g. ADD with -I can be emitted as SUB with I.
   NOTE(review): listing is line-sampled; the switch statement and case
   labels surrounding these returns are not visible here.  */
2274 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2276 if (const_ok_for_arm (i))
2300 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2302 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2308 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2312 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2319 /* Emit a sequence of insns to handle a large constant.
2320 CODE is the code of the operation required, it can be any of SET, PLUS,
2321 IOR, AND, XOR, MINUS;
2322 MODE is the mode in which the operation is being performed;
2323 VAL is the integer to operate on;
2324 SOURCE is the other operand (a register, or a null-pointer for SET);
2325 SUBTARGETS means it is safe to create scratch registers if that will
2326 either produce a simpler sequence, or we will want to cse the values.
2327 Return value is the number of insns emitted. */
2329 /* ??? Tweak this for thumb2. */
/* NOTE(review): listing is line-sampled; the `int cond' declaration and
   some brace/else structure are missing from view.  */
2331 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2332 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
/* Propagate any conditional execution context from INSN.  */
2336 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2337 cond = COND_EXEC_TEST (PATTERN (insn));
2341 if (subtargets || code == SET
2342 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2343 && REGNO (target) != REGNO (source)))
2345 /* After arm_reorg has been called, we can't fix up expensive
2346 constants by pushing them into memory so we must synthesize
2347 them in-line, regardless of the cost. This is only likely to
2348 be more costly on chips that have load delay slots and we are
2349 compiling without running the scheduler (so no splitting
2350 occurred before the final instruction emission).
2352 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2354 if (!after_arm_reorg
2356 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2358 > arm_constant_limit + (code != SET)))
2362 /* Currently SET is the only monadic value for CODE, all
2363 the rest are diadic. */
2364 if (TARGET_USE_MOVT)
2365 arm_emit_movpair (target, GEN_INT (val));
2367 emit_set_insn (target, GEN_INT (val));
/* Diadic case: materialize VAL in a temporary, then combine it with
   SOURCE using CODE.  */
2373 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2375 if (TARGET_USE_MOVT)
2376 arm_emit_movpair (temp, GEN_INT (val));
2378 emit_set_insn (temp, GEN_INT (val));
2380 /* For MINUS, the value is subtracted from, since we never
2381 have subtraction of a constant. */
2383 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2385 emit_set_insn (target,
2386 gen_rtx_fmt_ee (code, mode, source, temp));
/* Otherwise synthesize the constant in-line.  */
2392 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
/* Counts (without emitting) how many insns the main loop of
   arm_gen_constant would use, consuming 8-bit chunks of REMAINDER
   starting from bit position I and wrapping at the word boundary.
   NOTE(review): listing is line-sampled; the `do {' opener, the insn
   counter, and the return are not visible here.  */
2396 /* Return the number of ARM instructions required to synthesize the given
2399 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2401 HOST_WIDE_INT temp1;
2409 if (remainder & (3 << (i - 2)))
2414 temp1 = remainder & ((0x0ff << end)
2415 | ((i < end) ? (0xff >> (32 - end)) : 0));
2416 remainder &= ~temp1;
2421 } while (remainder);
2425 /* Emit an instruction with the indicated PATTERN. If COND is
2426 non-NULL, conditionalize the execution of the instruction on COND
2430 emit_constant_insn (rtx cond, rtx pattern)
/* COND is copied so the caller's rtx is not shared into the stream.  */
2433 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2434 emit_insn (pattern);
/* Core constant-synthesis routine: tries successively cheaper tricks
   (degenerate cases, single insn, movw, sign-extension, difference of
   two immediates, halfword replication, shift pairs) before falling
   back to emitting 8-bit chunks.  Returns the insn count; emits RTL
   only when GENERATE is set.  NOTE(review): this listing is heavily
   line-sampled -- switch labels, braces, and many guard lines are
   missing; comments below describe only the visible lines.  */
2437 /* As above, but extra parameter GENERATE which, if clear, suppresses
2439 /* ??? This needs more work for thumb2. */
2442 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2443 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2448 int can_negate_initial = 0;
2451 int num_bits_set = 0;
2452 int set_sign_bit_copies = 0;
2453 int clear_sign_bit_copies = 0;
2454 int clear_zero_bit_copies = 0;
2455 int set_zero_bit_copies = 0;
2457 unsigned HOST_WIDE_INT temp1, temp2;
2458 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2460 /* Find out which operations are safe for a given CODE. Also do a quick
2461 check for degenerate cases; these can occur when DImode operations
2473 can_negate_initial = 1;
2477 if (remainder == 0xffffffff)
2480 emit_constant_insn (cond,
2481 gen_rtx_SET (VOIDmode, target,
2482 GEN_INT (ARM_SIGN_EXTEND (val))));
2488 if (reload_completed && rtx_equal_p (target, source))
2492 emit_constant_insn (cond,
2493 gen_rtx_SET (VOIDmode, target, source))
2505 emit_constant_insn (cond,
2506 gen_rtx_SET (VOIDmode, target, const0_rtx));
2509 if (remainder == 0xffffffff)
2511 if (reload_completed && rtx_equal_p (target, source))
2514 emit_constant_insn (cond,
2515 gen_rtx_SET (VOIDmode, target, source));
2524 if (reload_completed && rtx_equal_p (target, source))
2527 emit_constant_insn (cond,
2528 gen_rtx_SET (VOIDmode, target, source));
2532 /* We don't know how to handle other cases yet. */
2533 gcc_assert (remainder == 0xffffffff);
2536 emit_constant_insn (cond,
2537 gen_rtx_SET (VOIDmode, target,
2538 gen_rtx_NOT (mode, source)));
2542 /* We treat MINUS as (val - source), since (source - val) is always
2543 passed as (source + (-val)). */
2547 emit_constant_insn (cond,
2548 gen_rtx_SET (VOIDmode, target,
2549 gen_rtx_NEG (mode, source)));
2552 if (const_ok_for_arm (val))
2555 emit_constant_insn (cond,
2556 gen_rtx_SET (VOIDmode, target,
2557 gen_rtx_MINUS (mode, GEN_INT (val),
2569 /* If we can do it in one insn get out quickly. */
2570 if (const_ok_for_arm (val)
2571 || (can_negate_initial && const_ok_for_arm (-val))
2572 || (can_invert && const_ok_for_arm (~val)))
2575 emit_constant_insn (cond,
2576 gen_rtx_SET (VOIDmode, target,
2578 ? gen_rtx_fmt_ee (code, mode, source,
2584 /* Calculate a few attributes that may be useful for specific
2586 /* Count number of leading zeros. */
2587 for (i = 31; i >= 0; i--)
2589 if ((remainder & (1 << i)) == 0)
2590 clear_sign_bit_copies++;
2595 /* Count number of leading 1's. */
2596 for (i = 31; i >= 0; i--)
2598 if ((remainder & (1 << i)) != 0)
2599 set_sign_bit_copies++;
2604 /* Count number of trailing zero's. */
2605 for (i = 0; i <= 31; i++)
2607 if ((remainder & (1 << i)) == 0)
2608 clear_zero_bit_copies++;
2613 /* Count number of trailing 1's. */
2614 for (i = 0; i <= 31; i++)
2616 if ((remainder & (1 << i)) != 0)
2617 set_zero_bit_copies++;
2625 /* See if we can use movw. */
2626 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2629 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2634 /* See if we can do this by sign_extending a constant that is known
2635 to be negative. This is a good way of doing it, since the shift
2636 may well merge into a subsequent insn. */
2637 if (set_sign_bit_copies > 1)
2639 if (const_ok_for_arm
2640 (temp1 = ARM_SIGN_EXTEND (remainder
2641 << (set_sign_bit_copies - 1))))
2645 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2646 emit_constant_insn (cond,
2647 gen_rtx_SET (VOIDmode, new_src,
2649 emit_constant_insn (cond,
2650 gen_ashrsi3 (target, new_src,
2651 GEN_INT (set_sign_bit_copies - 1)));
2655 /* For an inverted constant, we will need to set the low bits,
2656 these will be shifted out of harm's way. */
2657 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2658 if (const_ok_for_arm (~temp1))
2662 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2663 emit_constant_insn (cond,
2664 gen_rtx_SET (VOIDmode, new_src,
2666 emit_constant_insn (cond,
2667 gen_ashrsi3 (target, new_src,
2668 GEN_INT (set_sign_bit_copies - 1)));
2674 /* See if we can calculate the value as the difference between two
2675 valid immediates. */
2676 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2678 int topshift = clear_sign_bit_copies & ~1;
2680 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2681 & (0xff000000 >> topshift));
2683 /* If temp1 is zero, then that means the 9 most significant
2684 bits of remainder were 1 and we've caused it to overflow.
2685 When topshift is 0 we don't need to do anything since we
2686 can borrow from 'bit 32'. */
2687 if (temp1 == 0 && topshift != 0)
2688 temp1 = 0x80000000 >> (topshift - 1);
2690 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2692 if (const_ok_for_arm (temp2))
2696 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2697 emit_constant_insn (cond,
2698 gen_rtx_SET (VOIDmode, new_src,
2700 emit_constant_insn (cond,
2701 gen_addsi3 (target, new_src,
2709 /* See if we can generate this by setting the bottom (or the top)
2710 16 bits, and then shifting these into the other half of the
2711 word. We only look for the simplest cases, to do more would cost
2712 too much. Be careful, however, not to generate this when the
2713 alternative would take fewer insns. */
2714 if (val & 0xffff0000)
2716 temp1 = remainder & 0xffff0000;
2717 temp2 = remainder & 0x0000ffff;
2719 /* Overlaps outside this range are best done using other methods. */
2720 for (i = 9; i < 24; i++)
2722 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2723 && !const_ok_for_arm (temp2))
2725 rtx new_src = (subtargets
2726 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2728 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2729 source, subtargets, generate);
2737 gen_rtx_ASHIFT (mode, source,
2744 /* Don't duplicate cases already considered. */
2745 for (i = 17; i < 24; i++)
2747 if (((temp1 | (temp1 >> i)) == remainder)
2748 && !const_ok_for_arm (temp1))
2750 rtx new_src = (subtargets
2751 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2753 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2754 source, subtargets, generate);
2759 gen_rtx_SET (VOIDmode, target,
2762 gen_rtx_LSHIFTRT (mode, source,
2773 /* If we have IOR or XOR, and the constant can be loaded in a
2774 single instruction, and we can find a temporary to put it in,
2775 then this can be done in two instructions instead of 3-4. */
2777 /* TARGET can't be NULL if SUBTARGETS is 0 */
2778 || (reload_completed && !reg_mentioned_p (target, source)))
2780 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2784 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2786 emit_constant_insn (cond,
2787 gen_rtx_SET (VOIDmode, sub,
2789 emit_constant_insn (cond,
2790 gen_rtx_SET (VOIDmode, target,
2791 gen_rtx_fmt_ee (code, mode,
2802 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2803 and the remainder 0s for e.g. 0xfff00000)
2804 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2806 This can be done in 2 instructions by using shifts with mov or mvn.
2811 mvn r0, r0, lsr #12 */
2812 if (set_sign_bit_copies > 8
2813 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2817 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2818 rtx shift = GEN_INT (set_sign_bit_copies);
2822 gen_rtx_SET (VOIDmode, sub,
2824 gen_rtx_ASHIFT (mode,
2829 gen_rtx_SET (VOIDmode, target,
2831 gen_rtx_LSHIFTRT (mode, sub,
2838 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2840 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2842 For eg. r0 = r0 | 0xfff
2847 if (set_zero_bit_copies > 8
2848 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2852 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2853 rtx shift = GEN_INT (set_zero_bit_copies);
2857 gen_rtx_SET (VOIDmode, sub,
2859 gen_rtx_LSHIFTRT (mode,
2864 gen_rtx_SET (VOIDmode, target,
2866 gen_rtx_ASHIFT (mode, sub,
2872 /* This will never be reached for Thumb2 because orn is a valid
2873 instruction. This is for Thumb1 and the ARM 32 bit cases.
2875 x = y | constant (such that ~constant is a valid constant)
2877 x = ~(~y & ~constant).
2879 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2883 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2884 emit_constant_insn (cond,
2885 gen_rtx_SET (VOIDmode, sub,
2886 gen_rtx_NOT (mode, source)));
2889 sub = gen_reg_rtx (mode);
2890 emit_constant_insn (cond,
2891 gen_rtx_SET (VOIDmode, sub,
2892 gen_rtx_AND (mode, source,
2894 emit_constant_insn (cond,
2895 gen_rtx_SET (VOIDmode, target,
2896 gen_rtx_NOT (mode, sub)));
2903 /* See if two shifts will do 2 or more insn's worth of work. */
2904 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2906 HOST_WIDE_INT shift_mask = ((0xffffffff
2907 << (32 - clear_sign_bit_copies))
2910 if ((remainder | shift_mask) != 0xffffffff)
2914 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2915 insns = arm_gen_constant (AND, mode, cond,
2916 remainder | shift_mask,
2917 new_src, source, subtargets, 1);
2922 rtx targ = subtargets ? NULL_RTX : target;
2923 insns = arm_gen_constant (AND, mode, cond,
2924 remainder | shift_mask,
2925 targ, source, subtargets, 0);
2931 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2932 rtx shift = GEN_INT (clear_sign_bit_copies);
2934 emit_insn (gen_ashlsi3 (new_src, source, shift));
2935 emit_insn (gen_lshrsi3 (target, new_src, shift));
2941 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2943 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2945 if ((remainder | shift_mask) != 0xffffffff)
2949 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2951 insns = arm_gen_constant (AND, mode, cond,
2952 remainder | shift_mask,
2953 new_src, source, subtargets, 1);
2958 rtx targ = subtargets ? NULL_RTX : target;
2960 insns = arm_gen_constant (AND, mode, cond,
2961 remainder | shift_mask,
2962 targ, source, subtargets, 0);
2968 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2969 rtx shift = GEN_INT (clear_zero_bit_copies);
2971 emit_insn (gen_lshrsi3 (new_src, source, shift));
2972 emit_insn (gen_ashlsi3 (target, new_src, shift));
/* Population count of REMAINDER: used below to decide whether the
   inverted or negated form needs fewer chunks.  */
2984 for (i = 0; i < 32; i++)
2985 if (remainder & (1 << i))
2989 || (code != IOR && can_invert && num_bits_set > 16))
2990 remainder = (~remainder) & 0xffffffff;
2991 else if (code == PLUS && num_bits_set > 16)
2992 remainder = (-remainder) & 0xffffffff;
2999 /* Now try and find a way of doing the job in either two or three
3001 We start by looking for the largest block of zeros that are aligned on
3002 a 2-bit boundary, we then fill up the temps, wrapping around to the
3003 top of the word when we drop off the bottom.
3004 In the worst case this code should produce no more than four insns.
3005 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3006 best place to start. */
3008 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3014 int best_consecutive_zeros = 0;
3016 for (i = 0; i < 32; i += 2)
3018 int consecutive_zeros = 0;
3020 if (!(remainder & (3 << i)))
3022 while ((i < 32) && !(remainder & (3 << i)))
3024 consecutive_zeros += 2;
3027 if (consecutive_zeros > best_consecutive_zeros)
3029 best_consecutive_zeros = consecutive_zeros;
3030 best_start = i - consecutive_zeros;
3036 /* So long as it won't require any more insns to do so, it's
3037 desirable to emit a small constant (in bits 0...9) in the last
3038 insn. This way there is more chance that it can be combined with
3039 a later addressing insn to form a pre-indexed load or store
3040 operation. Consider:
3042 *((volatile int *)0xe0000100) = 1;
3043 *((volatile int *)0xe0000110) = 2;
3045 We want this to wind up as:
3049 str rB, [rA, #0x100]
3051 str rB, [rA, #0x110]
3053 rather than having to synthesize both large constants from scratch.
3055 Therefore, we calculate how many insns would be required to emit
3056 the constant starting from `best_start', and also starting from
3057 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3058 yield a shorter sequence, we may as well use zero. */
3060 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
3061 && (count_insns_for_constant (remainder, 0) <=
3062 count_insns_for_constant (remainder, best_start)))
3066 /* Now start emitting the insns. */
3074 if (remainder & (3 << (i - 2)))
3079 temp1 = remainder & ((0x0ff << end)
3080 | ((i < end) ? (0xff >> (32 - end)) : 0));
3081 remainder &= ~temp1;
3085 rtx new_src, temp1_rtx;
3087 if (code == SET || code == MINUS)
3089 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3090 if (can_invert && code != MINUS)
3095 if (remainder && subtargets)
3096 new_src = gen_reg_rtx (mode);
3101 else if (can_negate)
3105 temp1 = trunc_int_for_mode (temp1, mode);
3106 temp1_rtx = GEN_INT (temp1);
3110 else if (code == MINUS)
3111 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3113 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3115 emit_constant_insn (cond,
3116 gen_rtx_SET (VOIDmode, new_src,
3126 else if (code == MINUS)
3135 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3148 /* Canonicalize a comparison so that we are more likely to recognize it.
3149 This can be done for a few constant compares, where we can make the
3150 immediate value easier to load. */
/* Rewrites e.g. (x > C) as (x >= C+1) when C+1 (or its negation) is an
   encodable immediate and C is not a boundary value.  Returns the new
   comparison code and updates *OP1 in place.  NOTE(review): listing is
   line-sampled; the switch/case structure and the boundary guards for
   GT/GE (against maxval) are partly out of view.  */
3153 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3156 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3157 unsigned HOST_WIDE_INT maxval;
3158 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3169 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3171 *op1 = GEN_INT (i + 1);
3172 return code == GT ? GE : LT;
3179 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3181 *op1 = GEN_INT (i - 1);
3182 return code == GE ? GT : LE;
/* Unsigned forms: guard against wrap at the type's extremes.  */
3188 if (i != ~((unsigned HOST_WIDE_INT) 0)
3189 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3191 *op1 = GEN_INT (i + 1);
3192 return code == GTU ? GEU : LTU;
3199 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3201 *op1 = GEN_INT (i - 1);
3202 return code == GEU ? GTU : LEU;
3214 /* Define how to find the value returned by a function. */
/* Target hook: returns the rtx for the register(s) holding a value of
   TYPE returned by FUNC.  AAPCS targets delegate to the AAPCS return
   register allocator; otherwise the mode is promoted as needed and
   mapped through LIBCALL_VALUE.  */
3217 arm_function_value(const_tree type, const_tree func,
3218 bool outgoing ATTRIBUTE_UNUSED)
3220 enum machine_mode mode;
3221 int unsignedp ATTRIBUTE_UNUSED;
3222 rtx r ATTRIBUTE_UNUSED;
3224 mode = TYPE_MODE (type);
3226 if (TARGET_AAPCS_BASED)
3227 return aapcs_allocate_return_reg (mode, type, func);
3229 /* Promote integer types. */
3230 if (INTEGRAL_TYPE_P (type))
3231 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3233 /* Promotes small structs returned in a register to full-word size
3234 for big-endian AAPCS. */
3235 if (arm_return_in_msb (type))
3237 HOST_WIDE_INT size = int_size_in_bytes (type);
3238 if (size % UNITS_PER_WORD != 0)
3240 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3241 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3245 return LIBCALL_VALUE (mode);
/* htab equality callback for the libcall hash table: two entries are
   equal when their rtx expressions compare equal.  */
3249 libcall_eq (const void *p1, const void *p2)
3251 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
/* htab hash callback for the libcall hash table: hashes the rtx.  */
3255 libcall_hash (const void *p1)
3257 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
/* Insert LIBCALL into HTAB (helper for building the set of libcalls
   that use the base PCS).  */
3261 add_libcall (htab_t htab, rtx libcall)
3263 *htab_find_slot (htab, libcall, INSERT) = libcall;
/* Return true if LIBCALL is one of the conversion libcalls that always
   uses the base (soft-float) AAPCS calling convention, even on
   hard-float targets.  The set is built once into a static hash table.
   NOTE(review): the lazy-init guard (`if (!init_done)') is on a line
   missing from this view -- confirm against the full file.  */
3267 arm_libcall_uses_aapcs_base (const_rtx libcall)
3269 static bool init_done = false;
3270 static htab_t libcall_htab;
3276 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
/* int -> float conversions.  */
3278 add_libcall (libcall_htab,
3279 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3280 add_libcall (libcall_htab,
3281 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3282 add_libcall (libcall_htab,
3283 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3284 add_libcall (libcall_htab,
3285 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
/* unsigned int -> float conversions.  */
3287 add_libcall (libcall_htab,
3288 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3289 add_libcall (libcall_htab,
3290 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3291 add_libcall (libcall_htab,
3292 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3293 add_libcall (libcall_htab,
3294 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
/* float <-> half and float -> 64-bit int conversions.  */
3296 add_libcall (libcall_htab,
3297 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3298 add_libcall (libcall_htab,
3299 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3300 add_libcall (libcall_htab,
3301 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3302 add_libcall (libcall_htab,
3303 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3304 add_libcall (libcall_htab,
3305 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3306 add_libcall (libcall_htab,
3307 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3310 return libcall && htab_find (libcall_htab, libcall) != NULL;
/* Return the rtx for the register in which a libcall of MODE returns
   its result, honouring the base-PCS exception for certain float
   conversion libcalls.  */
3314 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3316 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3317 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3319 /* The following libcalls return their result in integer registers,
3320 even though they return a floating point value. */
3321 if (arm_libcall_uses_aapcs_base (libcall))
3322 return gen_rtx_REG (mode, ARG_REGISTER(1));
3326 return LIBCALL_VALUE (mode);
3329 /* Determine the amount of memory needed to store the possible return
3330 registers of an untyped call. */
/* NOTE(review): listing is line-sampled; the base size and the
   increments added for each ABI case are on lines missing from this
   view -- only the condition lines are visible.  */
3332 arm_apply_result_size (void)
3338 if (TARGET_HARD_FLOAT_ABI)
3344 if (TARGET_MAVERICK)
3347 if (TARGET_IWMMXT_ABI)
3354 /* Decide whether TYPE should be returned in memory (true)
3355 or in a register (false). FNTYPE is the type of the function making
/* NOTE(review): listing is line-sampled; several `return' lines,
   braces, and the ARM_WINCE conditional compilation markers are not
   visible here.  */
3358 arm_return_in_memory (const_tree type, const_tree fntype)
3362 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3364 if (TARGET_AAPCS_BASED)
3366 /* Simple, non-aggregate types (ie not including vectors and
3367 complex) are always returned in a register (or registers).
3368 We don't care about which register here, so we can short-cut
3369 some of the detail. */
3370 if (!AGGREGATE_TYPE_P (type)
3371 && TREE_CODE (type) != VECTOR_TYPE
3372 && TREE_CODE (type) != COMPLEX_TYPE)
3375 /* Any return value that is no larger than one word can be
3377 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3380 /* Check any available co-processors to see if they accept the
3381 type as a register candidate (VFP, for example, can return
3382 some aggregates in consecutive registers). These aren't
3383 available if the call is variadic. */
3384 if (aapcs_select_return_coproc (type, fntype) >= 0)
3387 /* Vector values should be returned using ARM registers, not
3388 memory (unless they're over 16 bytes, which will break since
3389 we only have four call-clobbered registers to play with). */
3390 if (TREE_CODE (type) == VECTOR_TYPE)
3391 return (size < 0 || size > (4 * UNITS_PER_WORD));
3393 /* The rest go in memory. */
/* Non-AAPCS handling starts here.  */
3397 if (TREE_CODE (type) == VECTOR_TYPE)
3398 return (size < 0 || size > (4 * UNITS_PER_WORD));
3400 if (!AGGREGATE_TYPE_P (type) &&
3401 (TREE_CODE (type) != VECTOR_TYPE))
3402 /* All simple types are returned in registers. */
3405 if (arm_abi != ARM_ABI_APCS)
3407 /* ATPCS and later return aggregate types in memory only if they are
3408 larger than a word (or are variable size). */
3409 return (size < 0 || size > UNITS_PER_WORD);
3412 /* For the arm-wince targets we choose to be compatible with Microsoft's
3413 ARM and Thumb compilers, which always return aggregates in memory. */
3415 /* All structures/unions bigger than one word are returned in memory.
3416 Also catch the case where int_size_in_bytes returns -1. In this case
3417 the aggregate is either huge or of variable size, and in either case
3418 we will want to return it via memory and not in a register. */
3419 if (size < 0 || size > UNITS_PER_WORD)
3422 if (TREE_CODE (type) == RECORD_TYPE)
3426 /* For a struct the APCS says that we only return in a register
3427 if the type is 'integer like' and every addressable element
3428 has an offset of zero. For practical purposes this means
3429 that the structure can have at most one non bit-field element
3430 and that this element must be the first one in the structure. */
3432 /* Find the first field, ignoring non FIELD_DECL things which will
3433 have been created by C++. */
3434 for (field = TYPE_FIELDS (type);
3435 field && TREE_CODE (field) != FIELD_DECL;
3436 field = TREE_CHAIN (field))
3440 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3442 /* Check that the first field is valid for returning in a register. */
3444 /* ... Floats are not allowed */
3445 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3448 /* ... Aggregates that are not themselves valid for returning in
3449 a register are not allowed. */
3450 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3453 /* Now check the remaining fields, if any. Only bitfields are allowed,
3454 since they are not addressable. */
3455 for (field = TREE_CHAIN (field);
3457 field = TREE_CHAIN (field))
3459 if (TREE_CODE (field) != FIELD_DECL)
3462 if (!DECL_BIT_FIELD_TYPE (field))
3469 if (TREE_CODE (type) == UNION_TYPE)
3473 /* Unions can be returned in registers if every element is
3474 integral, or can be returned in an integer register. */
3475 for (field = TYPE_FIELDS (type);
3477 field = TREE_CHAIN (field))
3479 if (TREE_CODE (field) != FIELD_DECL)
3482 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3485 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3491 #endif /* not ARM_WINCE */
3493 /* Return all other types in memory. */
3497 /* Indicate whether or not words of a double are in big-endian order.
     Returns nonzero (big-endian word order) or zero.  NOTE(review):
     lines are elided in this listing; the Maverick branch's return value
     is not visible here — confirm against the full source.  */
3500 arm_float_words_big_endian (void)
3502 if (TARGET_MAVERICK)
3505 /* For FPA, float words are always big-endian. For VFP, floats words
3506 follow the memory system mode. */
3514 return (TARGET_BIG_END ? 1 : 0);
3519 /* Table mapping the string argument of a "pcs" function attribute to
     the corresponding arm_pcs enumerator.  The list is terminated by a
     NULL entry; only the first two variants are currently implemented.  */
     const struct pcs_attribute_arg
3523 } pcs_attribute_args[] =
3525 {"aapcs", ARM_PCS_AAPCS},
3526 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3528 /* We could recognize these, but changes would be needed elsewhere
3529 * to implement them. */
3530 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3531 {"atpcs", ARM_PCS_ATPCS},
3532 {"apcs", ARM_PCS_APCS},
3534 {NULL, ARM_PCS_UNKNOWN}
     /* Decode the argument of a "pcs" attribute (ATTR) into an arm_pcs
        value.  Returns ARM_PCS_UNKNOWN when the argument is missing, is
        not a string constant, or does not match any entry in
        pcs_attribute_args.  */
3538 arm_pcs_from_attribute (tree attr)
3540 const struct pcs_attribute_arg *ptr;
3543 /* Get the value of the argument. */
3544 if (TREE_VALUE (attr) == NULL_TREE
3545 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3546 return ARM_PCS_UNKNOWN;
3548 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3550 /* Check it against the list of known arguments. */
3551 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3552 if (streq (arg, ptr->arg))
3555 /* An unrecognized PCS name.  */
3556 return ARM_PCS_UNKNOWN;
3559 /* Get the PCS variant to use for this call. TYPE is the function's type
3560 specification, DECL is the specific declaration.  DECL may be null if
3561 the call could be indirect or if this is a library call. */
3563 arm_get_pcs_model (const_tree type, const_tree decl)
3565 bool user_convention = false;
3566 enum arm_pcs user_pcs = arm_pcs_default;
3571 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3574 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3575 user_convention = true;
3578 if (TARGET_AAPCS_BASED)
3580 /* Detect varargs functions. These always use the base rules
3581 (no argument is ever a candidate for a co-processor
3583 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3584 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3585 != void_type_node));
3587 if (user_convention)
3589 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3590 sorry ("Non-AAPCS derived PCS variant");
3591 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3592 error ("Variadic functions must use the base AAPCS variant");
3596 return ARM_PCS_AAPCS;
3597 else if (user_convention)
3599 else if (decl && flag_unit_at_a_time)
3601 /* Local functions never leak outside this compilation unit,
3602 so we are free to use whatever conventions are
3604 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3605 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3607 return ARM_PCS_AAPCS_LOCAL;
3610 else if (user_convention && user_pcs != arm_pcs_default)
3611 sorry ("PCS variant");
3613 /* For everything else we use the target's default. */
3614 return arm_pcs_default;
     /* Initialize the VFP co-processor argument state in *PCUM: mark all
        NUM_VFP_ARG_REGS VFP argument registers as free and nothing as
        currently allocated.  FNTYPE/LIBCALL/FNDECL are unused here.  */
3619 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3620 const_tree fntype ATTRIBUTE_UNUSED,
3621 rtx libcall ATTRIBUTE_UNUSED,
3622 const_tree fndecl ATTRIBUTE_UNUSED)
3624 /* Record the unallocated VFP registers. */
3625 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3626 pcum->aapcs_vfp_reg_alloc = 0;
3629 /* Walk down the type tree of TYPE counting consecutive base elements.
3630 If *MODEP is VOIDmode, then set it to the first valid floating point
3631 type. If a non-floating point type is found, or if a floating point
3632 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3633 otherwise return the count in the sub-tree.  Used to detect AAPCS
     "homogeneous aggregates" for VFP argument/return passing.  */
3635 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3637 enum machine_mode mode;
3640 switch (TREE_CODE (type))
3643 mode = TYPE_MODE (type);
3644 if (mode != DFmode && mode != SFmode)
3647 if (*modep == VOIDmode)
     /* Complex types: the component mode must be SFmode/DFmode and count
        as two elements.  */
3656 mode = TYPE_MODE (TREE_TYPE (type));
3657 if (mode != DFmode && mode != SFmode)
3660 if (*modep == VOIDmode)
3669 /* Use V2SImode and V4SImode as representatives of all 64-bit
3670 and 128-bit vector types, whether or not those modes are
3671 supported with the present options. */
3672 size = int_size_in_bytes (type);
3685 if (*modep == VOIDmode)
3688 /* Vector modes are considered to be opaque: two vectors are
3689 equivalent for the purposes of being homogeneous aggregates
3690 if they are the same size. */
     /* ARRAY_TYPE: element count times the sub-candidate count of the
        element type; requires complete type and constant bounds.  */
3699 tree index = TYPE_DOMAIN (type);
3701 /* Can't handle incomplete types. */
3702 if (!COMPLETE_TYPE_P(type))
3705 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3708 || !TYPE_MAX_VALUE (index)
3709 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3710 || !TYPE_MIN_VALUE (index)
3711 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3715 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3716 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3718 /* There must be no padding. */
3719 if (!host_integerp (TYPE_SIZE (type), 1)
3720 || (tree_low_cst (TYPE_SIZE (type), 1)
3721 != count * GET_MODE_BITSIZE (*modep)))
     /* RECORD_TYPE: sum the sub-candidate counts of all fields.  */
3733 /* Can't handle incomplete types. */
3734 if (!COMPLETE_TYPE_P(type))
3737 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3739 if (TREE_CODE (field) != FIELD_DECL)
3742 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3748 /* There must be no padding. */
3749 if (!host_integerp (TYPE_SIZE (type), 1)
3750 || (tree_low_cst (TYPE_SIZE (type), 1)
3751 != count * GET_MODE_BITSIZE (*modep)))
3758 case QUAL_UNION_TYPE:
3760 /* These aren't very interesting except in a degenerate case. */
     /* Unions: the count is the maximum over the members, not the sum.  */
3765 /* Can't handle incomplete types. */
3766 if (!COMPLETE_TYPE_P(type))
3769 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3771 if (TREE_CODE (field) != FIELD_DECL)
3774 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3777 count = count > sub_count ? count : sub_count;
3780 /* There must be no padding. */
3781 if (!host_integerp (TYPE_SIZE (type), 1)
3782 || (tree_low_cst (TYPE_SIZE (type), 1)
3783 != count * GET_MODE_BITSIZE (*modep)))
     /* Return true if an argument or return value of MODE/TYPE can be held
        in VFP registers, recording the element mode in *BASE_MODE.
        Scalar FP and vector modes qualify directly; complex FP counts as
        two elements; BLKmode or vector-typed aggregates qualify when
        aapcs_vfp_sub_candidate finds a homogeneous FP aggregate of 1-4
        elements.  */
3797 aapcs_vfp_is_call_or_return_candidate (enum machine_mode mode, const_tree type,
3798 enum machine_mode *base_mode,
3801 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3802 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3803 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3809 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3812 *base_mode = (mode == DCmode ? DFmode : SFmode);
3815 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3817 enum machine_mode aggregate_mode = VOIDmode;
3818 int ag_count = aapcs_vfp_sub_candidate (type, &aggregate_mode);
3820 if (ag_count > 0 && ag_count <= 4)
3823 *base_mode = aggregate_mode;
     /* Return true if a result of MODE/TYPE can be returned in VFP
        registers under PCS_VARIANT.  Only the AAPCS-VFP variant (or a
        local function on a hard-float VFP target) qualifies.  */
3831 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3832 enum machine_mode mode, const_tree type)
3834 int count ATTRIBUTE_UNUSED;
3835 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3837 if (!(pcs_variant == ARM_PCS_AAPCS_VFP
3838 || (pcs_variant == ARM_PCS_AAPCS_LOCAL
3839 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3841 return aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
     /* Return true if an argument of MODE/TYPE is a VFP register
        candidate for the call described by PCUM, caching the element
        mode and count in pcum->aapcs_vfp_rmode/rcount for the later
        allocation step.  */
3845 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3848 if (!(pcum->pcs_variant == ARM_PCS_AAPCS_VFP
3849 || (pcum->pcs_variant == ARM_PCS_AAPCS_LOCAL
3850 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3852 return aapcs_vfp_is_call_or_return_candidate (mode, type,
3853 &pcum->aapcs_vfp_rmode,
3854 &pcum->aapcs_vfp_rcount);
     /* Try to allocate VFP registers for the argument described by
        PCUM/MODE.  Searches for a contiguous run of free S-registers
        sized and aligned for the cached rmode/rcount; on success sets
        pcum->aapcs_reg (a single REG, or a PARALLEL of EXPR_LISTs for
        BLKmode / unsupported-vector cases) and records the allocation
        mask.  Returns false (elided here) when no run is free.  */
3858 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3859 const_tree type ATTRIBUTE_UNUSED)
3861 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3862 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
     /* Step by SHIFT so the candidate run stays naturally aligned for
        the element mode.  */
3865 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3866 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3868 pcum->aapcs_vfp_reg_alloc = mask << regno;
3869 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3872 int rcount = pcum->aapcs_vfp_rcount;
3874 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3878 /* Avoid using unsupported vector modes. */
3879 if (rmode == V2SImode)
3881 else if (rmode == V4SImode)
3888 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3889 for (i = 0; i < rcount; i++)
3891 rtx tmp = gen_rtx_REG (rmode,
3892 FIRST_VFP_REGNUM + regno + i * rshift);
3893 tmp = gen_rtx_EXPR_LIST
3895 GEN_INT (i * GET_MODE_SIZE (rmode)));
3896 XVECEXP (par, 0, i) = tmp;
3899 pcum->aapcs_reg = par;
3902 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
     /* Build the RTX describing where a VFP-candidate return value of
        MODE/TYPE lives.  For BLKmode (or TImode without NEON) the result
        is a PARALLEL of element registers starting at FIRST_VFP_REGNUM;
        otherwise a single REG at FIRST_VFP_REGNUM.  Must only be called
        after aapcs_vfp_is_return_candidate succeeded.  */
3909 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3910 enum machine_mode mode,
3911 const_tree type ATTRIBUTE_UNUSED)
3913 if (!(pcs_variant == ARM_PCS_AAPCS_VFP
3914 || (pcs_variant == ARM_PCS_AAPCS_LOCAL
3915 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3917 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3920 enum machine_mode ag_mode;
3925 aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
     /* Avoid using unsupported vector modes (cf. aapcs_vfp_allocate).  */
3929 if (ag_mode == V2SImode)
3931 else if (ag_mode == V4SImode)
3937 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3938 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3939 for (i = 0; i < count; i++)
3941 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3942 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3943 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
3944 XVECEXP (par, 0, i) = tmp;
3950 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
     /* Commit the VFP registers allocated for the current argument:
        remove them from the free set and clear the pending allocation.  */
3954 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3955 enum machine_mode mode ATTRIBUTE_UNUSED,
3956 const_tree type ATTRIBUTE_UNUSED)
3958 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
3959 pcum->aapcs_vfp_reg_alloc = 0;
3963 /* Expand to the six aapcs_<X>_* hook functions that make up one
     co-processor slot entry in aapcs_cp_arg_layout below.  */
     #define AAPCS_CP(X) \
3965 aapcs_ ## X ## _cum_init, \
3966 aapcs_ ## X ## _is_call_candidate, \
3967 aapcs_ ## X ## _allocate, \
3968 aapcs_ ## X ## _is_return_candidate, \
3969 aapcs_ ## X ## _allocate_return_reg, \
3970 aapcs_ ## X ## _advance \
3973 /* Table of co-processors that can be used to pass arguments in
3974 registers.  Ideally no argument should be a candidate for more than
3975 one co-processor table entry, but the table is processed in order
3976 and stops after the first match. If that entry then fails to put
3977 the argument into a co-processor register, the argument will go on
3981 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
3982 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
3984 /* Return true if an argument of mode MODE (or type TYPE if MODE is
3985 BLKmode) is a candidate for this co-processor's registers; this
3986 function should ignore any position-dependent state in
3987 CUMULATIVE_ARGS and only use call-type dependent information. */
3988 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3990 /* Return true if the argument does get a co-processor register; it
3991 should set aapcs_reg to an RTX of the register allocated as is
3992 required for a return from FUNCTION_ARG. */
3993 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3995 /* Return true if a result of mode MODE (or type TYPE if MODE is
3996 BLKmode) can be returned in this co-processor's registers. */
3997 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
3999 /* Allocate and return an RTX element to hold the return type of a
4000 call, this routine must not fail and will only be called if
4001 is_return_candidate returned true with the same parameters. */
4002 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4004 /* Finish processing this argument and prepare to start processing
4006 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4007 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
     /* Return the index of the first co-processor slot whose
        is_call_candidate hook accepts MODE/TYPE for this call, or the
        (elided) not-found value.  */
4015 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4020 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4021 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
     /* Return the index of the co-processor slot (if any) that can return
        a value of TYPE for a function of FNTYPE.  FNTYPE may be a
        FUNCTION_DECL, in which case its type is extracted first.  */
4028 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4030 /* We aren't passed a decl, so we can't check that a call is local.
4031 However, it isn't clear that that would be a win anyway, since it
4032 might limit some tail-calling opportunities. */
4033 enum arm_pcs pcs_variant;
4037 const_tree fndecl = NULL_TREE;
4039 if (TREE_CODE (fntype) == FUNCTION_DECL)
4042 fntype = TREE_TYPE (fntype);
4045 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4048 pcs_variant = arm_pcs_default;
4050 if (pcs_variant != ARM_PCS_AAPCS)
4054 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4055 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
     /* Return the RTX describing where a result of MODE/TYPE is returned
        under the AAPCS.  Tries co-processor return slots first; otherwise
        promotes integral types and returns core register R0 (with small
        structs promoted to full-word modes on big-endian targets).  */
4064 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4067 /* We aren't passed a decl, so we can't check that a call is local.
4068 However, it isn't clear that that would be a win anyway, since it
4069 might limit some tail-calling opportunities. */
4070 enum arm_pcs pcs_variant;
4071 int unsignedp ATTRIBUTE_UNUSED;
4075 const_tree fndecl = NULL_TREE;
4077 if (TREE_CODE (fntype) == FUNCTION_DECL)
4080 fntype = TREE_TYPE (fntype);
4083 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4086 pcs_variant = arm_pcs_default;
4088 /* Promote integer types. */
4089 if (type && INTEGRAL_TYPE_P (type))
4090 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4092 if (pcs_variant != ARM_PCS_AAPCS)
4096 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4097 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4099 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4103 /* Promotes small structs returned in a register to full-word size
4104 for big-endian AAPCS. */
4105 if (type && arm_return_in_msb (type))
4107 HOST_WIDE_INT size = int_size_in_bytes (type);
4108 if (size % UNITS_PER_WORD != 0)
4110 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4111 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4115 return gen_rtx_REG (mode, R0_REGNUM);
     /* Return the register RTX for a library call result of MODE; library
        calls have no tree type or function type available.  */
4119 aapcs_libcall_value (enum machine_mode mode)
4121 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4124 /* Lay out a function argument using the AAPCS rules. The rule
4125 numbers referred to here are those in the AAPCS.  Fills in the
     aapcs_* fields of PCUM (register, partial bytes, next NCRN) for the
     argument described by MODE/TYPE/NAMED; idempotent per argument via
     aapcs_arg_processed.  */
4127 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4128 tree type, int named)
4133 /* We only need to do this once per argument. */
4134 if (pcum->aapcs_arg_processed)
4137 pcum->aapcs_arg_processed = true;
4139 /* Special case: if named is false then we are handling an incoming
4140 anonymous argument which is on the stack. */
4144 /* Is this a potential co-processor register candidate? */
4145 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4147 int slot = aapcs_select_call_coproc (pcum, mode, type);
4148 pcum->aapcs_cprc_slot = slot;
4150 /* We don't have to apply any of the rules from part B of the
4151 preparation phase, these are handled elsewhere in the
4156 /* A Co-processor register candidate goes either in its own
4157 class of registers or on the stack. */
4158 if (!pcum->aapcs_cprc_failed[slot])
4160 /* C1.cp - Try to allocate the argument to co-processor
4162 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4165 /* C2.cp - Put the argument on the stack and note that we
4166 can't assign any more candidates in this slot. We also
4167 need to note that we have allocated stack space, so that
4168 we won't later try to split a non-cprc candidate between
4169 core registers and the stack. */
4170 pcum->aapcs_cprc_failed[slot] = true;
4171 pcum->can_split = false;
4174 /* We didn't get a register, so this argument goes on the
4176 gcc_assert (pcum->can_split == false);
4181 /* C3 - For double-word aligned arguments, round the NCRN up to the
4182 next even number. */
4183 ncrn = pcum->aapcs_ncrn;
4184 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4187 nregs = ARM_NUM_REGS2(mode, type);
4189 /* Sigh, this test should really assert that nregs > 0, but a GCC
4190 extension allows empty structs and then gives them empty size; it
4191 then allows such a structure to be passed by value. For some of
4192 the code below we have to pretend that such an argument has
4193 non-zero size so that we 'locate' it correctly either in
4194 registers or on the stack. */
4195 gcc_assert (nregs >= 0);
4197 nregs2 = nregs ? nregs : 1;
4199 /* C4 - Argument fits entirely in core registers. */
4200 if (ncrn + nregs2 <= NUM_ARG_REGS)
4202 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4203 pcum->aapcs_next_ncrn = ncrn + nregs;
4207 /* C5 - Some core registers left and there are no arguments already
4208 on the stack: split this argument between the remaining core
4209 registers and the stack. */
4210 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4212 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4213 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4214 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4218 /* C6 - NCRN is set to 4. */
4219 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4221 /* C7,C8 - argument goes on the stack.  We have nothing to do here. */
4225 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4226 for a call to a function whose data type is FNTYPE.
4227 For a library call, FNTYPE is NULL.  Selects the PCS variant,
     resets the AAPCS/co-processor state, and for iWMMXt counts the
     named arguments so anonymous vector args can be detected later.  */
4229 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4231 tree fndecl ATTRIBUTE_UNUSED)
4233 /* Long call handling. */
4235 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4237 pcum->pcs_variant = arm_pcs_default;
4239 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4241 if (arm_libcall_uses_aapcs_base (libname))
4242 pcum->pcs_variant = ARM_PCS_AAPCS;
4244 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4245 pcum->aapcs_reg = NULL_RTX;
4246 pcum->aapcs_partial = 0;
4247 pcum->aapcs_arg_processed = false;
4248 pcum->aapcs_cprc_slot = -1;
4249 pcum->can_split = true;
4251 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4255 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4257 pcum->aapcs_cprc_failed[i] = false;
4258 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4266 /* On the ARM, the offset starts at 0. */
4268 pcum->iwmmxt_nregs = 0;
4269 pcum->can_split = true;
4271 /* Varargs vectors are treated the same as long long.
4272 named_count avoids having to change the way arm handles 'named' */
4273 pcum->named_count = 0;
4276 if (TARGET_REALLY_IWMMXT && fntype)
4280 for (fn_arg = TYPE_ARG_TYPES (fntype);
4282 fn_arg = TREE_CHAIN (fn_arg))
4283 pcum->named_count += 1;
4285 if (! pcum->named_count)
4286 pcum->named_count = INT_MAX;
4291 /* Return true if mode/type need doubleword alignment, i.e. either the
     mode's natural alignment or the type's declared alignment exceeds
     PARM_BOUNDARY.  */
4293 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4295 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4296 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4300 /* Determine where to put an argument to a function.
4301 Value is zero to push the argument on the stack,
4302 or a hard register in which to store the argument.
4304 MODE is the argument's machine mode.
4305 TYPE is the data type of the argument (as a tree).
4306 This is null for libcalls where that information may
4308 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4309 the preceding args and about the function being called.
4310 NAMED is nonzero if this argument is a named parameter
4311 (otherwise it is an extra parameter matching an ellipsis). */
4314 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4315 tree type, int named)
4319 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4320 a call insn (op3 of a call_value insn). */
4321 if (mode == VOIDmode)
     /* AAPCS-based variants delegate the whole layout decision.  */
4324 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4326 aapcs_layout_arg (pcum, mode, type, named);
4327 return pcum->aapcs_reg;
4330 /* Varargs vectors are treated the same as long long.
4331 named_count avoids having to change the way arm handles 'named' */
4332 if (TARGET_IWMMXT_ABI
4333 && arm_vector_mode_supported_p (mode)
4334 && pcum->named_count > pcum->nargs + 1)
4336 if (pcum->iwmmxt_nregs <= 9)
4337 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4340 pcum->can_split = false;
4345 /* Put doubleword aligned quantities in even register pairs. */
4347 && ARM_DOUBLEWORD_ALIGN
4348 && arm_needs_doubleword_align (mode, type)
4351 if (mode == VOIDmode)
4352 /* Pick an arbitrary value for operand 2 of the call insn. */
4355 /* Only allow splitting an arg between regs and memory if all preceding
4356 args were allocated to regs. For args passed by reference we only count
4357 the reference pointer. */
4358 if (pcum->can_split)
4361 nregs = ARM_NUM_REGS2 (mode, type);
4363 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4366 return gen_rtx_REG (mode, pcum->nregs);
     /* Return the number of bytes of the argument that are passed in
        registers when it is split between registers and the stack
        (0 when it is not split).  AAPCS variants use the precomputed
        aapcs_partial value.  */
4370 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4371 tree type, bool named)
4373 int nregs = pcum->nregs;
4375 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4377 aapcs_layout_arg (pcum, mode, type, named);
4378 return pcum->aapcs_partial;
     /* iWMMXt vector arguments are never split.  */
4381 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4384 if (NUM_ARG_REGS > nregs
4385 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4387 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
     /* Advance PCUM past the argument just laid out: commit any
        co-processor allocation and reset the per-argument AAPCS state,
        or bump the iWMMXt/core register counters for the legacy ABIs.  */
4393 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4394 tree type, bool named)
4396 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4398 aapcs_layout_arg (pcum, mode, type, named);
4400 if (pcum->aapcs_cprc_slot >= 0)
4402 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4404 pcum->aapcs_cprc_slot = -1;
4407 /* Generic stuff. */
4408 pcum->aapcs_arg_processed = false;
4409 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4410 pcum->aapcs_reg = NULL_RTX;
4411 pcum->aapcs_partial = 0;
4416 if (arm_vector_mode_supported_p (mode)
4417 && pcum->named_count > pcum->nargs
4418 && TARGET_IWMMXT_ABI)
4419 pcum->iwmmxt_nregs += 1;
4421 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4425 /* Variable sized types are passed by reference. This is a GCC
4426 extension to the ARM ABI.  Returns true when TYPE's size is not a
     compile-time constant.  */
4429 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4430 enum machine_mode mode ATTRIBUTE_UNUSED,
4431 const_tree type, bool named ATTRIBUTE_UNUSED)
4433 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4436 /* Encode the current state of the #pragma [no_]long_calls.  Default
     is OFF until one of the pragma handlers below changes it.  */
4439 OFF, /* No #pragma [no_]long_calls is in effect. */
4440 LONG, /* #pragma long_calls is in effect. */
4441 SHORT /* #pragma no_long_calls is in effect. */
4444 static arm_pragma_enum arm_pragma_long_calls = OFF;
     /* Handler for "#pragma long_calls": subsequent functions get the
        long_call attribute (see arm_set_default_type_attributes).  */
4447 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4449 arm_pragma_long_calls = LONG;
     /* Handler for "#pragma no_long_calls": subsequent functions get the
        short_call attribute.  */
4453 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4455 arm_pragma_long_calls = SHORT;
     /* Handler for "#pragma long_calls_off": restore the default
        (no pragma in effect).  */
4459 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4461 arm_pragma_long_calls = OFF;
4464 /* Handle an attribute requiring a FUNCTION_DECL;
4465 arguments as in struct attribute_spec.handler.  Warns and drops the
     attribute when applied to anything other than a function.  */
4467 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4468 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4470 if (TREE_CODE (*node) != FUNCTION_DECL)
4472 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4474 *no_add_attrs = true;
4480 /* Handle an "interrupt" or "isr" attribute;
4481 arguments as in struct attribute_spec.handler.  Validates the
     interrupt kind via arm_isr_value and attaches the attribute to the
     function type (or propagates it from a type to a decl).  */
4483 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4488 if (TREE_CODE (*node) != FUNCTION_DECL)
4490 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4492 *no_add_attrs = true;
4494 /* FIXME: the argument if any is checked for type attributes;
4495 should it be checked for decl ones? */
4499 if (TREE_CODE (*node) == FUNCTION_TYPE
4500 || TREE_CODE (*node) == METHOD_TYPE)
4502 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4504 warning (OPT_Wattributes, "%qE attribute ignored",
4506 *no_add_attrs = true;
     /* Pointer-to-function: rebuild the pointee type with the attribute
        attached rather than tagging the pointer type itself.  */
4509 else if (TREE_CODE (*node) == POINTER_TYPE
4510 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4511 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4512 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4514 *node = build_variant_type_copy (*node);
4515 TREE_TYPE (*node) = build_type_attribute_variant
4517 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4518 *no_add_attrs = true;
4522 /* Possibly pass this attribute on from the type to a decl. */
4523 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4524 | (int) ATTR_FLAG_FUNCTION_NEXT
4525 | (int) ATTR_FLAG_ARRAY_NEXT))
4527 *no_add_attrs = true;
4528 return tree_cons (name, args, NULL_TREE);
4532 warning (OPT_Wattributes, "%qE attribute ignored",
4541 /* Handle a "pcs" attribute; arguments as in struct
4542 attribute_spec.handler.  Warns and drops the attribute when its
     argument does not name a known PCS variant.  */
4544 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4545 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4547 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4549 warning (OPT_Wattributes, "%qE attribute ignored", name);
4550 *no_add_attrs = true;
4555 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4556 /* Handle the "notshared" attribute. This attribute is another way of
4557 requesting hidden visibility. ARM's compiler supports
4558 "__declspec(notshared)"; we support the same thing via an
     attribute; implemented here by forcing hidden visibility on the
     type's TYPE_NAME decl.  */
4562 arm_handle_notshared_attribute (tree *node,
4563 tree name ATTRIBUTE_UNUSED,
4564 tree args ATTRIBUTE_UNUSED,
4565 int flags ATTRIBUTE_UNUSED,
4568 tree decl = TYPE_NAME (*node);
4572 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4573 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4574 *no_add_attrs = false;
4580 /* Return 0 if the attributes for two types are incompatible, 1 if they
4581 are compatible, and 2 if they are nearly compatible (which causes a
4582 warning to be generated).  Compares long_call/short_call and
     isr/interrupt attributes of two function types.  */
4584 arm_comp_type_attributes (const_tree type1, const_tree type2)
4588 /* Check for mismatch of non-default calling convention. */
4589 if (TREE_CODE (type1) != FUNCTION_TYPE)
4592 /* Check for mismatched call attributes. */
4593 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4594 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4595 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4596 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4598 /* Only bother to check if an attribute is defined. */
4599 if (l1 | l2 | s1 | s2)
4601 /* If one type has an attribute, the other must have the same attribute. */
4602 if ((l1 != l2) || (s1 != s2))
4605 /* Disallow mixed attributes. */
4606 if ((l1 & s2) || (l2 & s1))
4610 /* Check for mismatched ISR attribute.  "isr" and "interrupt" are
     treated as equivalent spellings.  */
4611 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4613 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4614 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4616 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4623 /* Assigns default attributes to newly defined type. This is used to
4624 set short_call/long_call attributes for function types of
4625 functions defined inside corresponding #pragma scopes. */
4627 arm_set_default_type_attributes (tree type)
4629 /* Add __attribute__ ((long_call)) to all functions, when
4630 inside #pragma long_calls or __attribute__ ((short_call)),
4631 when inside #pragma no_long_calls. */
4632 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4634 tree type_attr_list, attr_name;
4635 type_attr_list = TYPE_ATTRIBUTES (type);
4637 if (arm_pragma_long_calls == LONG)
4638 attr_name = get_identifier ("long_call");
4639 else if (arm_pragma_long_calls == SHORT)
4640 attr_name = get_identifier ("short_call");
4644 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4645 TYPE_ATTRIBUTES (type) = type_attr_list;
4649 /* Return true if DECL is known to be linked into section SECTION.
     Requires DECL to be defined in this unit, to bind locally, and not
     to be placed in a unique section.  */
4652 arm_function_in_section_p (tree decl, section *section)
4654 /* We can only be certain about functions defined in the same
4655 compilation unit. */
4656 if (!TREE_STATIC (decl))
4659 /* Make sure that SYMBOL always binds to the definition in this
4660 compilation unit. */
4661 if (!targetm.binds_local_p (decl))
4664 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4665 if (!DECL_SECTION_NAME (decl))
4667 /* Make sure that we will not create a unique section for DECL. */
4668 if (flag_function_sections || DECL_ONE_ONLY (decl))
4672 return function_section (decl) == section;
4675 /* Return nonzero if a 32-bit "long_call" should be generated for
4676 a call from the current function to DECL. We generate a long_call
4679 a. has an __attribute__((long call))
4680 or b. is within the scope of a #pragma long_calls
4681 or c. the -mlong-calls command line switch has been specified
4683 However we do not generate a long call if the function:
4685 d. has an __attribute__ ((short_call))
4686 or e. is inside the scope of a #pragma no_long_calls
4687 or f. is defined in the same section as the current function. */
4690 arm_is_long_call_p (tree decl)
4695 return TARGET_LONG_CALLS;
4697 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4698 if (lookup_attribute ("short_call", attrs))
4701 /* For "f", be conservative, and only cater for cases in which the
4702 whole of the current function is placed in the same section. */
4703 if (!flag_reorder_blocks_and_partition
4704 && TREE_CODE (decl) == FUNCTION_DECL
4705 && arm_function_in_section_p (decl, current_function_section ()))
4708 if (lookup_attribute ("long_call", attrs))
4711 return TARGET_LONG_CALLS;
4714 /* Return nonzero if it is ok to make a tail-call to DECL.  EXP is the
     CALL_EXPR; used to check that caller and callee agree on the return
     value location.  */
4716 arm_function_ok_for_sibcall (tree decl, tree exp)
4718 unsigned long func_type;
4720 if (cfun->machine->sibcall_blocked)
4723 /* Never tailcall something for which we have no decl, or if we
4724 are in Thumb mode. */
4725 if (decl == NULL || TARGET_THUMB)
4728 /* The PIC register is live on entry to VxWorks PLT entries, so we
4729 must make the call before restoring the PIC register. */
4730 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4733 /* Cannot tail-call to long calls, since these are out of range of
4734 a branch instruction. */
4735 if (arm_is_long_call_p (decl))
4738 /* If we are interworking and the function is not declared static
4739 then we can't tail-call it unless we know that it exists in this
4740 compilation unit (since it might be a Thumb routine). */
4741 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4744 func_type = arm_current_func_type ();
4745 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4746 if (IS_INTERRUPT (func_type))
4749 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4751 /* Check that the return value locations are the same. For
4752 example that we aren't returning a value from the sibling in
4753 a VFP register but then need to transfer it to a core
4757 a = arm_function_value (TREE_TYPE (exp), decl, false);
4758 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4760 if (!rtx_equal_p (a, b))
4764 /* Never tailcall if function may be called with a misaligned SP. */
4765 if (IS_STACKALIGN (func_type))
4768 /* Everything else is ok. */
4773 /* Addressing mode support functions. */
4775 /* Return nonzero if X is a legitimate immediate operand when compiling
4776 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true.
     Bare symbol refs (and symbol+offset CONSTs) need PIC relocation and
     are therefore not legitimate immediates.  */
4778 legitimate_pic_operand_p (rtx x)
4780 if (GET_CODE (x) == SYMBOL_REF
4781 || (GET_CODE (x) == CONST
4782 && GET_CODE (XEXP (x, 0)) == PLUS
4783 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4789 /* Record that the current function needs a PIC register. Initialize
4790 cfun->machine->pic_reg if we have not already done so. */
4793 require_pic_register (void)
4795 /* A lot of the logic here is made obscure by the fact that this
4796 routine gets called as part of the rtx cost estimation process.
4797 We don't want those calls to affect any assumptions about the real
4798 function; and further, we can't call entry_of_function() until we
4799 start the real expansion process. */
4800 if (!crtl->uses_pic_offset_table)
4802 gcc_assert (can_create_pseudo_p ());
/* Case 1: a fixed PIC hard register was requested (-mpic-register);
   wrap that hard register in cfun->machine->pic_reg.  */
4803 if (arm_pic_register != INVALID_REGNUM)
4805 if (!cfun->machine->pic_reg)
4806 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4808 /* Play games to avoid marking the function as needing pic
4809 if we are being called as part of the cost-estimation
4811 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4812 crtl->uses_pic_offset_table = 1;
/* Case 2: no fixed PIC register; use a fresh pseudo instead.  */
4818 if (!cfun->machine->pic_reg)
4819 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4821 /* Play games to avoid marking the function as needing pic
4822 if we are being called as part of the cost-estimation
4824 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4826 crtl->uses_pic_offset_table = 1;
/* Emit the PIC base load; the generated sequence is queued on the
   entry edge rather than emitted inline.  NOTE(review): the
   start_sequence/get_insns pair that produces `seq' is not visible
   in this extract — confirm against the full source.  */
4829 arm_load_pic_register (0UL);
4833 /* We can be called during expansion of PHI nodes, where
4834 we can't yet emit instructions directly in the final
4835 insn stream. Queue the insns on the entry edge, they will
4836 be committed after everything else is expanded. */
4837 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
/* Convert ORIG (a symbolic address) into a form legal under PIC,
   loading symbols through the GOT where required.  REG, if non-null,
   is a scratch/target register; MODE is the mode of the eventual
   memory access (used to validate constant offsets).  Returns the
   legitimized address rtx.  */
4844 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4846 if (GET_CODE (orig) == SYMBOL_REF
4847 || GET_CODE (orig) == LABEL_REF)
4849 rtx pic_ref, address;
4853 /* If this function doesn't have a pic register, create one now. */
4854 require_pic_register ();
4858 gcc_assert (can_create_pseudo_p ());
4859 reg = gen_reg_rtx (Pmode);
4865 address = gen_reg_rtx (Pmode);
/* Materialize the symbol's GOT (or GOTOFF) offset into ADDRESS,
   using the per-ISA load-address pattern.  */
4870 emit_insn (gen_pic_load_addr_arm (address, orig));
4871 else if (TARGET_THUMB2)
4872 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
4873 else /* TARGET_THUMB1 */
4874 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4876 /* VxWorks does not impose a fixed gap between segments; the run-time
4877 gap can be different from the object-file gap. We therefore can't
4878 use GOTOFF unless we are absolutely sure that the symbol is in the
4879 same segment as the GOT. Unfortunately, the flexibility of linker
4880 scripts means that we can't be sure of that in general, so assume
4881 that GOTOFF is never valid on VxWorks. */
4882 if ((GET_CODE (orig) == LABEL_REF
4883 || (GET_CODE (orig) == SYMBOL_REF &&
4884 SYMBOL_REF_LOCAL_P (orig)))
4886 && !TARGET_VXWORKS_RTP)
/* Local symbol: pic_reg + offset directly (GOTOFF-style, no load).  */
4887 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
/* Global symbol: load the address from the GOT slot.  */
4890 pic_ref = gen_const_mem (Pmode,
4891 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4895 insn = emit_move_insn (reg, pic_ref);
4897 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4899 set_unique_reg_note (insn, REG_EQUAL, orig);
4903 else if (GET_CODE (orig) == CONST)
/* Already legitimized (pic_reg + X) forms need no further work.  */
4907 if (GET_CODE (XEXP (orig, 0)) == PLUS
4908 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4911 /* Handle the case where we have: const (UNSPEC_TLS). */
4912 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4913 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4916 /* Handle the case where we have:
4917 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4919 if (GET_CODE (XEXP (orig, 0)) == PLUS
4920 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4921 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4923 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4929 gcc_assert (can_create_pseudo_p ());
4930 reg = gen_reg_rtx (Pmode);
4933 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
/* Recursively legitimize both halves of (const (plus base offset)).
   Pass 0 for the scratch if BASE already consumed REG.  */
4935 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4936 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4937 base == reg ? 0 : reg);
4939 if (GET_CODE (offset) == CONST_INT)
4941 /* The base register doesn't really matter, we only want to
4942 test the index for the appropriate mode. */
4943 if (!arm_legitimate_index_p (mode, offset, SET, 0))
4945 gcc_assert (can_create_pseudo_p ());
4946 offset = force_reg (Pmode, offset);
4949 if (GET_CODE (offset) == CONST_INT)
4950 return plus_constant (base, INTVAL (offset));
4953 if (GET_MODE_SIZE (mode) > 4
4954 && (GET_MODE_CLASS (mode) == MODE_INT
4955 || TARGET_SOFT_FLOAT))
/* Multi-word accesses can't use reg+reg addressing; add eagerly.  */
4957 emit_insn (gen_addsi3 (reg, base, offset));
4961 return gen_rtx_PLUS (Pmode, base, offset);
4968 /* Find a spare register to use during the prolog of a function. */
/* Return the number of a register that may be clobbered in the
   prologue.  PUSHED_REGS_MASK is a bitmask of the registers the
   prologue will push (bit N set => register N is saved and therefore
   free to scribble on afterwards).  */
4971 thumb_find_work_register (unsigned long pushed_regs_mask)
4975 /* Check the argument registers first as these are call-used. The
4976 register allocation order means that sometimes r3 might be used
4977 but earlier argument registers might not, so check them all. */
4978 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
4979 if (!df_regs_ever_live_p (reg))
4982 /* Before going on to check the call-saved registers we can try a couple
4983 more ways of deducing that r3 is available. The first is when we are
4984 pushing anonymous arguments onto the stack and we have less than 4
4985 registers worth of fixed arguments(*). In this case r3 will be part of
4986 the variable argument list and so we can be sure that it will be
4987 pushed right at the start of the function. Hence it will be available
4988 for the rest of the prologue.
4989 (*): ie crtl->args.pretend_args_size is greater than 0. */
4990 if (cfun->machine->uses_anonymous_args
4991 && crtl->args.pretend_args_size > 0)
4992 return LAST_ARG_REGNUM;
4994 /* The other case is when we have fixed arguments but less than 4 registers
4995 worth. In this case r3 might be used in the body of the function, but
4996 it is not being used to convey an argument into the function. In theory
4997 we could just check crtl->args.size to see how many bytes are
4998 being passed in argument registers, but it seems that it is unreliable.
4999 Sometimes it will have the value 0 when in fact arguments are being
5000 passed. (See testcase execute/20021111-1.c for an example). So we also
5001 check the args_info.nregs field as well. The problem with this field is
5002 that it makes no allowances for arguments that are passed to the
5003 function but which are not used. Hence we could miss an opportunity
5004 when a function has an unused argument in r3. But it is better to be
5005 safe than to be sorry. */
5006 if (! cfun->machine->uses_anonymous_args
5007 && crtl->args.size >= 0
5008 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5009 && crtl->args.info.nregs < 4)
5010 return LAST_ARG_REGNUM;
5012 /* Otherwise look for a call-saved register that is going to be pushed. */
5013 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5014 if (pushed_regs_mask & (1 << reg))
5019 /* Thumb-2 can use high regs. */
5020 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5021 if (pushed_regs_mask & (1 << reg))
5024 /* Something went wrong - thumb_compute_save_reg_mask()
5025 should have arranged for a suitable register to be pushed. */
5029 static GTY(()) int pic_labelno;
5031 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
/* Emit the prologue code that initializes the PIC base register.
   SAVED_REGS is the mask of pushed registers, consulted when Thumb
   code needs a scratch register.  Does nothing when the function has
   no PIC base or a single global PIC base is in use.  */
5035 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5037 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5039 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5042 gcc_assert (flag_pic);
5044 pic_reg = cfun->machine->pic_reg;
5045 if (TARGET_VXWORKS_RTP)
/* VxWorks RTP: load the GOT base indirectly through GOTT_BASE and
   add GOTT_INDEX, instead of using a pc-relative label.  */
5047 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5048 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5049 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
5051 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5053 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5054 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5058 /* We use an UNSPEC rather than a LABEL_REF because this label
5059 never appears in the code stream. */
5061 labelno = GEN_INT (pic_labelno++);
5062 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5063 l1 = gen_rtx_CONST (VOIDmode, l1);
5065 /* On the ARM the PC register contains 'dot + 8' at the time of the
5066 addition, on the Thumb it is 'dot + 4'. */
5067 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5068 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5070 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
/* ARM state: load the offset, then add the PC via dot-plus-eight.  */
5074 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
5075 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5077 else if (TARGET_THUMB2)
5079 /* Thumb-2 only allows very limited access to the PC. Calculate the
5080 address in a temporary register. */
5081 if (arm_pic_register != INVALID_REGNUM)
5083 pic_tmp = gen_rtx_REG (SImode,
5084 thumb_find_work_register (saved_regs));
5088 gcc_assert (can_create_pseudo_p ());
5089 pic_tmp = gen_reg_rtx (Pmode);
5092 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
5093 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
5094 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
5096 else /* TARGET_THUMB1 */
/* Thumb-1: if the PIC register is a high register, build the value
   in a low work register and move it across.  */
5098 if (arm_pic_register != INVALID_REGNUM
5099 && REGNO (pic_reg) > LAST_LO_REGNUM)
5101 /* We will have pushed the pic register, so we should always be
5102 able to find a work register. */
5103 pic_tmp = gen_rtx_REG (SImode,
5104 thumb_find_work_register (saved_regs));
5105 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5106 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5109 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5110 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5114 /* Need to emit this whether or not we obey regdecls,
5115 since setjmp/longjmp can cause life info to screw up. */
5120 /* Return nonzero if X is valid as an ARM state addressing register. */
/* Return nonzero if X may serve as a base register in an ARM-state
   address.  STRICT_P distinguishes strict checking (hard registers
   only, via ARM_REGNO_OK_FOR_BASE_P) from the lax pre-reload check
   that also admits pseudos and the soft frame/arg pointers.
   NOTE(review): the declaration of `regno' (REGNO (x)) is not visible
   in this extract.  */
5122 arm_address_register_rtx_p (rtx x, int strict_p)
5126 if (GET_CODE (x) != REG)
5132 return ARM_REGNO_OK_FOR_BASE_P (regno);
5134 return (regno <= LAST_ARM_REGNUM
5135 || regno >= FIRST_PSEUDO_REGISTER
5136 || regno == FRAME_POINTER_REGNUM
5137 || regno == ARG_POINTER_REGNUM);
5140 /* Return TRUE if this rtx is the difference of a symbol and a label,
5141 and will reduce to a PC-relative relocation in the object file.
5142 Expressions like this can be left alone when generating PIC, rather
5143 than forced through the GOT. */
5145 pcrel_constant_p (rtx x)
/* (symbol - label) reduces to a pc-relative relocation; anything
   else does not.  */
5147 if (GET_CODE (x) == MINUS)
5148 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5153 /* Return nonzero if X is a valid ARM state address operand. */
/* Return nonzero if X is a valid ARM-state address for a MODE access.
   OUTER is the code of the containing operation (used to refine index
   ranges, e.g. for sign-extending loads); STRICT_P selects strict
   register checking.  */
5155 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5159 enum rtx_code code = GET_CODE (x);
/* A bare base register is always valid.  */
5161 if (arm_address_register_rtx_p (x, strict_p))
5164 use_ldrd = (TARGET_LDRD
5166 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
/* Auto-increment/decrement forms.  */
5168 if (code == POST_INC || code == PRE_DEC
5169 || ((code == PRE_INC || code == POST_DEC)
5170 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5171 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5173 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5174 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5175 && GET_CODE (XEXP (x, 1)) == PLUS
5176 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5178 rtx addend = XEXP (XEXP (x, 1), 1);
5180 /* Don't allow ldrd post increment by register because it's hard
5181 to fixup invalid register choices. */
5183 && GET_CODE (x) == POST_MODIFY
5184 && GET_CODE (addend) == REG)
5187 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5188 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5191 /* After reload constants split into minipools will have addresses
5192 from a LABEL_REF. */
5193 else if (reload_completed
5194 && (code == LABEL_REF
5196 && GET_CODE (XEXP (x, 0)) == PLUS
5197 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5198 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
/* TImode and NEON struct modes get only the base-register form.  */
5201 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5204 else if (code == PLUS)
5206 rtx xop0 = XEXP (x, 0);
5207 rtx xop1 = XEXP (x, 1);
/* base + const-index, or base + reg-index (either operand order).  */
5209 return ((arm_address_register_rtx_p (xop0, strict_p)
5210 && GET_CODE(xop1) == CONST_INT
5211 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5212 || (arm_address_register_rtx_p (xop1, strict_p)
5213 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5217 /* Reload currently can't handle MINUS, so disable this for now */
5218 else if (GET_CODE (x) == MINUS)
5220 rtx xop0 = XEXP (x, 0);
5221 rtx xop1 = XEXP (x, 1);
5223 return (arm_address_register_rtx_p (xop0, strict_p)
5224 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
/* Constant-pool references (non-FP only).  */
5228 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5229 && code == SYMBOL_REF
5230 && CONSTANT_POOL_ADDRESS_P (x)
5232 && symbol_mentioned_p (get_pool_constant (x))
5233 && ! pcrel_constant_p (get_pool_constant (x))))
5239 /* Return nonzero if X is a valid Thumb-2 address operand. */
/* Return nonzero if X is a valid Thumb-2 address for a MODE access.
   Structured like arm_legitimate_address_outer_p, but Thumb-2
   auto-modify addressing only accepts constant addends.  */
5241 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5244 enum rtx_code code = GET_CODE (x);
5246 if (arm_address_register_rtx_p (x, strict_p))
5249 use_ldrd = (TARGET_LDRD
5251 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5253 if (code == POST_INC || code == PRE_DEC
5254 || ((code == PRE_INC || code == POST_DEC)
5255 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5256 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5258 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5259 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5260 && GET_CODE (XEXP (x, 1)) == PLUS
5261 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5263 /* Thumb-2 only has autoincrement by constant. */
5264 rtx addend = XEXP (XEXP (x, 1), 1);
5265 HOST_WIDE_INT offset;
5267 if (GET_CODE (addend) != CONST_INT)
5270 offset = INTVAL(addend);
/* Word-or-smaller: +-255; LDRD: +-1020, word-aligned.  */
5271 if (GET_MODE_SIZE (mode) <= 4)
5272 return (offset > -256 && offset < 256);
5274 return (use_ldrd && offset > -1024 && offset < 1024
5275 && (offset & 3) == 0);
5278 /* After reload constants split into minipools will have addresses
5279 from a LABEL_REF. */
5280 else if (reload_completed
5281 && (code == LABEL_REF
5283 && GET_CODE (XEXP (x, 0)) == PLUS
5284 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5285 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5288 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5291 else if (code == PLUS)
5293 rtx xop0 = XEXP (x, 0);
5294 rtx xop1 = XEXP (x, 1);
5296 return ((arm_address_register_rtx_p (xop0, strict_p)
5297 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5298 || (arm_address_register_rtx_p (xop1, strict_p)
5299 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5302 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5303 && code == SYMBOL_REF
5304 && CONSTANT_POOL_ADDRESS_P (x)
5306 && symbol_mentioned_p (get_pool_constant (x))
5307 && ! pcrel_constant_p (get_pool_constant (x))))
5313 /* Return nonzero if INDEX is valid for an address index operand in
/* Return nonzero if INDEX is valid as the index part of an ARM-state
   address for a MODE access.  OUTER refines the range for extending
   loads; STRICT_P selects strict register checking.  */
5316 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5319 HOST_WIDE_INT range;
5320 enum rtx_code code = GET_CODE (index);
5322 /* Standard coprocessor addressing modes. */
5323 if (TARGET_HARD_FLOAT
5324 && (TARGET_FPA || TARGET_MAVERICK)
5325 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5326 || (TARGET_MAVERICK && mode == DImode)))
5327 return (code == CONST_INT && INTVAL (index) < 1024
5328 && INTVAL (index) > -1024
5329 && (INTVAL (index) & 3) == 0);
/* NEON D/Q-register modes: word-aligned constants, upper bound 1016.  */
5332 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5333 return (code == CONST_INT
5334 && INTVAL (index) < 1016
5335 && INTVAL (index) > -1024
5336 && (INTVAL (index) & 3) == 0);
5338 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5339 return (code == CONST_INT
5340 && INTVAL (index) < 1024
5341 && INTVAL (index) > -1024
5342 && (INTVAL (index) & 3) == 0);
5344 if (arm_address_register_rtx_p (index, strict_p)
5345 && (GET_MODE_SIZE (mode) <= 4))
/* Doubleword modes: LDRD range (+-255) or, on pre-LDRD cores, the
   two-LDR fallback range (note 4092, not 4095, for the second word).  */
5348 if (mode == DImode || mode == DFmode)
5350 if (code == CONST_INT)
5352 HOST_WIDE_INT val = INTVAL (index);
5355 return val > -256 && val < 256;
5357 return val > -4096 && val < 4092;
5360 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
/* Scaled-register index forms for word-or-smaller accesses.  */
5363 if (GET_MODE_SIZE (mode) <= 4
5367 || (mode == QImode && outer == SIGN_EXTEND))))
5371 rtx xiop0 = XEXP (index, 0);
5372 rtx xiop1 = XEXP (index, 1);
5374 return ((arm_address_register_rtx_p (xiop0, strict_p)
5375 && power_of_two_operand (xiop1, SImode))
5376 || (arm_address_register_rtx_p (xiop1, strict_p)
5377 && power_of_two_operand (xiop0, SImode)));
5379 else if (code == LSHIFTRT || code == ASHIFTRT
5380 || code == ASHIFT || code == ROTATERT)
5382 rtx op = XEXP (index, 1);
5384 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5385 && GET_CODE (op) == CONST_INT
5387 && INTVAL (op) <= 31);
5391 /* For ARM v4 we may be doing a sign-extend operation during the
5397 || (outer == SIGN_EXTEND && mode == QImode))
/* Plain constant index: +-4095 for HImode/HFmode, +-4095/4096 otherwise.  */
5403 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5405 return (code == CONST_INT
5406 && INTVAL (index) < range
5407 && INTVAL (index) > -range);
5410 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5411 index operand. i.e. 1, 2, 4 or 8. */
5413 thumb2_index_mul_operand (rtx op)
/* Only the scales encodable as LSL #0..#3 are accepted.
   NOTE(review): the assignment of `val' (INTVAL (op)) is not visible
   in this extract.  */
5417 if (GET_CODE(op) != CONST_INT)
5421 return (val == 1 || val == 2 || val == 4 || val == 8);
5424 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
/* Return nonzero if INDEX is valid as the index part of a Thumb-2
   address for a MODE access; STRICT_P selects strict register
   checking.  */
5426 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5428 enum rtx_code code = GET_CODE (index);
5430 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5431 /* Standard coprocessor addressing modes. */
5432 if (TARGET_HARD_FLOAT
5433 && (TARGET_FPA || TARGET_MAVERICK)
5434 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5435 || (TARGET_MAVERICK && mode == DImode)))
5436 return (code == CONST_INT && INTVAL (index) < 1024
5437 && INTVAL (index) > -1024
5438 && (INTVAL (index) & 3) == 0);
5440 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5442 /* For DImode assume values will usually live in core regs
5443 and only allow LDRD addressing modes. */
5444 if (!TARGET_LDRD || mode != DImode)
5445 return (code == CONST_INT
5446 && INTVAL (index) < 1024
5447 && INTVAL (index) > -1024
5448 && (INTVAL (index) & 3) == 0);
/* NEON D/Q-register modes: word-aligned constants, upper bound 1016.  */
5452 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5453 return (code == CONST_INT
5454 && INTVAL (index) < 1016
5455 && INTVAL (index) > -1024
5456 && (INTVAL (index) & 3) == 0);
5458 if (arm_address_register_rtx_p (index, strict_p)
5459 && (GET_MODE_SIZE (mode) <= 4))
5462 if (mode == DImode || mode == DFmode)
5464 if (code == CONST_INT)
5466 HOST_WIDE_INT val = INTVAL (index);
5467 /* ??? Can we assume ldrd for thumb2? */
5468 /* Thumb-2 ldrd only has reg+const addressing modes. */
5469 /* ldrd supports offsets of +-1020.
5470 However the ldr fallback does not. */
5471 return val > -256 && val < 256 && (val & 3) == 0;
/* Scaled-register index: reg + reg * {1,2,4,8}, or reg + (reg << 0..3).  */
5479 rtx xiop0 = XEXP (index, 0);
5480 rtx xiop1 = XEXP (index, 1);
5482 return ((arm_address_register_rtx_p (xiop0, strict_p)
5483 && thumb2_index_mul_operand (xiop1))
5484 || (arm_address_register_rtx_p (xiop1, strict_p)
5485 && thumb2_index_mul_operand (xiop0)));
5487 else if (code == ASHIFT)
5489 rtx op = XEXP (index, 1);
5491 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5492 && GET_CODE (op) == CONST_INT
5494 && INTVAL (op) <= 3);
/* Plain constant index: -255 .. 4095.  */
5497 return (code == CONST_INT
5498 && INTVAL (index) < 4096
5499 && INTVAL (index) > -256);
5502 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
/* Return nonzero if X is valid as a Thumb-1 base register for a MODE
   access.  STRICT_P selects the strict hard-register check; the lax
   check also admits pseudos, the frame/arg pointers, and (for
   word-or-larger accesses) the stack pointer.  NOTE(review): the
   declaration of `regno' (REGNO (x)) is not visible in this extract.  */
5504 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5508 if (GET_CODE (x) != REG)
5514 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5516 return (regno <= LAST_LO_REGNUM
5517 || regno > LAST_VIRTUAL_REGISTER
5518 || regno == FRAME_POINTER_REGNUM
5519 || (GET_MODE_SIZE (mode) >= 4
5520 && (regno == STACK_POINTER_REGNUM
5521 || regno >= FIRST_PSEUDO_REGISTER
5522 || x == hard_frame_pointer_rtx
5523 || x == arg_pointer_rtx)));
5526 /* Return nonzero if x is a legitimate index register. This is the case
5527 for any base register that can access a QImode object. */
5529 thumb1_index_register_rtx_p (rtx x, int strict_p)
/* QImode is the least-capable access, so any QImode-valid base also
   works as an index register.  */
5531 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5534 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5536 The AP may be eliminated to either the SP or the FP, so we use the
5537 least common denominator, e.g. SImode, and offsets from 0 to 64.
5539 ??? Verify whether the above is the right approach.
5541 ??? Also, the FP may be eliminated to the SP, so perhaps that
5542 needs special handling also.
5544 ??? Look at how the mips16 port solves this problem. It probably uses
5545 better ways to solve some of these problems.
5547 Although it is not incorrect, we don't accept QImode and HImode
5548 addresses based on the frame pointer or arg pointer until the
5549 reload pass starts. This is so that eliminating such addresses
5550 into stack based ones won't produce impossible code. */
5552 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5554 /* ??? Not clear if this is right. Experiment. */
/* Reject sub-word FP/AP-based addresses before reload; see the
   block comment above this function.  */
5555 if (GET_MODE_SIZE (mode) < 4
5556 && !(reload_in_progress || reload_completed)
5557 && (reg_mentioned_p (frame_pointer_rtx, x)
5558 || reg_mentioned_p (arg_pointer_rtx, x)
5559 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5560 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5561 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5562 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5565 /* Accept any base register. SP only in SImode or larger. */
5566 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5569 /* This is PC relative data before arm_reorg runs. */
5570 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5571 && GET_CODE (x) == SYMBOL_REF
5572 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5575 /* This is PC relative data after arm_reorg runs. */
5576 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5578 && (GET_CODE (x) == LABEL_REF
5579 || (GET_CODE (x) == CONST
5580 && GET_CODE (XEXP (x, 0)) == PLUS
5581 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5582 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5585 /* Post-inc indexing only supported for SImode and larger. */
5586 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5587 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5590 else if (GET_CODE (x) == PLUS)
5592 /* REG+REG address can be any two index registers. */
5593 /* We disallow FRAME+REG addressing since we know that FRAME
5594 will be replaced with STACK, and SP relative addressing only
5595 permits SP+OFFSET. */
5596 if (GET_MODE_SIZE (mode) <= 4
5597 && XEXP (x, 0) != frame_pointer_rtx
5598 && XEXP (x, 1) != frame_pointer_rtx
5599 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5600 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5603 /* REG+const has 5-7 bit offset for non-SP registers. */
5604 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5605 || XEXP (x, 0) == arg_pointer_rtx)
5606 && GET_CODE (XEXP (x, 1)) == CONST_INT
5607 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5610 /* REG+const has 10-bit offset for SP, but only SImode and
5611 larger is supported. */
5612 /* ??? Should probably check for DI/DFmode overflow here
5613 just like GO_IF_LEGITIMATE_OFFSET does. */
5614 else if (GET_CODE (XEXP (x, 0)) == REG
5615 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5616 && GET_MODE_SIZE (mode) >= 4
5617 && GET_CODE (XEXP (x, 1)) == CONST_INT
5618 && INTVAL (XEXP (x, 1)) >= 0
5619 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5620 && (INTVAL (XEXP (x, 1)) & 3) == 0)
/* FP/AP/virtual-register + aligned constant, word or larger.  */
5623 else if (GET_CODE (XEXP (x, 0)) == REG
5624 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5625 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5626 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5627 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5628 && GET_MODE_SIZE (mode) >= 4
5629 && GET_CODE (XEXP (x, 1)) == CONST_INT
5630 && (INTVAL (XEXP (x, 1)) & 3) == 0)
/* Word-sized constant-pool references (non-FP only).  */
5634 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5635 && GET_MODE_SIZE (mode) == 4
5636 && GET_CODE (x) == SYMBOL_REF
5637 && CONSTANT_POOL_ADDRESS_P (x)
5639 && symbol_mentioned_p (get_pool_constant (x))
5640 && ! pcrel_constant_p (get_pool_constant (x))))
5646 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5647 instruction of mode MODE. */
5649 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
/* Offset ranges per access size: byte 0..31, halfword 0..62 (even),
   word-or-larger bounded by 128 (see below).  */
5651 switch (GET_MODE_SIZE (mode))
5654 return val >= 0 && val < 32;
5657 return val >= 0 && val < 64 && (val & 1) == 0;
5661 && (val + GET_MODE_SIZE (mode)) <= 128
/* Target hook: dispatch address legitimacy checking to the
   ISA-specific routine for the current compilation state.  */
5667 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5670 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5671 else if (TARGET_THUMB2)
5672 return thumb2_legitimate_address_p (mode, x, strict_p);
5673 else /* if (TARGET_THUMB1) */
5674 return thumb1_legitimate_address_p (mode, x, strict_p);
5677 /* Build the SYMBOL_REF for __tls_get_addr. */
5679 static GTY(()) rtx tls_get_addr_libfunc;
5682 get_tls_get_addr (void)
/* Lazily create and cache the libfunc SYMBOL_REF.  */
5684 if (!tls_get_addr_libfunc)
5685 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5686 return tls_get_addr_libfunc;
/* Emit code that loads the thread pointer into TARGET (allocating a
   fresh pseudo if TARGET is null) and return it.  Uses the hardware
   TP register when available, otherwise the __aeabi_read_tp helper.  */
5690 arm_load_tp (rtx target)
5693 target = gen_reg_rtx (SImode);
5697 /* Can return in any reg. */
5698 emit_insn (gen_load_tp_hard (target));
5702 /* Always returned in r0. Immediately copy the result into a pseudo,
5703 otherwise other uses of r0 (e.g. setting up function arguments) may
5704 clobber the value. */
5708 emit_insn (gen_load_tp_soft ());
5710 tmp = gen_rtx_REG (SImode, 0);
5711 emit_move_insn (target, tmp);
5717 load_tls_operand (rtx x, rtx reg)
/* Wrap X in a CONST and move it into REG (allocating a pseudo if REG
   is null); REG is returned.  */
5721 if (reg == NULL_RTX)
5722 reg = gen_reg_rtx (SImode);
5724 tmp = gen_rtx_CONST (SImode, x);
5726 emit_move_insn (reg, tmp);
/* Emit a call to __tls_get_addr for symbol X with relocation RELOC
   (TLS_GD32 or TLS_LDM32).  The call result rtx is stored in *VALUEP
   and the recorded insn sequence is returned.  */
5732 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5734 rtx insns, label, labelno, sum;
5738 labelno = GEN_INT (pic_labelno++);
5739 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5740 label = gen_rtx_CONST (VOIDmode, label);
/* Build (unspec [x reloc label pc-bias]) — the pc-relative TLS
   operand; bias is 8 for ARM state, 4 for Thumb.  */
5742 sum = gen_rtx_UNSPEC (Pmode,
5743 gen_rtvec (4, x, GEN_INT (reloc), label,
5744 GEN_INT (TARGET_ARM ? 8 : 4)),
5746 reg = load_tls_operand (sum, reg);
5749 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5750 else if (TARGET_THUMB2)
5753 /* Thumb-2 only allows very limited access to the PC. Calculate
5754 the address in a temporary register. */
5755 tmp = gen_reg_rtx (SImode);
5756 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
5757 emit_insn (gen_addsi3(reg, reg, tmp));
5759 else /* TARGET_THUMB1 */
5760 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5762 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5763 Pmode, 1, reg, Pmode);
5765 insns = get_insns ();
/* Legitimize the TLS symbol X according to its access model
   (global-dynamic, local-dynamic, initial-exec or local-exec),
   returning an address rtx for the thread-local object.  REG is an
   optional scratch register.  */
5772 legitimize_tls_address (rtx x, rtx reg)
5774 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5775 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5779 case TLS_MODEL_GLOBAL_DYNAMIC:
5780 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5781 dest = gen_reg_rtx (Pmode);
5782 emit_libcall_block (insns, dest, ret, x);
5785 case TLS_MODEL_LOCAL_DYNAMIC:
5786 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5788 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5789 share the LDM result with other LD model accesses. */
5790 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5792 dest = gen_reg_rtx (Pmode);
5793 emit_libcall_block (insns, dest, ret, eqv);
5795 /* Load the addend. */
5796 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5798 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5799 return gen_rtx_PLUS (Pmode, dest, addend);
5801 case TLS_MODEL_INITIAL_EXEC:
/* IE model: pc-relatively load the GOT entry holding the TP offset,
   then add the thread pointer.  */
5802 labelno = GEN_INT (pic_labelno++);
5803 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5804 label = gen_rtx_CONST (VOIDmode, label);
5805 sum = gen_rtx_UNSPEC (Pmode,
5806 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5807 GEN_INT (TARGET_ARM ? 8 : 4)),
5809 reg = load_tls_operand (sum, reg);
5812 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5813 else if (TARGET_THUMB2)
5816 /* Thumb-2 only allows very limited access to the PC. Calculate
5817 the address in a temporary register. */
5818 tmp = gen_reg_rtx (SImode);
5819 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
5820 emit_insn (gen_addsi3(reg, reg, tmp));
5821 emit_move_insn (reg, gen_const_mem (SImode, reg));
5825 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5826 emit_move_insn (reg, gen_const_mem (SImode, reg));
5829 tp = arm_load_tp (NULL_RTX);
5831 return gen_rtx_PLUS (Pmode, tp, reg);
5833 case TLS_MODEL_LOCAL_EXEC:
/* LE model: the offset from TP is a link-time constant (TLS_LE32).  */
5834 tp = arm_load_tp (NULL_RTX);
5836 reg = gen_rtx_UNSPEC (Pmode,
5837 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5839 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5841 return gen_rtx_PLUS (Pmode, tp, reg);
5848 /* Try machine-dependent ways of modifying an illegitimate address
5849 to be legitimate. If we find one, return the new, valid address. */
5851 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5855 /* TODO: legitimize_address for Thumb2. */
/* Thumb targets are handled by the Thumb-specific routine.  */
5858 return thumb_legitimize_address (x, orig_x, mode);
5861 if (arm_tls_symbol_p (x))
5862 return legitimize_tls_address (x, NULL_RTX);
5864 if (GET_CODE (x) == PLUS)
5866 rtx xop0 = XEXP (x, 0);
5867 rtx xop1 = XEXP (x, 1);
/* Force non-symbolic constants into registers; symbols are left for
   the PIC handling at the end.  */
5869 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5870 xop0 = force_reg (SImode, xop0);
5872 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5873 xop1 = force_reg (SImode, xop1);
5875 if (ARM_BASE_REGISTER_RTX_P (xop0)
5876 && GET_CODE (xop1) == CONST_INT)
5878 HOST_WIDE_INT n, low_n;
5882 /* VFP addressing modes actually allow greater offsets, but for
5883 now we just stick with the lowest common denominator. */
5885 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
/* Split base + big-offset into (base + high part) in a new register
   plus a small residual offset that the addressing mode can encode.  */
5897 low_n = ((mode) == TImode ? 0
5898 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5902 base_reg = gen_reg_rtx (SImode);
5903 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5904 emit_move_insn (base_reg, val);
5905 x = plus_constant (base_reg, low_n);
5907 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5908 x = gen_rtx_PLUS (SImode, xop0, xop1);
5911 /* XXX We don't allow MINUS any more -- see comment in
5912 arm_legitimate_address_outer_p (). */
5913 else if (GET_CODE (x) == MINUS)
5915 rtx xop0 = XEXP (x, 0);
5916 rtx xop1 = XEXP (x, 1);
5918 if (CONSTANT_P (xop0))
5919 xop0 = force_reg (SImode, xop0);
5921 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5922 xop1 = force_reg (SImode, xop1);
5924 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5925 x = gen_rtx_MINUS (SImode, xop0, xop1);
5928 /* Make sure to take full advantage of the pre-indexed addressing mode
5929 with absolute addresses which often allows for the base register to
5930 be factorized for multiple adjacent memory references, and it might
5931 even allows for the mini pool to be avoided entirely. */
5932 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5935 HOST_WIDE_INT mask, base, index;
5938 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5939 use a 8-bit index. So let's use a 12-bit index for SImode only and
5940 hope that arm_gen_constant will enable ldrb to use more bits. */
5941 bits = (mode == SImode) ? 12 : 8;
5942 mask = (1 << bits) - 1;
5943 base = INTVAL (x) & ~mask;
5944 index = INTVAL (x) & mask;
5945 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5947 /* It'll most probably be more efficient to generate the base
5948 with more bits set and use a negative index instead. */
5952 base_reg = force_reg (SImode, GEN_INT (base));
5953 x = plus_constant (base_reg, index);
/* Under PIC, symbolic references must be legitimized from the
   original (untransformed) address.  */
5958 /* We need to find and carefully transform any SYMBOL and LABEL
5959 references; so go back to the original address expression. */
5960 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
5962 if (new_x != orig_x)
5970 /* Try machine-dependent ways of modifying an illegitimate Thumb address
5971 to be legitimate. If we find one, return the new, valid address. */
/* NOTE(review): this listing is an elided extract — the embedded original
   line numbers jump, so braces/returns between visible lines are not shown.
   Code lines below are reproduced byte-identically.  */
5973 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
/* TLS symbols take the dedicated TLS legitimization path.  */
5975 if (arm_tls_symbol_p (x))
5976 return legitimize_tls_address (x, NULL_RTX);
/* reg + out-of-range constant: the Thumb-1 load/store offset field only
   covers 0 .. 31 * size-of-mode, so rewrite larger or negative offsets.  */
5978 if (GET_CODE (x) == PLUS
5979 && GET_CODE (XEXP (x, 1)) == CONST_INT
5980 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
5981 || INTVAL (XEXP (x, 1)) < 0))
5983 rtx xop0 = XEXP (x, 0);
5984 rtx xop1 = XEXP (x, 1);
5985 HOST_WIDE_INT offset = INTVAL (xop1);
5987 /* Try and fold the offset into a biasing of the base register and
5988 then offsetting that. Don't do this when optimizing for space
5989 since it can cause too many CSEs. */
5990 if (optimize_size && offset >= 0
5991 && offset < 256 + 31 * GET_MODE_SIZE (mode))
5993 HOST_WIDE_INT delta;
/* Split OFFSET into a large bias (absorbed into the base register) plus a
   small DELTA that remains as the addressing-mode displacement.  */
5996 delta = offset - (256 - GET_MODE_SIZE (mode));
5997 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
5998 delta = 31 * GET_MODE_SIZE (mode);
6000 delta = offset & (~31 * GET_MODE_SIZE (mode));
6002 xop0 = force_operand (plus_constant (xop0, offset - delta),
6004 x = plus_constant (xop0, delta);
6006 else if (offset < 0 && offset > -256)
6007 /* Small negative offsets are best done with a subtract before the
6008 dereference, forcing these into a register normally takes two
6010 x = force_operand (x, NULL_RTX);
6013 /* For the remaining cases, force the constant into a register. */
6014 xop1 = force_reg (SImode, xop1);
6015 x = gen_rtx_PLUS (SImode, xop0, xop1);
/* reg + non-reg operand 0: force the awkward operand into a register so the
   sum is a plain reg+reg address.  */
6018 else if (GET_CODE (x) == PLUS
6019 && s_register_operand (XEXP (x, 1), SImode)
6020 && !s_register_operand (XEXP (x, 0), SImode))
6022 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6024 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6029 /* We need to find and carefully transform any SYMBOL and LABEL
6030 references; so go back to the original address expression. */
6031 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6033 if (new_x != orig_x)
/* Reload hook for Thumb: rewrite addresses reload cannot handle directly.
   Pushes a reload of the whole address via push_reload and (presumably, per
   the usual LEGITIMIZE_RELOAD_ADDRESS contract) signals success to the
   caller — the returns between the visible lines are elided here.  */
6041 thumb_legitimize_reload_address (rtx *x_p,
6042 enum machine_mode mode,
6043 int opnum, int type,
6044 int ind_levels ATTRIBUTE_UNUSED)
/* SP + out-of-range constant for a narrow (sub-word) access: reload the
   whole sum into a base register of the right class.  */
6048 if (GET_CODE (x) == PLUS
6049 && GET_MODE_SIZE (mode) < 4
6050 && REG_P (XEXP (x, 0))
6051 && XEXP (x, 0) == stack_pointer_rtx
6052 && GET_CODE (XEXP (x, 1)) == CONST_INT
6053 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6058 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6059 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6063 /* If both registers are hi-regs, then it's better to reload the
6064 entire expression rather than each register individually. That
6065 only requires one reload register rather than two. */
6066 if (GET_CODE (x) == PLUS
6067 && REG_P (XEXP (x, 0))
6068 && REG_P (XEXP (x, 1))
6069 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6070 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6075 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6076 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6083 /* Test for various thread-local symbols. */
6085 /* Return TRUE if X is a thread-local symbol. */
/* Early-outs for "no TLS support" and "not a symbol" are elided between the
   visible lines; the decisive test is the SYMBOL_REF's TLS model.  */
6088 arm_tls_symbol_p (rtx x)
6090 if (! TARGET_HAVE_TLS)
6093 if (GET_CODE (x) != SYMBOL_REF)
6096 return SYMBOL_REF_TLS_MODEL (x) != 0;
6099 /* Helper for arm_tls_referenced_p. */
/* for_each_rtx callback: nonzero for a TLS SYMBOL_REF; skips UNSPEC_TLS
   subtrees (they hold offsets, not symbol references).  */
6102 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6104 if (GET_CODE (*x) == SYMBOL_REF)
6105 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6107 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6108 TLS offsets, not real symbol references. */
6109 if (GET_CODE (*x) == UNSPEC
6110 && XINT (*x, 1) == UNSPEC_TLS)
6116 /* Return TRUE if X contains any TLS symbol references. */
/* Walks the whole rtx with arm_tls_operand_p_1; trivially false when the
   target has no TLS support.  */
6119 arm_tls_referenced_p (rtx x)
6121 if (! TARGET_HAVE_TLS)
6124 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6127 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
/* A constant cannot go into the literal pool when (a) cross-section offsets
   are forbidden and X is symbol+offset escaping its block, or (b) X
   references TLS symbols (their addresses are computed, not loaded).  */
6130 arm_cannot_force_const_mem (rtx x)
6134 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6136 split_const (x, &base, &offset);
6137 if (GET_CODE (base) == SYMBOL_REF
6138 && !offset_within_block_p (base, INTVAL (offset)))
6141 return arm_tls_referenced_p (x);
/* True if X is a hard/pseudo register, or a SUBREG of one.  */
6144 #define REG_OR_SUBREG_REG(X) \
6145 (GET_CODE (X) == REG \
6146 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
/* Strip a SUBREG wrapper; only valid when REG_OR_SUBREG_REG (X) holds.  */
6148 #define REG_OR_SUBREG_RTX(X) \
6149 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
/* Fallback cost scale used by the Thumb-1 cost code below when the generic
   definition is not already available.  */
6151 #ifndef COSTS_N_INSNS
6152 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
/* RTX costs for Thumb-1.  Dispatches on the rtx code (the switch and most
   case labels are elided in this extract) and returns a cost in
   COSTS_N_INSNS units; several branches are explicitly marked as guesses.  */
6155 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6157 enum machine_mode mode = GET_MODE (x);
6170 return COSTS_N_INSNS (1);
6173 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6176 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6183 return COSTS_N_INSNS (2) + cycles;
6185 return COSTS_N_INSNS (1) + 16;
/* FIXME(review): in the expression below `+' binds tighter than `==', so it
   parses as ((COSTS + 4*(src==MEM) + GET_CODE(dest)) == MEM), not as adding
   4 when the destination is a MEM.  This is a genuine (known upstream GCC)
   precedence bug; it cannot be fixed in a comment-only edit.  */
6188 return (COSTS_N_INSNS (1)
6189 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6190 + GET_CODE (SET_DEST (x)) == MEM));
/* CONST_INT: cheap if it fits a move-immediate, a shifted immediate, or the
   outer operation's immediate field.  */
6195 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6197 if (thumb_shiftable_const (INTVAL (x)))
6198 return COSTS_N_INSNS (2);
6199 return COSTS_N_INSNS (3);
6201 else if ((outer == PLUS || outer == COMPARE)
6202 && INTVAL (x) < 256 && INTVAL (x) > -256)
6204 else if (outer == AND
6205 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6206 return COSTS_N_INSNS (1);
6207 else if (outer == ASHIFT || outer == ASHIFTRT
6208 || outer == LSHIFTRT)
6210 return COSTS_N_INSNS (2);
6216 return COSTS_N_INSNS (3);
6234 /* XXX another guess. */
6235 /* Memory costs quite a lot for the first word, but subsequent words
6236 load at the equivalent of a single insn each. */
6237 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6238 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6243 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6248 /* XXX still guessing. */
/* Extension costs depend on the source mode (case labels elided).  */
6249 switch (GET_MODE (XEXP (x, 0)))
6252 return (1 + (mode == DImode ? 4 : 0)
6253 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6256 return (4 + (mode == DImode ? 4 : 0)
6257 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6260 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
/* Common RTX-cost fallback shared by the per-core cost routines: sets *TOTAL
   in COSTS_N_INSNS units for the rtx X appearing under OUTER.  The switch on
   the rtx code, its case labels, breaks and returns are largely elided in
   this extract; code lines below are reproduced byte-identically.  */
6272 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6274 enum machine_mode mode = GET_MODE (x);
6275 enum rtx_code subcode;
6277 enum rtx_code code = GET_CODE (x);
6284 /* Memory costs quite a lot for the first word, but subsequent words
6285 load at the equivalent of a single insn each. */
6286 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
/* Division/modulo: cheap only with hardware FP; otherwise a libcall.  */
6293 if (TARGET_HARD_FLOAT && mode == SFmode)
6294 *total = COSTS_N_INSNS (2);
6295 else if (TARGET_HARD_FLOAT && mode == DFmode)
6296 *total = COSTS_N_INSNS (4);
6298 *total = COSTS_N_INSNS (20);
/* Rotate/shift costs: shift amount may be a register or constant.  */
6302 if (GET_CODE (XEXP (x, 1)) == REG)
6303 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6304 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6305 *total = rtx_cost (XEXP (x, 1), code, speed);
6311 *total += COSTS_N_INSNS (4);
6316 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6317 *total += rtx_cost (XEXP (x, 0), code, speed);
6320 *total += COSTS_N_INSNS (3);
6324 *total += COSTS_N_INSNS (1);
6325 /* Increase the cost of complex shifts because they aren't any faster,
6326 and reduce dual issue opportunities. */
6327 if (arm_tune_cortex_a9
6328 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
/* MINUS handling: FP first, then integer forms.  */
6336 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6338 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6339 *total = COSTS_N_INSNS (1);
6341 *total = COSTS_N_INSNS (20);
6344 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6345 /* Thumb2 does not have RSB, so all arguments must be
6346 registers (subtracting a constant is canonicalized as
6347 addition of the negated constant). */
6353 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6354 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6355 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6357 *total += rtx_cost (XEXP (x, 1), code, speed);
6361 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6362 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6364 *total += rtx_cost (XEXP (x, 0), code, speed);
6371 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6373 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6375 *total = COSTS_N_INSNS (1);
6376 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6377 && arm_const_double_rtx (XEXP (x, 0)))
6379 *total += rtx_cost (XEXP (x, 1), code, speed);
6383 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6384 && arm_const_double_rtx (XEXP (x, 1)))
6386 *total += rtx_cost (XEXP (x, 0), code, speed);
6392 *total = COSTS_N_INSNS (20);
6396 *total = COSTS_N_INSNS (1);
6397 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6398 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6400 *total += rtx_cost (XEXP (x, 1), code, speed);
6404 subcode = GET_CODE (XEXP (x, 1));
6405 if (subcode == ASHIFT || subcode == ASHIFTRT
6406 || subcode == LSHIFTRT
6407 || subcode == ROTATE || subcode == ROTATERT)
6409 *total += rtx_cost (XEXP (x, 0), code, speed);
6410 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6414 /* A shift as a part of RSB costs no more than RSB itself. */
6415 if (GET_CODE (XEXP (x, 0)) == MULT
6416 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6418 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6419 *total += rtx_cost (XEXP (x, 1), code, speed);
6424 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6426 *total += rtx_cost (XEXP (x, 0), code, speed);
6427 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6431 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6432 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6434 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6435 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6436 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6437 *total += COSTS_N_INSNS (1);
/* PLUS of an extend on ARMv6+: maps to an extend-and-add form.  */
6445 if (code == PLUS && arm_arch6 && mode == SImode
6446 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6447 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6449 *total = COSTS_N_INSNS (1);
6450 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6452 *total += rtx_cost (XEXP (x, 1), code, speed);
6456 /* MLA: All arguments must be registers. We filter out
6457 multiplication by a power of two, so that we fall down into
6459 if (GET_CODE (XEXP (x, 0)) == MULT
6460 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6462 /* The cost comes from the cost of the multiply. */
6466 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6468 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6470 *total = COSTS_N_INSNS (1);
6471 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6472 && arm_const_double_rtx (XEXP (x, 1)))
6474 *total += rtx_cost (XEXP (x, 0), code, speed);
6481 *total = COSTS_N_INSNS (20);
6485 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6486 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6488 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6489 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6490 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6491 *total += COSTS_N_INSNS (1);
6497 case AND: case XOR: case IOR:
6500 /* Normally the frame registers will be spilt into reg+const during
6501 reload, so it is a bad idea to combine them with other instructions,
6502 since then they might not be moved outside of loops. As a compromise
6503 we allow integration with ops that have a constant as their second
6505 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6506 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6507 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6508 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6509 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6514 *total += COSTS_N_INSNS (2);
6515 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6516 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6518 *total += rtx_cost (XEXP (x, 0), code, speed);
6525 *total += COSTS_N_INSNS (1);
6526 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6527 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6529 *total += rtx_cost (XEXP (x, 0), code, speed);
6532 subcode = GET_CODE (XEXP (x, 0));
6533 if (subcode == ASHIFT || subcode == ASHIFTRT
6534 || subcode == LSHIFTRT
6535 || subcode == ROTATE || subcode == ROTATERT)
6537 *total += rtx_cost (XEXP (x, 1), code, speed);
6538 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6543 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6545 *total += rtx_cost (XEXP (x, 1), code, speed);
6546 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6550 if (subcode == UMIN || subcode == UMAX
6551 || subcode == SMIN || subcode == SMAX)
6553 *total = COSTS_N_INSNS (3);
6560 /* This should have been handled by the CPU specific routines. */
/* Widening 32x32->64 multiply-high patterns (smull/umull + shift).  */
6564 if (arm_arch3m && mode == SImode
6565 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6566 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6567 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6568 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6569 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6570 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6572 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6575 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6579 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6581 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6583 *total = COSTS_N_INSNS (1);
6586 *total = COSTS_N_INSNS (2);
6592 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
/* NOT of a shifted operand folds into the MVN's shifter operand.  */
6593 if (mode == SImode && code == NOT)
6595 subcode = GET_CODE (XEXP (x, 0));
6596 if (subcode == ASHIFT || subcode == ASHIFTRT
6597 || subcode == LSHIFTRT
6598 || subcode == ROTATE || subcode == ROTATERT
6600 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6602 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6603 /* Register shifts cost an extra cycle. */
6604 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6605 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
/* IF_THEN_ELSE: branches (PC operands) vs. conditional execution.  */
6614 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6616 *total = COSTS_N_INSNS (4);
6620 operand = XEXP (x, 0);
6622 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6623 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6624 && GET_CODE (XEXP (operand, 0)) == REG
6625 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6626 *total += COSTS_N_INSNS (1);
6627 *total += (rtx_cost (XEXP (x, 1), code, speed)
6628 + rtx_cost (XEXP (x, 2), code, speed));
6632 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6634 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6640 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6641 && mode == SImode && XEXP (x, 1) == const0_rtx)
6643 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6649 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6650 && mode == SImode && XEXP (x, 1) == const0_rtx)
6652 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6672 /* SCC insns. In the case where the comparison has already been
6673 performed, then they cost 2 instructions. Otherwise they need
6674 an additional comparison before them. */
6675 *total = COSTS_N_INSNS (2);
6676 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6683 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6689 *total += COSTS_N_INSNS (1);
6690 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6691 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6693 *total += rtx_cost (XEXP (x, 0), code, speed);
6697 subcode = GET_CODE (XEXP (x, 0));
6698 if (subcode == ASHIFT || subcode == ASHIFTRT
6699 || subcode == LSHIFTRT
6700 || subcode == ROTATE || subcode == ROTATERT)
6702 *total += rtx_cost (XEXP (x, 1), code, speed);
6703 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6708 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6710 *total += rtx_cost (XEXP (x, 1), code, speed);
6711 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6721 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6722 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6723 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6724 *total += rtx_cost (XEXP (x, 1), code, speed);
6728 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6730 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6732 *total = COSTS_N_INSNS (1);
6735 *total = COSTS_N_INSNS (20);
6738 *total = COSTS_N_INSNS (1);
6740 *total += COSTS_N_INSNS (3);
/* Extensions of sub-word values: ARMv4 loads extend for free from memory;
   otherwise shifts (or ARMv6 extend insns, elided here) are needed.  */
6744 if (GET_MODE_CLASS (mode) == MODE_INT)
6748 *total += COSTS_N_INSNS (1);
6750 if (GET_MODE (XEXP (x, 0)) != SImode)
6754 if (GET_CODE (XEXP (x, 0)) != MEM)
6755 *total += COSTS_N_INSNS (1);
6757 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6758 *total += COSTS_N_INSNS (2);
6767 if (GET_MODE_CLASS (mode) == MODE_INT)
6770 *total += COSTS_N_INSNS (1);
6772 if (GET_MODE (XEXP (x, 0)) != SImode)
6776 if (GET_CODE (XEXP (x, 0)) != MEM)
6777 *total += COSTS_N_INSNS (1);
6779 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6780 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6787 switch (GET_MODE (XEXP (x, 0)))
6794 *total = COSTS_N_INSNS (1);
6804 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
/* Constants: single insn if (or if inverted) they are valid immediates,
   otherwise cost of the synthesis sequence computed by arm_gen_constant.  */
6808 if (const_ok_for_arm (INTVAL (x))
6809 || const_ok_for_arm (~INTVAL (x)))
6810 *total = COSTS_N_INSNS (1);
6812 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6813 INTVAL (x), NULL_RTX,
6820 *total = COSTS_N_INSNS (3);
6824 *total = COSTS_N_INSNS (1);
6828 *total = COSTS_N_INSNS (1);
6829 *total += rtx_cost (XEXP (x, 0), code, speed);
6833 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
6834 *total = COSTS_N_INSNS (1);
6836 *total = COSTS_N_INSNS (4);
6840 *total = COSTS_N_INSNS (4);
6845 /* RTX costs when optimizing for size. */
/* Size-oriented cost model (speed is ignored).  For Thumb-1 it defers
   entirely to thumb1_rtx_costs; the dispatching switch and many case labels
   are elided in this extract.  */
6847 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6850 enum machine_mode mode = GET_MODE (x);
6853 /* XXX TBD. For now, use the standard costs. */
6854 *total = thumb1_rtx_costs (x, code, outer_code);
6858 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
6862 /* A memory access costs 1 insn if the mode is small, or the address is
6863 a single register, otherwise it costs one insn per word. */
6864 if (REG_P (XEXP (x, 0)))
6865 *total = COSTS_N_INSNS (1);
6867 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6874 /* Needs a libcall, so it costs about this. */
6875 *total = COSTS_N_INSNS (2);
/* Shift/rotate: register-amount SImode shifts, DImode constant shifts,
   and plain SImode shifts get increasing costs.  */
6879 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
6881 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
6889 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
6891 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
6894 else if (mode == SImode)
6896 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
6897 /* Slightly disparage register shifts, but not by much. */
6898 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6899 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
6903 /* Needs a libcall. */
6904 *total = COSTS_N_INSNS (2);
6908 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6910 *total = COSTS_N_INSNS (1);
6916 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
6917 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
/* A shifted operand folds into the ALU op's shifter operand for free.  */
6919 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
6920 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
6921 || subcode1 == ROTATE || subcode1 == ROTATERT
6922 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
6923 || subcode1 == ASHIFTRT)
6925 /* It's just the cost of the two operands. */
6930 *total = COSTS_N_INSNS (1);
6934 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6938 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6940 *total = COSTS_N_INSNS (1);
6944 /* A shift as a part of ADD costs nothing. */
6945 if (GET_CODE (XEXP (x, 0)) == MULT
6946 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6948 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
6949 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
6950 *total += rtx_cost (XEXP (x, 1), code, false);
6955 case AND: case XOR: case IOR:
6958 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
6960 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
6961 || subcode == LSHIFTRT || subcode == ASHIFTRT
6962 || (code == AND && subcode == NOT))
6964 /* It's just the cost of the two operands. */
6970 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6974 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6978 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6980 *total = COSTS_N_INSNS (1);
6986 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6995 if (cc_register (XEXP (x, 0), VOIDmode))
6998 *total = COSTS_N_INSNS (1);
7002 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
7003 *total = COSTS_N_INSNS (1);
7005 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
/* Extensions: free from memory on ARMv4+, otherwise extra insns whose
   count depends on source mode and ARMv6 availability.  */
7010 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7012 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7013 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7016 *total += COSTS_N_INSNS (1);
7021 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7023 switch (GET_MODE (XEXP (x, 0)))
7026 *total += COSTS_N_INSNS (1);
7030 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7036 *total += COSTS_N_INSNS (2);
7041 *total += COSTS_N_INSNS (1);
/* CONST_INT: cost depends on whether the value (possibly inverted or
   negated) fits an immediate field of the outer operation.  */
7046 if (const_ok_for_arm (INTVAL (x)))
7047 /* A multiplication by a constant requires another instruction
7048 to load the constant to a register. */
7049 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7051 else if (const_ok_for_arm (~INTVAL (x)))
7052 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7053 else if (const_ok_for_arm (-INTVAL (x)))
7055 if (outer_code == COMPARE || outer_code == PLUS
7056 || outer_code == MINUS)
7059 *total = COSTS_N_INSNS (1);
7062 *total = COSTS_N_INSNS (2);
7068 *total = COSTS_N_INSNS (2);
7072 *total = COSTS_N_INSNS (4);
7077 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7078 cost of these slightly. */
7079 *total = COSTS_N_INSNS (1) + 1;
7083 if (mode != VOIDmode)
7084 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7086 *total = COSTS_N_INSNS (4); /* How knows? */
7091 /* Generic TARGET_RTX_COSTS hook: dispatch to the size-oriented cost
   routine when not optimizing for speed, otherwise to the per-core cost
   routine selected by arm_tune.  (Previous comment here, "RTX costs when
   optimizing for size", was a stale copy from arm_size_rtx_costs.) */
7093 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7097 return arm_size_rtx_costs (x, (enum rtx_code) code,
7098 (enum rtx_code) outer_code, total);
7100 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
7101 (enum rtx_code) outer_code,
7105 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7106 supported on any "slowmul" cores, so it can be ignored. */
/* Special-cases MULT (cost modeled on 2-bit-per-cycle Booth recoding);
   everything else falls through to arm_rtx_costs_1.  */
7109 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7110 int *total, bool speed)
7112 enum machine_mode mode = GET_MODE (x);
7116 *total = thumb1_rtx_costs (x, code, outer_code);
7123 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7126 *total = COSTS_N_INSNS (20);
7130 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7132 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7133 & (unsigned HOST_WIDE_INT) 0xffffffff);
7134 int cost, const_ok = const_ok_for_arm (i);
7135 int j, booth_unit_size;
7137 /* Tune as appropriate. */
7138 cost = const_ok ? 4 : 8;
7139 booth_unit_size = 2;
/* Count multiplier iterations: 2 bits retired per cycle until the
   (masked) constant is exhausted.  */
7140 for (j = 0; i && j < 32; j += booth_unit_size)
7142 i >>= booth_unit_size;
7146 *total = COSTS_N_INSNS (cost);
7147 *total += rtx_cost (XEXP (x, 0), code, speed);
7151 *total = COSTS_N_INSNS (20);
/* NOTE(review): stray double semicolon below — harmless empty statement,
   left as-is since this is a comment-only edit.  */
7155 return arm_rtx_costs_1 (x, outer_code, total, speed);;
7160 /* RTX cost for cores with a fast multiply unit (M variants). */
/* Like the slowmul variant but with an 8-bit-per-cycle Booth unit and
   cheap widening (extend x extend) multiplies.  */
7163 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7164 int *total, bool speed)
7166 enum machine_mode mode = GET_MODE (x);
7170 *total = thumb1_rtx_costs (x, code, outer_code);
7174 /* ??? should thumb2 use different costs? */
7178 /* There is no point basing this on the tuning, since it is always the
7179 fast variant if it exists at all. */
7181 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7182 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7183 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7185 *total = COSTS_N_INSNS(2);
7192 *total = COSTS_N_INSNS (5);
7196 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7198 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7199 & (unsigned HOST_WIDE_INT) 0xffffffff);
7200 int cost, const_ok = const_ok_for_arm (i);
7201 int j, booth_unit_size;
7203 /* Tune as appropriate. */
7204 cost = const_ok ? 4 : 8;
7205 booth_unit_size = 8;
/* 8 bits of the constant retired per cycle on the fast multiplier.  */
7206 for (j = 0; i && j < 32; j += booth_unit_size)
7208 i >>= booth_unit_size;
7212 *total = COSTS_N_INSNS(cost);
7218 *total = COSTS_N_INSNS (4);
7222 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7224 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
7226 *total = COSTS_N_INSNS (1);
7231 /* Requires a lib call */
7232 *total = COSTS_N_INSNS (20);
7236 return arm_rtx_costs_1 (x, outer_code, total, speed);
7241 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7242 so it can be ignored. */
/* XScale-specific costs: penalizes COMPARE-of-MULT (muls stalls the flag
   read) and models the multiplier's per-cycle bit retirement.  */
7245 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
7247 enum machine_mode mode = GET_MODE (x);
7251 *total = thumb1_rtx_costs (x, code, outer_code);
7258 if (GET_CODE (XEXP (x, 0)) != MULT)
7259 return arm_rtx_costs_1 (x, outer_code, total, speed);
7261 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7262 will stall until the multiplication is complete. */
7263 *total = COSTS_N_INSNS (3);
7267 /* There is no point basing this on the tuning, since it is always the
7268 fast variant if it exists at all. */
7270 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7271 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7272 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7274 *total = COSTS_N_INSNS (2);
7281 *total = COSTS_N_INSNS (5);
7285 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7287 /* If operand 1 is a constant we can more accurately
7288 calculate the cost of the multiply. The multiplier can
7289 retire 15 bits on the first cycle and a further 12 on the
7290 second. We do, of course, have to load the constant into
7291 a register first. */
7292 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7293 /* There's a general overhead of one cycle. */
7295 unsigned HOST_WIDE_INT masked_const;
7300 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
/* Extra cycles when significant bits extend past 15 and past 27.  */
7302 masked_const = i & 0xffff8000;
7303 if (masked_const != 0)
7306 masked_const = i & 0xf8000000;
7307 if (masked_const != 0)
7310 *total = COSTS_N_INSNS (cost);
7316 *total = COSTS_N_INSNS (3);
7320 /* Requires a lib call */
7321 *total = COSTS_N_INSNS (20);
7325 return arm_rtx_costs_1 (x, outer_code, total, speed);
7330 /* RTX costs for 9e (and later) cores. */
/* ARM9E-family multiply costs; all other codes fall through to
   arm_rtx_costs_1 (the dispatch switch is elided in this extract).  */
7333 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7334 int *total, bool speed)
7336 enum machine_mode mode = GET_MODE (x);
7343 *total = COSTS_N_INSNS (3);
7347 *total = thumb1_rtx_costs (x, code, outer_code);
7355 /* There is no point basing this on the tuning, since it is always the
7356 fast variant if it exists at all. */
7358 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7359 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7360 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7362 *total = COSTS_N_INSNS (2);
7369 *total = COSTS_N_INSNS (5);
7375 *total = COSTS_N_INSNS (2);
7379 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7381 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
7383 *total = COSTS_N_INSNS (1);
7388 *total = COSTS_N_INSNS (20);
7392 return arm_rtx_costs_1 (x, outer_code, total, speed);
7395 /* All address computations that can be done are free, but rtx cost returns
7396 the same for practically all of them. So we weight the different types
7397 of address here in the order (most pref first):
7398 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
/* Returns a small relative weight, not COSTS_N_INSNS units; the actual
   return values between the visible lines are elided in this extract.  */
7400 arm_arm_address_cost (rtx x)
7402 enum rtx_code c = GET_CODE (x);
7404 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7406 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7411 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7414 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
/* Thumb address-cost counterpart of arm_arm_address_cost; distinguishes
   (presumably — the enclosing condition's first line is elided) simple
   reg / reg+const forms from everything else.  */
7424 arm_thumb_address_cost (rtx x)
7426 enum rtx_code c = GET_CODE (x);
7431 && GET_CODE (XEXP (x, 0)) == REG
7432 && GET_CODE (XEXP (x, 1)) == CONST_INT)
/* TARGET_ADDRESS_COST hook: select the ARM/Thumb-2 or Thumb-1 weighting.  */
7439 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7441 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
/* TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's latency COST for the
   dependency LINK between DEP (producer) and INSN (consumer).  Adds a stall
   for ALU results feeding a shifted operand, zeroes anti/output dependencies
   and call-after-load, and models store-to-load cache behaviour.  */
7445 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7449 /* Some true dependencies can have a higher cost depending
7450 on precisely how certain input operands are used. */
7452 && REG_NOTE_KIND (link) == 0
7453 && recog_memoized (insn) >= 0
7454 && recog_memoized (dep) >= 0)
7456 int shift_opnum = get_attr_shift (insn);
7457 enum attr_type attr_type = get_attr_type (dep);
7459 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7460 operand for INSN. If we have a shifted input operand and the
7461 instruction we depend on is another ALU instruction, then we may
7462 have to account for an additional stall. */
7463 if (shift_opnum != 0
7464 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7466 rtx shifted_operand;
7469 /* Get the shifted operand. */
7470 extract_insn (insn);
7471 shifted_operand = recog_data.operand[shift_opnum];
7473 /* Iterate over all the operands in DEP. If we write an operand
7474 that overlaps with SHIFTED_OPERAND, then we have increase the
7475 cost of this dependency. */
7477 preprocess_constraints ();
7478 for (opno = 0; opno < recog_data.n_operands; opno++)
7480 /* We can ignore strict inputs. */
7481 if (recog_data.operand_type[opno] == OP_IN)
7484 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7491 /* XXX This is not strictly true for the FPA. */
7492 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7493 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7496 /* Call insns don't incur a stall, even if they follow a load. */
7497 if (REG_NOTE_KIND (link) == 0
7498 && GET_CODE (insn) == CALL_INSN)
7501 if ((i_pat = single_set (insn)) != NULL
7502 && GET_CODE (SET_SRC (i_pat)) == MEM
7503 && (d_pat = single_set (dep)) != NULL
7504 && GET_CODE (SET_DEST (d_pat)) == MEM)
7506 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7507 /* This is a load after a store, there is no conflict if the load reads
7508 from a cached area. Assume that loads from the stack, and from the
7509 constant pool are cached, and that others will miss. This is a
7512 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7513 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7514 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7515 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
/* Lazily-initialized table of FP immediates valid in instructions.
   fp_consts_inited doubles as the count of valid entries: the two
   assignments below sit on elided conditional branches (1 entry for VFP —
   only zero — vs. all 8 for FPA, per the comment on strings_fp).  */
7522 static int fp_consts_inited = 0;
7524 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7525 static const char * const strings_fp[8] =
7528 "4", "5", "0.5", "10"
7531 static REAL_VALUE_TYPE values_fp[8];
/* Parse the first fp_consts_inited entries of strings_fp into values_fp.  */
7534 init_fp_table (void)
7540 fp_consts_inited = 1;
7542 fp_consts_inited = 8;
7544 for (i = 0; i < fp_consts_inited; i++)
7546 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7551 /* Return TRUE if rtx X is a valid immediate FP constant. */
/* Rejects -0.0, then scans the first fp_consts_inited entries of the lazily
   initialized values_fp table for an exact match.  */
7553 arm_const_double_rtx (rtx x)
7558 if (!fp_consts_inited)
7561 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7562 if (REAL_VALUE_MINUS_ZERO (r))
7565 for (i = 0; i < fp_consts_inited; i++)
7566 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7572 /* Return TRUE if rtx X is a valid immediate FPA constant. */
/* Like arm_const_double_rtx but tests the NEGATED value, and always checks
   all 8 table entries (FPA semantics) rather than fp_consts_inited.  */
7574 neg_const_double_rtx_ok_for_fpa (rtx x)
7579 if (!fp_consts_inited)
7582 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7583 r = REAL_VALUE_NEGATE (r);
7584 if (REAL_VALUE_MINUS_ZERO (r))
7587 for (i = 0; i < 8; i++)
7588 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7595 /* VFPv3 has a fairly wide range of representable immediates, formed from
7596 "quarter-precision" floating-point values. These can be evaluated using this
7597 formula (with ^ for exponentiation):
7601 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7602 16 <= n <= 31 and 0 <= r <= 7.
7604 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7606 - A (most-significant) is the sign bit.
7607 - BCD are the exponent (encoded as r XOR 3).
7608 - EFGH are the mantissa (encoded as n - 16).
/* Return an integer index for a VFPv3 immediate operand X suitable for the
fconst[sd] instruction, or -1 if X isn't suitable.  The returned 8-bit index
packs sign, exponent and mantissa as described in the comment above. */
vfp3_const_double_index (rtx x)
REAL_VALUE_TYPE r, m;
unsigned HOST_WIDE_INT mantissa, mant_hi;
unsigned HOST_WIDE_INT mask;
HOST_WIDE_INT m1, m2;
/* Fixed binary point position for the expanded mantissa (two host words
   minus the sign bit).  */
int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
/* We can't represent these things, so detect them first. */
if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
/* Extract sign, exponent and mantissa. */
sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
r = REAL_VALUE_ABS (r);
exponent = REAL_EXP (&r);
/* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
highest (sign) bit, with a fixed binary point at bit point_pos.
WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
bits for the mantissa, this may fail (low bits would be lost). */
real_ldexp (&m, &r, point_pos - exponent);
REAL_VALUE_TO_INT (&m1, &m2, m);
/* If there are bits set in the low part of the mantissa, we can't
represent this value. */
/* Now make it so that mantissa contains the most-significant bits, and move
the point_pos to indicate that the least-significant bits have been
discarded.  */
point_pos -= HOST_BITS_PER_WIDE_INT;
/* We can permit four significant bits of mantissa only, plus a high bit
which is always 1. */
mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
if ((mantissa & mask) != 0)
/* Now we know the mantissa is in range, chop off the unneeded bits. */
mantissa >>= point_pos - 5;
/* The mantissa may be zero. Disallow that case. (It's possible to load the
floating-point immediate zero with Neon using an integer-zero load, but
that case is handled elsewhere.) */
/* Five significant bits: implicit leading 1 plus the four kept above.  */
gcc_assert (mantissa >= 16 && mantissa <= 31);
/* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
normalized significands are in the range [1, 2). (Our mantissa is shifted
left 4 places at this point relative to normalized IEEE754 values). GCC
internally uses [0.5, 1) (see real.c), so the exponent returned from
REAL_EXP must be altered. */
exponent = 5 - exponent;
/* Encodable exponents occupy 3 bits only.  */
if (exponent < 0 || exponent > 7)
/* Sign, mantissa and exponent are now in the correct form to plug into the
formula described in the comment above. */
return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
/* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
vfp3_const_double_rtx (rtx x)
/* Delegates to the index computation; any non-negative index is valid.  */
return vfp3_const_double_index (x) != -1;
7698 /* Recognize immediates which can be used in various Neon instructions. Legal
7699 immediates are described by the following table (for VMVN variants, the
7700 bitwise inverse of the constant shown is recognized. In either case, VMOV
7701 is output and the correct instruction to use for a given constant is chosen
7702 by the assembler). The constant shown is replicated across all elements of
7703 the destination vector.
7705 insn elems variant constant (binary)
7706 ---- ----- ------- -----------------
7707 vmov i32 0 00000000 00000000 00000000 abcdefgh
7708 vmov i32 1 00000000 00000000 abcdefgh 00000000
7709 vmov i32 2 00000000 abcdefgh 00000000 00000000
7710 vmov i32 3 abcdefgh 00000000 00000000 00000000
7711 vmov i16 4 00000000 abcdefgh
7712 vmov i16 5 abcdefgh 00000000
7713 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7714 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7715 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7716 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7717 vmvn i16 10 00000000 abcdefgh
7718 vmvn i16 11 abcdefgh 00000000
7719 vmov i32 12 00000000 00000000 abcdefgh 11111111
7720 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7721 vmov i32 14 00000000 abcdefgh 11111111 11111111
7722 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7724 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7725 eeeeeeee ffffffff gggggggg hhhhhhhh
7726 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7728 For case 18, B = !b. Representable values are exactly those accepted by
7729 vfp3_const_double_index, but are output as floating-point numbers rather
7732 Variants 0-5 (inclusive) may also be used as immediates for the second
7733 operand of VORR/VBIC instructions.
7735 The INVERSE argument causes the bitwise inverse of the given operand to be
7736 recognized instead (used for recognizing legal immediates for the VAND/VORN
7737 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7738 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7739 output, rather than the real insns vbic/vorr).
7741 INVERSE makes no difference to the recognition of float vectors.
7743 The return value is the variant of immediate as shown in the above table, or
7744 -1 if the given value doesn't match any of the listed patterns.
/* See the table in the comment above for the meaning of the return value.
   OP is the candidate CONST_VECTOR, MODE its machine mode; INVERSE requests
   recognition of the bitwise inverse; *MODCONST and *ELEMENTWIDTH receive
   the constant to output and the element width (when non-null).  */
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
rtx *modconst, int *elementwidth)
/* Try one pattern: scan the splatted byte vector with the given STRIDE and
   record CLASS/ELSIZE if TEST holds for every position.  */
#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
for (i = 0; i < idx; i += (STRIDE)) \
immtype = (CLASS); \
elsize = (ELSIZE); \
unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
unsigned char bytes[16];
int immtype = -1, matches;
/* When INVERSE, each splatted byte is XORed with 0xff before matching.  */
unsigned int invmask = inverse ? 0xff : 0;
/* Vectors of float constants. */
if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
rtx el0 = CONST_VECTOR_ELT (op, 0);
/* Element 0 must itself be a valid fconst[sd] immediate...  */
if (!vfp3_const_double_rtx (el0))
REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
/* ...and every other element must equal it (a true splat).  */
for (i = 1; i < n_elts; i++)
rtx elt = CONST_VECTOR_ELT (op, i);
REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
if (!REAL_VALUES_EQUAL (r0, re))
*modconst = CONST_VECTOR_ELT (op, 0);
/* Splat vector constant out into a byte vector. */
for (i = 0; i < n_elts; i++)
rtx el = CONST_VECTOR_ELT (op, i);
unsigned HOST_WIDE_INT elpart;
unsigned int part, parts;
if (GET_CODE (el) == CONST_INT)
elpart = INTVAL (el);
else if (GET_CODE (el) == CONST_DOUBLE)
elpart = CONST_DOUBLE_LOW (el);
/* CONST_DOUBLE elements span two host words (low then high).  */
for (part = 0; part < parts; part++)
for (byte = 0; byte < innersize; byte++)
bytes[idx++] = (elpart & 0xff) ^ invmask;
elpart >>= BITS_PER_UNIT;
if (GET_CODE (el) == CONST_DOUBLE)
elpart = CONST_DOUBLE_HIGH (el);
/* Sanity check: the byte vector must cover the whole mode.  */
gcc_assert (idx == GET_MODE_SIZE (mode));
CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
&& bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
&& bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
CHECK (1, 8, 16, bytes[i] == bytes[0]);
/* Variant 17 (i64): each byte all-zeros or all-ones, repeating every 8.  */
CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
&& bytes[i] == bytes[(i + 8) % idx]);
*elementwidth = elsize;
unsigned HOST_WIDE_INT imm = 0;
/* Un-invert bytes of recognized vector, if necessary. */
for (i = 0; i < idx; i++)
bytes[i] ^= invmask;
/* FIXME: Broken on 32-bit H_W_I hosts. */
gcc_assert (sizeof (HOST_WIDE_INT) == 8);
/* i64 variant: collapse each all-0/all-0xff byte into one immediate.  */
for (i = 0; i < 8; i++)
imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
<< (i * BITS_PER_UNIT);
*modconst = GEN_INT (imm);
unsigned HOST_WIDE_INT imm = 0;
/* Other integer variants: reassemble one element's bytes little-endian.  */
for (i = 0; i < elsize / BITS_PER_UNIT; i++)
imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
*modconst = GEN_INT (imm);
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
float elements), and a modified constant (whatever should be output for a
VMOV) in *MODCONST. */
neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
rtx *modconst, int *elementwidth)
/* Any variant recognized by neon_valid_immediate is acceptable for VMOV;
   INVERSE is 0 since VMVN forms are folded in by the table itself.  */
int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
/* Out-parameters are presumably only written on success (guards elided).  */
*modconst = tmpconst;
*elementwidth = tmpwidth;
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
the immediate is valid, write a constant suitable for using as an operand
to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
*ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
rtx *modconst, int *elementwidth)
int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
/* Only variants 0-5 are usable as the second operand of the logic insns
   (see the table above neon_valid_immediate).  */
if (retval < 0 || retval > 5)
*modconst = tmpconst;
*elementwidth = tmpwidth;
/* Return a string suitable for output of Neon immediate logic operation
MNEM, rewriting *OP2 to the encodable constant as a side effect.  */
neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
int inverse, int quad)
int width, is_valid;
/* Static buffer: the returned template must outlive this call.  */
static char templ[40];
is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
/* Callers must only pass operands already validated by the predicate.  */
gcc_assert (is_valid != 0);
/* Quad (%q0) vs. double (%P0) register destination.  */
sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
/* Output a sequence of pairwise operations to implement a reduction.
NOTE: We do "too much work" here, because pairwise operations work on two
registers-worth of operands in one go. Unfortunately we can't exploit those
extra calculations to do the full operation in fewer steps, I don't think.
Although all vector elements of the result but the first are ignored, we
actually calculate the same result in each of the elements. An alternative
such as initially loading a vector with zero to use as each of the second
operands would use up an additional register and take an extra instruction,
for no particular gain. */
neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
rtx (*reduc) (rtx, rtx, rtx))
enum machine_mode inner = GET_MODE_INNER (mode);
unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
/* Halve the element count each step; the last step writes OP0 directly,
   intermediate steps go through fresh pseudos.  */
for (i = parts / 2; i >= 1; i /= 2)
rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
emit_insn (reduc (dest, tmpsum, tmpsum));
/* Initialize a vector with non-constant elements. FIXME: We can do better
than the current implementation (building a vector on the stack and then
loading it) in many cases. See rs6000.c. */
neon_expand_vector_init (rtx target, rtx vals)
enum machine_mode mode = GET_MODE (target);
enum machine_mode inner = GET_MODE_INNER (mode);
unsigned int i, n_elts = GET_MODE_NUNITS (mode);
gcc_assert (VECTOR_MODE_P (mode));
/* Store each element into a stack temporary, then load the whole vector.  */
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
for (i = 0; i < n_elts; i++)
emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
XVECEXP (vals, 0, i));
emit_move_insn (target, mem);
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
reported source locations are bogus. */
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
/* OPERAND must be a compile-time integer constant.  */
gcc_assert (GET_CODE (operand) == CONST_INT);
lane = INTVAL (operand);
/* Half-open range check: [low, high).  */
if (lane < low || lane >= high)
/* Bounds-check lanes. */
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
bounds_check (operand, low, high, "lane out of range");
/* Bounds-check constants. */
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
bounds_check (operand, low, high, "constant out of range");
/* Return the bit width of one element of MODE: the full mode size for
   scalar modes, the inner-mode size for vector modes.  */
neon_element_bits (enum machine_mode mode)
return GET_MODE_BITSIZE (mode);
return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8093 /* Predicates for `match_operand' and `match_operator'. */
/* Return nonzero if OP is a valid Cirrus memory address pattern. */
cirrus_memory_offset (rtx op)
/* Reject eliminable registers. */
if (! (reload_in_progress || reload_completed)
&& ( reg_mentioned_p (frame_pointer_rtx, op)
|| reg_mentioned_p (arg_pointer_rtx, op)
|| reg_mentioned_p (virtual_incoming_args_rtx, op)
|| reg_mentioned_p (virtual_outgoing_args_rtx, op)
|| reg_mentioned_p (virtual_stack_dynamic_rtx, op)
|| reg_mentioned_p (virtual_stack_vars_rtx, op)))
if (GET_CODE (op) == MEM)
/* Match: (mem (reg)). */
if (GET_CODE (ind) == REG)
/* Match: (mem (plus (reg) (const_int))).  */
if (GET_CODE (ind) == PLUS
&& GET_CODE (XEXP (ind, 0)) == REG
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
&& GET_CODE (XEXP (ind, 1)) == CONST_INT)
/* Return TRUE if OP is a valid coprocessor memory address pattern.
WB is true if full writeback address modes are allowed and is false
if limited writeback address modes (POST_INC and PRE_DEC) are
allowed.  */
arm_coproc_mem_operand (rtx op, bool wb)
/* Reject eliminable registers. */
if (! (reload_in_progress || reload_completed)
&& ( reg_mentioned_p (frame_pointer_rtx, op)
|| reg_mentioned_p (arg_pointer_rtx, op)
|| reg_mentioned_p (virtual_incoming_args_rtx, op)
|| reg_mentioned_p (virtual_outgoing_args_rtx, op)
|| reg_mentioned_p (virtual_stack_dynamic_rtx, op)
|| reg_mentioned_p (virtual_stack_vars_rtx, op)))
/* Constants are converted into offsets from labels. */
if (GET_CODE (op) != MEM)
/* Match: (mem (label_ref ...)) or the equivalent (const (plus ...)) form,
   only valid once constants have been placed (after reload).  */
if (reload_completed
&& (GET_CODE (ind) == LABEL_REF
|| (GET_CODE (ind) == CONST
&& GET_CODE (XEXP (ind, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
&& GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
/* Match: (mem (reg)). */
if (GET_CODE (ind) == REG)
return arm_address_register_rtx_p (ind, 0);
/* Autoincrement addressing modes. POST_INC and PRE_DEC are
acceptable in any case (subject to verification by
arm_address_register_rtx_p). We need WB to be true to accept
PRE_INC and POST_DEC. */
if (GET_CODE (ind) == POST_INC
|| GET_CODE (ind) == PRE_DEC
&& (GET_CODE (ind) == PRE_INC
|| GET_CODE (ind) == POST_DEC)))
return arm_address_register_rtx_p (XEXP (ind, 0), 0);
/* Accept {POST,PRE}_MODIFY of the form (plus base ...) and fall through
   to validate the computed address below.  */
&& (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
&& arm_address_register_rtx_p (XEXP (ind, 0), 0)
&& GET_CODE (XEXP (ind, 1)) == PLUS
&& rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
ind = XEXP (ind, 1);
/* Match: (mem (plus (reg) (const_int))) with a word-aligned offset in
   (-1024, 1024) -- the coprocessor load/store offset range.  */
if (GET_CODE (ind) == PLUS
&& GET_CODE (XEXP (ind, 0)) == REG
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
&& GET_CODE (XEXP (ind, 1)) == CONST_INT
&& INTVAL (XEXP (ind, 1)) > -1024
&& INTVAL (XEXP (ind, 1)) < 1024
&& (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a memory operand which we can load or store a vector
to/from. TYPE is one of the following values:
0 - Vector load/store (vldr)
1 - Core registers (ldm)
2 - Element/structure loads (vld1)
*/
neon_vector_mem_operand (rtx op, int type)
/* Reject eliminable registers. */
if (! (reload_in_progress || reload_completed)
&& ( reg_mentioned_p (frame_pointer_rtx, op)
|| reg_mentioned_p (arg_pointer_rtx, op)
|| reg_mentioned_p (virtual_incoming_args_rtx, op)
|| reg_mentioned_p (virtual_outgoing_args_rtx, op)
|| reg_mentioned_p (virtual_stack_dynamic_rtx, op)
|| reg_mentioned_p (virtual_stack_vars_rtx, op)))
/* Constants are converted into offsets from labels. */
if (GET_CODE (op) != MEM)
/* Label-relative addresses are only valid after reload.  */
if (reload_completed
&& (GET_CODE (ind) == LABEL_REF
|| (GET_CODE (ind) == CONST
&& GET_CODE (XEXP (ind, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
&& GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
/* Match: (mem (reg)). */
if (GET_CODE (ind) == REG)
return arm_address_register_rtx_p (ind, 0);
/* Allow post-increment with Neon registers. */
if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
return arm_address_register_rtx_p (XEXP (ind, 0), 0);
/* FIXME: vld1 allows register post-modify. */
/* Match: (mem (plus (reg) (const_int))); note the asymmetric offset range
   (-1024, 1016) -- NOTE(review): the type guard for this case is elided.  */
&& GET_CODE (ind) == PLUS
&& GET_CODE (XEXP (ind, 0)) == REG
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
&& GET_CODE (XEXP (ind, 1)) == CONST_INT
&& INTVAL (XEXP (ind, 1)) > -1024
&& INTVAL (XEXP (ind, 1)) < 1016
&& (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
type.  */
neon_struct_mem_operand (rtx op)
/* Reject eliminable registers. */
if (! (reload_in_progress || reload_completed)
&& ( reg_mentioned_p (frame_pointer_rtx, op)
|| reg_mentioned_p (arg_pointer_rtx, op)
|| reg_mentioned_p (virtual_incoming_args_rtx, op)
|| reg_mentioned_p (virtual_outgoing_args_rtx, op)
|| reg_mentioned_p (virtual_stack_dynamic_rtx, op)
|| reg_mentioned_p (virtual_stack_vars_rtx, op)))
/* Constants are converted into offsets from labels. */
if (GET_CODE (op) != MEM)
/* Label-relative addresses are only valid after reload.  */
if (reload_completed
&& (GET_CODE (ind) == LABEL_REF
|| (GET_CODE (ind) == CONST
&& GET_CODE (XEXP (ind, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
&& GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
/* Match: (mem (reg)). */
if (GET_CODE (ind) == REG)
return arm_address_register_rtx_p (ind, 0);
/* Return true if X is a register that will be eliminated later on. */
arm_eliminable_register (rtx x)
/* Frame pointer, arg pointer, and all virtual registers are replaced by
   hard registers plus offsets during register elimination.  */
return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
|| REGNO (x) == ARG_POINTER_REGNUM
|| (REGNO (x) >= FIRST_VIRTUAL_REGISTER
&& REGNO (x) <= LAST_VIRTUAL_REGISTER));
/* Return GENERAL_REGS if a scratch register required to reload x to/from
coprocessor registers. Otherwise return NO_REGS. */
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
return GENERAL_REGS;
/* Neon vector modes: directly addressable vector mems need no scratch.  */
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
&& neon_vector_mem_operand (x, 0))
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
return GENERAL_REGS;
/* Values which must be returned in the most-significant end of the return
register.  */
arm_return_in_msb (const_tree valtype)
/* Only the AAPCS big-endian rules require MSB-justified returns for
   aggregates and complex values (additional conditions elided here).  */
return (TARGET_AAPCS_BASED
&& (AGGREGATE_TYPE_P (valtype)
|| TREE_CODE (valtype) == COMPLEX_TYPE));
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
Use by the Cirrus Maverick code which has to workaround
a hardware bug triggered by such instructions. */
arm_memory_load_p (rtx insn)
/* NOTE(review): stray double semicolon below -- harmless empty
   declaration-statement, but should be cleaned up.  */
rtx body, lhs, rhs;;
if (insn == NULL_RTX || GET_CODE (insn) != INSN)
body = PATTERN (insn);
/* Only a simple SET can be a plain load.  */
if (GET_CODE (body) != SET)
lhs = XEXP (body, 0);
rhs = XEXP (body, 1);
/* Look through a SUBREG wrapper on the destination.  */
lhs = REG_OR_SUBREG_RTX (lhs);
/* If the destination is not a general purpose
register we do not have to worry. */
if (GET_CODE (lhs) != REG
|| REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
/* As well as loads from memory we also have to react
to loads of invalid constants which will be turned
into loads from the minipool. */
return (GET_CODE (rhs) == MEM
|| GET_CODE (rhs) == SYMBOL_REF
|| note_invalid_constants (insn, -1, false));
/* Return TRUE if INSN is a Cirrus instruction. */
arm_cirrus_insn_p (rtx insn)
enum attr_cirrus attr;
/* get_attr cannot accept USE or CLOBBER. */
|| GET_CODE (insn) != INSN
|| GET_CODE (PATTERN (insn)) == USE
|| GET_CODE (PATTERN (insn)) == CLOBBER)
/* Anything with a cirrus attribute other than CIRRUS_NOT is Cirrus.  */
attr = get_attr_cirrus (insn);
return attr != CIRRUS_NOT;
/* Cirrus reorg for invalid instruction combinations.  Inserts NOPs after
   FIRST wherever the Cirrus Maverick hardware erratum patterns below would
   otherwise be formed.  */
cirrus_reorg (rtx first)
enum attr_cirrus attr;
rtx body = PATTERN (first);
/* Any branch must be followed by 2 non Cirrus instructions. */
if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
t = next_nonnote_insn (first);
if (arm_cirrus_insn_p (t))
if (arm_cirrus_insn_p (next_nonnote_insn (t)))
emit_insn_after (gen_nop (), first);
/* (float (blah)) is in parallel with a clobber. */
if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
body = XVECEXP (body, 0, 0);
if (GET_CODE (body) == SET)
rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
/* cfldrd, cfldr64, cfstrd, cfstr64 must
be followed by a non Cirrus insn. */
if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
if (arm_cirrus_insn_p (next_nonnote_insn (first)))
emit_insn_after (gen_nop (), first);
else if (arm_memory_load_p (first))
unsigned int arm_regno;
/* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
ldr/cfmv64hr combination where the Rd field is the same
in both instructions must be split with a non Cirrus
insn.  */
/* Get Arm register number for ldr insn. */
if (GET_CODE (lhs) == REG)
arm_regno = REGNO (lhs);
gcc_assert (GET_CODE (rhs) == REG);
arm_regno = REGNO (rhs);
/* Examine the insn following the load.  */
first = next_nonnote_insn (first);
if (! arm_cirrus_insn_p (first))
body = PATTERN (first);
/* (float (blah)) is in parallel with a clobber. */
if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
body = XVECEXP (body, 0, 0);
if (GET_CODE (body) == FLOAT)
body = XEXP (body, 0);
/* Erratum pattern: a Cirrus move reading the same ARM register the
   preceding ldr wrote -- split the pair with a nop.  */
if (get_attr_cirrus (first) == CIRRUS_MOVE
&& GET_CODE (XEXP (body, 1)) == REG
&& arm_regno == REGNO (XEXP (body, 1)))
emit_insn_after (gen_nop (), first);
/* get_attr cannot accept USE or CLOBBER. */
|| GET_CODE (first) != INSN
|| GET_CODE (PATTERN (first)) == USE
|| GET_CODE (PATTERN (first)) == CLOBBER)
attr = get_attr_cirrus (first);
/* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
must be followed by a non-coprocessor instruction. */
if (attr == CIRRUS_COMPARE)
t = next_nonnote_insn (first);
if (arm_cirrus_insn_p (t))
if (arm_cirrus_insn_p (next_nonnote_insn (t)))
emit_insn_after (gen_nop (), first);
/* Return TRUE if X references a SYMBOL_REF. */
symbol_mentioned_p (rtx x)
if (GET_CODE (x) == SYMBOL_REF)
/* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
are constant offsets, not symbols. */
if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
/* Recurse over the rtx's operands using its format string: 'E' is a
   vector of sub-rtxen, 'e' a single sub-rtx.  */
fmt = GET_RTX_FORMAT (GET_CODE (x));
for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
if (symbol_mentioned_p (XVECEXP (x, i, j)))
else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
/* Return TRUE if X references a LABEL_REF. */
label_mentioned_p (rtx x)
if (GET_CODE (x) == LABEL_REF)
/* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
instruction, but they are constant offsets, not symbols. */
if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
/* Recurse over operands, mirroring symbol_mentioned_p above.  */
fmt = GET_RTX_FORMAT (GET_CODE (x))
for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
if (label_mentioned_p (XVECEXP (x, i, j)))
else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
/* Return TRUE if X contains an UNSPEC_TLS reference (looking through
   CONST wrappers).  */
tls_mentioned_p (rtx x)
switch (GET_CODE (x))
return tls_mentioned_p (XEXP (x, 0));
if (XINT (x, 1) == UNSPEC_TLS)
/* Must not copy a SET whose source operand is PC-relative. */
arm_cannot_copy_insn_p (rtx insn)
rtx pat = PATTERN (insn);
if (GET_CODE (pat) == SET)
rtx rhs = SET_SRC (pat);
/* Direct UNSPEC_PIC_BASE source: the value depends on the insn's own
   address, so the insn cannot be duplicated.  */
if (GET_CODE (rhs) == UNSPEC
&& XINT (rhs, 1) == UNSPEC_PIC_BASE)
/* Likewise a load whose address is an UNSPEC_PIC_BASE.  */
if (GET_CODE (rhs) == MEM
&& GET_CODE (XEXP (rhs, 0)) == UNSPEC
&& XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
8633 enum rtx_code code = GET_CODE (x);
/* Return 1 if memory locations are adjacent. */
adjacent_mem_locations (rtx a, rtx b)
/* We don't guarantee to preserve the order of these memory refs. */
if (volatile_refs_p (a) || volatile_refs_p (b))
/* Both addresses must be (reg) or (plus (reg) (const_int)).  */
if ((GET_CODE (XEXP (a, 0)) == REG
|| (GET_CODE (XEXP (a, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
&& (GET_CODE (XEXP (b, 0)) == REG
|| (GET_CODE (XEXP (b, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
HOST_WIDE_INT val0 = 0, val1 = 0;
/* A plain (reg) address is treated as offset 0.  */
if (GET_CODE (XEXP (a, 0)) == PLUS)
reg0 = XEXP (XEXP (a, 0), 0);
val0 = INTVAL (XEXP (XEXP (a, 0), 1));
if (GET_CODE (XEXP (b, 0)) == PLUS)
reg1 = XEXP (XEXP (b, 0), 0);
val1 = INTVAL (XEXP (XEXP (b, 0), 1));
/* Don't accept any offset that will require multiple
instructions to handle, since this would cause the
arith_adjacentmem pattern to output an overlong sequence. */
if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
/* Don't allow an eliminable register: register elimination can make
the offset too large. */
if (arm_eliminable_register (reg0))
val_diff = val1 - val0;
/* If the target has load delay slots, then there's no benefit
to using an ldm instruction unless the offset is zero and
we are optimizing for size. */
return (optimize_size && (REGNO (reg0) == REGNO (reg1))
&& (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
&& (val_diff == 4 || val_diff == -4));
/* Adjacent means same base register and offsets exactly one word apart.  */
return ((REGNO (reg0) == REGNO (reg1))
&& (val_diff == 4 || val_diff == -4));
/* Analyze NOPS load operations (registers in OPERANDS[0..nops-1], mems in
   OPERANDS[nops..2*nops-1]) and decide whether they can be combined into a
   single load-multiple.  On success, fills REGS with the sorted register
   numbers, *BASE with the base register, and *LOAD_OFFSET with the lowest
   offset, and returns a code identifying the ldm variant (1=ldmia, 2=ldmib,
   3=ldmda, 4=ldmdb, 5=add+ldmia); returns 0 if no ldm is possible.  */
load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
HOST_WIDE_INT *load_offset)
int unsorted_regs[4];
HOST_WIDE_INT unsorted_offsets[4];
/* Can only handle 2, 3, or 4 insns at present,
though could be easily extended if required. */
gcc_assert (nops >= 2 && nops <= 4);
memset (order, 0, 4 * sizeof (int));
/* Loop over the operands and check that the memory references are
suitable (i.e. immediate offsets from the same base register). At
the same time, extract the target register, and the memory
offsets.  */
for (i = 0; i < nops; i++)
/* Convert a subreg of a mem into the mem itself. */
if (GET_CODE (operands[nops + i]) == SUBREG)
operands[nops + i] = alter_subreg (operands + (nops + i));
gcc_assert (GET_CODE (operands[nops + i]) == MEM);
/* Don't reorder volatile memory references; it doesn't seem worth
looking for the case where the order is ok anyway. */
if (MEM_VOLATILE_P (operands[nops + i]))
offset = const0_rtx;
/* Accept (reg), (subreg (reg)), or (plus base const_int) addresses,
   capturing the base in REG and the offset in OFFSET as a side effect
   of the embedded assignments.  */
if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
|| (GET_CODE (reg) == SUBREG
&& GET_CODE (reg = SUBREG_REG (reg)) == REG))
|| (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
&& ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
|| (GET_CODE (reg) == SUBREG
&& GET_CODE (reg = SUBREG_REG (reg)) == REG))
&& (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
base_reg = REGNO (reg);
unsorted_regs[0] = (GET_CODE (operands[i]) == REG
? REGNO (operands[i])
: REGNO (SUBREG_REG (operands[i])));
if (base_reg != (int) REGNO (reg))
/* Not addressed from the same base register. */
unsorted_regs[i] = (GET_CODE (operands[i]) == REG
? REGNO (operands[i])
: REGNO (SUBREG_REG (operands[i])));
if (unsorted_regs[i] < unsorted_regs[order[0]])
/* If it isn't an integer register, or if it overwrites the
base register but isn't the last insn in the list, then
we can't do this. */
if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
|| (i != nops - 1 && unsorted_regs[i] == base_reg))
unsorted_offsets[i] = INTVAL (offset);
/* Not a suitable memory address. */
/* All the useful information has now been extracted from the
operands into unsorted_regs and unsorted_offsets; additionally,
order[0] has been set to the lowest numbered register in the
list. Sort the registers into order, and check that the memory
offsets are ascending and adjacent. */
for (i = 1; i < nops; i++)
order[i] = order[i - 1];
/* Selection sort: pick the smallest register greater than the previous.  */
for (j = 0; j < nops; j++)
if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
&& (order[i] == order[i - 1]
|| unsorted_regs[j] < unsorted_regs[order[i]]))
/* Have we found a suitable register? if not, one must be used more
than once.  */
if (order[i] == order[i - 1])
/* Is the memory address adjacent and ascending? */
if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
for (i = 0; i < nops; i++)
regs[i] = unsorted_regs[order[i]];
*load_offset = unsorted_offsets[order[0]];
if (unsorted_offsets[order[0]] == 0)
return 1; /* ldmia */
if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
return 2; /* ldmib */
if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
return 3; /* ldmda */
if (unsorted_offsets[order[nops - 1]] == -4)
return 4; /* ldmdb */
/* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
if the offset isn't small enough. The reason 2 ldrs are faster
is because these ARMs are able to do more than one cache access
in a single cycle. The ARM9 and StrongARM have Harvard caches,
whilst the ARM8 has a double bandwidth cache. This means that
these cores can do both an instruction fetch and a data fetch in
a single cycle, so the trick of calculating the address into a
scratch register (one of the result regs) and then doing a load
multiple actually becomes slower (and no smaller in code size).
That is the transformation
ldr rd1, [rbase + offset]
ldr rd2, [rbase + offset + 4]
add rd1, rbase, offset
ldmia rd1, {rd1, rd2}
produces worse code -- '3 cycles + any stalls on rd2' instead of
'2 cycles + any stalls on rd2'. On ARMs with only one cache
access per cycle, the first sequence could never complete in less
than 6 cycles, whereas the ldm sequence would only take 5 and
would make better use of sequential accesses if not hitting the
cache.
We cheat here and test 'arm_ld_sched' which we currently know to
only be true for the ARM8, ARM9 and StrongARM. If this ever
changes, then the test below needs to be reworked. */
if (nops == 2 && arm_ld_sched)
/* Can't do it without setting up the offset, only do this if it takes
no more than one insn. */
return (const_ok_for_arm (unsorted_offsets[order[0]])
|| const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
8887 emit_ldm_seq (rtx *operands, int nops)
8891 HOST_WIDE_INT offset;
8895 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
8898 strcpy (buf, "ldm%(ia%)\t");
8902 strcpy (buf, "ldm%(ib%)\t");
8906 strcpy (buf, "ldm%(da%)\t");
8910 strcpy (buf, "ldm%(db%)\t");
8915 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
8916 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
8919 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
8920 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
8922 output_asm_insn (buf, operands);
8924 strcpy (buf, "ldm%(ia%)\t");
8931 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
8932 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
8934 for (i = 1; i < nops; i++)
8935 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
8936 reg_names[regs[i]]);
8938 strcat (buf, "}\t%@ phole ldm");
8940 output_asm_insn (buf, operands);
8945 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8946 HOST_WIDE_INT * load_offset)
8948 int unsorted_regs[4];
8949 HOST_WIDE_INT unsorted_offsets[4];
8954 /* Can only handle 2, 3, or 4 insns at present, though could be easily
8955 extended if required. */
8956 gcc_assert (nops >= 2 && nops <= 4);
8958 memset (order, 0, 4 * sizeof (int));
8960 /* Loop over the operands and check that the memory references are
8961 suitable (i.e. immediate offsets from the same base register). At
8962 the same time, extract the target register, and the memory
8964 for (i = 0; i < nops; i++)
8969 /* Convert a subreg of a mem into the mem itself. */
8970 if (GET_CODE (operands[nops + i]) == SUBREG)
8971 operands[nops + i] = alter_subreg (operands + (nops + i));
8973 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8975 /* Don't reorder volatile memory references; it doesn't seem worth
8976 looking for the case where the order is ok anyway. */
8977 if (MEM_VOLATILE_P (operands[nops + i]))
8980 offset = const0_rtx;
8982 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8983 || (GET_CODE (reg) == SUBREG
8984 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8985 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8986 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8988 || (GET_CODE (reg) == SUBREG
8989 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8990 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8995 base_reg = REGNO (reg);
8996 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8997 ? REGNO (operands[i])
8998 : REGNO (SUBREG_REG (operands[i])));
9003 if (base_reg != (int) REGNO (reg))
9004 /* Not addressed from the same base register. */
9007 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9008 ? REGNO (operands[i])
9009 : REGNO (SUBREG_REG (operands[i])));
9010 if (unsorted_regs[i] < unsorted_regs[order[0]])
9014 /* If it isn't an integer register, then we can't do this. */
9015 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9018 unsorted_offsets[i] = INTVAL (offset);
9021 /* Not a suitable memory address. */
9025 /* All the useful information has now been extracted from the
9026 operands into unsorted_regs and unsorted_offsets; additionally,
9027 order[0] has been set to the lowest numbered register in the
9028 list. Sort the registers into order, and check that the memory
9029 offsets are ascending and adjacent. */
9031 for (i = 1; i < nops; i++)
9035 order[i] = order[i - 1];
9036 for (j = 0; j < nops; j++)
9037 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9038 && (order[i] == order[i - 1]
9039 || unsorted_regs[j] < unsorted_regs[order[i]]))
9042 /* Have we found a suitable register? if not, one must be used more
9044 if (order[i] == order[i - 1])
9047 /* Is the memory address adjacent and ascending? */
9048 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9056 for (i = 0; i < nops; i++)
9057 regs[i] = unsorted_regs[order[i]];
9059 *load_offset = unsorted_offsets[order[0]];
9062 if (unsorted_offsets[order[0]] == 0)
9063 return 1; /* stmia */
9065 if (unsorted_offsets[order[0]] == 4)
9066 return 2; /* stmib */
9068 if (unsorted_offsets[order[nops - 1]] == 0)
9069 return 3; /* stmda */
9071 if (unsorted_offsets[order[nops - 1]] == -4)
9072 return 4; /* stmdb */
9078 emit_stm_seq (rtx *operands, int nops)
9082 HOST_WIDE_INT offset;
9086 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9089 strcpy (buf, "stm%(ia%)\t");
9093 strcpy (buf, "stm%(ib%)\t");
9097 strcpy (buf, "stm%(da%)\t");
9101 strcpy (buf, "stm%(db%)\t");
9108 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9109 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9111 for (i = 1; i < nops; i++)
9112 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9113 reg_names[regs[i]]);
9115 strcat (buf, "}\t%@ phole stm");
9117 output_asm_insn (buf, operands);
9121 /* Routines for use in generating RTL. */
9124 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9125 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9127 HOST_WIDE_INT offset = *offsetp;
9130 int sign = up ? 1 : -1;
9133 /* XScale has load-store double instructions, but they have stricter
9134 alignment requirements than load-store multiple, so we cannot
9137 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9138 the pipeline until completion.
9146 An ldr instruction takes 1-3 cycles, but does not block the
9155 Best case ldr will always win. However, the more ldr instructions
9156 we issue, the less likely we are to be able to schedule them well.
9157 Using ldr instructions also increases code size.
9159 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9160 for counts of 3 or 4 regs. */
9161 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9167 for (i = 0; i < count; i++)
9169 addr = plus_constant (from, i * 4 * sign);
9170 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9171 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9177 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9187 result = gen_rtx_PARALLEL (VOIDmode,
9188 rtvec_alloc (count + (write_back ? 1 : 0)));
9191 XVECEXP (result, 0, 0)
9192 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9197 for (j = 0; i < count; i++, j++)
9199 addr = plus_constant (from, j * 4 * sign);
9200 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9201 XVECEXP (result, 0, i)
9202 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9213 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9214 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9216 HOST_WIDE_INT offset = *offsetp;
9219 int sign = up ? 1 : -1;
9222 /* See arm_gen_load_multiple for discussion of
9223 the pros/cons of ldm/stm usage for XScale. */
9224 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9230 for (i = 0; i < count; i++)
9232 addr = plus_constant (to, i * 4 * sign);
9233 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9234 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9240 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9250 result = gen_rtx_PARALLEL (VOIDmode,
9251 rtvec_alloc (count + (write_back ? 1 : 0)));
9254 XVECEXP (result, 0, 0)
9255 = gen_rtx_SET (VOIDmode, to,
9256 plus_constant (to, count * 4 * sign));
9261 for (j = 0; i < count; i++, j++)
9263 addr = plus_constant (to, j * 4 * sign);
9264 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9265 XVECEXP (result, 0, i)
9266 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9277 arm_gen_movmemqi (rtx *operands)
9279 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9280 HOST_WIDE_INT srcoffset, dstoffset;
9282 rtx src, dst, srcbase, dstbase;
9283 rtx part_bytes_reg = NULL;
9286 if (GET_CODE (operands[2]) != CONST_INT
9287 || GET_CODE (operands[3]) != CONST_INT
9288 || INTVAL (operands[2]) > 64
9289 || INTVAL (operands[3]) & 3)
9292 dstbase = operands[0];
9293 srcbase = operands[1];
9295 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9296 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9298 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9299 out_words_to_go = INTVAL (operands[2]) / 4;
9300 last_bytes = INTVAL (operands[2]) & 3;
9301 dstoffset = srcoffset = 0;
9303 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9304 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9306 for (i = 0; in_words_to_go >= 2; i+=4)
9308 if (in_words_to_go > 4)
9309 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9310 srcbase, &srcoffset));
9312 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9313 FALSE, srcbase, &srcoffset));
9315 if (out_words_to_go)
9317 if (out_words_to_go > 4)
9318 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9319 dstbase, &dstoffset));
9320 else if (out_words_to_go != 1)
9321 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9325 dstbase, &dstoffset));
9328 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9329 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9330 if (last_bytes != 0)
9332 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9338 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9339 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9342 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9343 if (out_words_to_go)
9347 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9348 sreg = copy_to_reg (mem);
9350 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9351 emit_move_insn (mem, sreg);
9354 gcc_assert (!in_words_to_go); /* Sanity check */
9359 gcc_assert (in_words_to_go > 0);
9361 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9362 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9365 gcc_assert (!last_bytes || part_bytes_reg);
9367 if (BYTES_BIG_ENDIAN && last_bytes)
9369 rtx tmp = gen_reg_rtx (SImode);
9371 /* The bytes we want are in the top end of the word. */
9372 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9373 GEN_INT (8 * (4 - last_bytes))));
9374 part_bytes_reg = tmp;
9378 mem = adjust_automodify_address (dstbase, QImode,
9379 plus_constant (dst, last_bytes - 1),
9380 dstoffset + last_bytes - 1);
9381 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9385 tmp = gen_reg_rtx (SImode);
9386 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9387 part_bytes_reg = tmp;
9396 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9397 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9401 rtx tmp = gen_reg_rtx (SImode);
9402 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9403 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9404 part_bytes_reg = tmp;
9411 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9412 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9419 /* Select a dominance comparison mode if possible for a test of the general
9420 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9421 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9422 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9423 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9424 In all cases OP will be either EQ or NE, but we don't need to know which
9425 here. If we are unable to support a dominance comparison we return
9426 CC mode. This will then fail to match for the RTL expressions that
9427 generate this call. */
9429 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9431 enum rtx_code cond1, cond2;
9434 /* Currently we will probably get the wrong result if the individual
9435 comparisons are not simple. This also ensures that it is safe to
9436 reverse a comparison if necessary. */
9437 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9439 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9443 /* The if_then_else variant of this tests the second condition if the
9444 first passes, but is true if the first fails. Reverse the first
9445 condition to get a true "inclusive-or" expression. */
9446 if (cond_or == DOM_CC_NX_OR_Y)
9447 cond1 = reverse_condition (cond1);
9449 /* If the comparisons are not equal, and one doesn't dominate the other,
9450 then we can't do this. */
9452 && !comparison_dominates_p (cond1, cond2)
9453 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9458 enum rtx_code temp = cond1;
9466 if (cond_or == DOM_CC_X_AND_Y)
9471 case EQ: return CC_DEQmode;
9472 case LE: return CC_DLEmode;
9473 case LEU: return CC_DLEUmode;
9474 case GE: return CC_DGEmode;
9475 case GEU: return CC_DGEUmode;
9476 default: gcc_unreachable ();
9480 if (cond_or == DOM_CC_X_AND_Y)
9496 if (cond_or == DOM_CC_X_AND_Y)
9512 if (cond_or == DOM_CC_X_AND_Y)
9528 if (cond_or == DOM_CC_X_AND_Y)
9543 /* The remaining cases only occur when both comparisons are the
9546 gcc_assert (cond1 == cond2);
9550 gcc_assert (cond1 == cond2);
9554 gcc_assert (cond1 == cond2);
9558 gcc_assert (cond1 == cond2);
9562 gcc_assert (cond1 == cond2);
9571 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9573 /* All floating point compares return CCFP if it is an equality
9574 comparison, and CCFPE otherwise. */
9575 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9595 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9604 /* A compare with a shifted operand. Because of canonicalization, the
9605 comparison will have to be swapped when we emit the assembler. */
9606 if (GET_MODE (y) == SImode
9607 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9608 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9609 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9610 || GET_CODE (x) == ROTATERT))
9613 /* This operation is performed swapped, but since we only rely on the Z
9614 flag we don't need an additional mode. */
9615 if (GET_MODE (y) == SImode
9616 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9617 && GET_CODE (x) == NEG
9618 && (op == EQ || op == NE))
9621 /* This is a special case that is used by combine to allow a
9622 comparison of a shifted byte load to be split into a zero-extend
9623 followed by a comparison of the shifted integer (only valid for
9624 equalities and unsigned inequalities). */
9625 if (GET_MODE (x) == SImode
9626 && GET_CODE (x) == ASHIFT
9627 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9628 && GET_CODE (XEXP (x, 0)) == SUBREG
9629 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9630 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9631 && (op == EQ || op == NE
9632 || op == GEU || op == GTU || op == LTU || op == LEU)
9633 && GET_CODE (y) == CONST_INT)
9636 /* A construct for a conditional compare, if the false arm contains
9637 0, then both conditions must be true, otherwise either condition
9638 must be true. Not all conditions are possible, so CCmode is
9639 returned if it can't be done. */
9640 if (GET_CODE (x) == IF_THEN_ELSE
9641 && (XEXP (x, 2) == const0_rtx
9642 || XEXP (x, 2) == const1_rtx)
9643 && COMPARISON_P (XEXP (x, 0))
9644 && COMPARISON_P (XEXP (x, 1)))
9645 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9646 INTVAL (XEXP (x, 2)));
9648 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9649 if (GET_CODE (x) == AND
9650 && COMPARISON_P (XEXP (x, 0))
9651 && COMPARISON_P (XEXP (x, 1)))
9652 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9655 if (GET_CODE (x) == IOR
9656 && COMPARISON_P (XEXP (x, 0))
9657 && COMPARISON_P (XEXP (x, 1)))
9658 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9661 /* An operation (on Thumb) where we want to test for a single bit.
9662 This is done by shifting that bit up into the top bit of a
9663 scratch register; we can then branch on the sign bit. */
9665 && GET_MODE (x) == SImode
9666 && (op == EQ || op == NE)
9667 && GET_CODE (x) == ZERO_EXTRACT
9668 && XEXP (x, 1) == const1_rtx)
9671 /* An operation that sets the condition codes as a side-effect, the
9672 V flag is not set correctly, so we can only use comparisons where
9673 this doesn't matter. (For LT and GE we can use "mi" and "pl"
9675 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
9676 if (GET_MODE (x) == SImode
9678 && (op == EQ || op == NE || op == LT || op == GE)
9679 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
9680 || GET_CODE (x) == AND || GET_CODE (x) == IOR
9681 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
9682 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
9683 || GET_CODE (x) == LSHIFTRT
9684 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9685 || GET_CODE (x) == ROTATERT
9686 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
9689 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
9692 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
9693 && GET_CODE (x) == PLUS
9694 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
9700 /* X and Y are two things to compare using CODE. Emit the compare insn and
9701 return the rtx for register 0 in the proper mode. FP means this is a
9702 floating point compare: I don't think that it is needed on the arm. */
9704 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
9706 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
9707 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
9709 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
9714 /* Generate a sequence of insns that will generate the correct return
9715 address mask depending on the physical architecture that the program
9718 arm_gen_return_addr_mask (void)
9720 rtx reg = gen_reg_rtx (Pmode);
9722 emit_insn (gen_return_addr_mask (reg));
9727 arm_reload_in_hi (rtx *operands)
9729 rtx ref = operands[1];
9731 HOST_WIDE_INT offset = 0;
9733 if (GET_CODE (ref) == SUBREG)
9735 offset = SUBREG_BYTE (ref);
9736 ref = SUBREG_REG (ref);
9739 if (GET_CODE (ref) == REG)
9741 /* We have a pseudo which has been spilt onto the stack; there
9742 are two cases here: the first where there is a simple
9743 stack-slot replacement and a second where the stack-slot is
9744 out of range, or is used as a subreg. */
9745 if (reg_equiv_mem[REGNO (ref)])
9747 ref = reg_equiv_mem[REGNO (ref)];
9748 base = find_replacement (&XEXP (ref, 0));
9751 /* The slot is out of range, or was dressed up in a SUBREG. */
9752 base = reg_equiv_address[REGNO (ref)];
9755 base = find_replacement (&XEXP (ref, 0));
9757 /* Handle the case where the address is too complex to be offset by 1. */
9758 if (GET_CODE (base) == MINUS
9759 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9761 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9763 emit_set_insn (base_plus, base);
9766 else if (GET_CODE (base) == PLUS)
9768 /* The addend must be CONST_INT, or we would have dealt with it above. */
9769 HOST_WIDE_INT hi, lo;
9771 offset += INTVAL (XEXP (base, 1));
9772 base = XEXP (base, 0);
9774 /* Rework the address into a legal sequence of insns. */
9775 /* Valid range for lo is -4095 -> 4095 */
9778 : -((-offset) & 0xfff));
9780 /* Corner case, if lo is the max offset then we would be out of range
9781 once we have added the additional 1 below, so bump the msb into the
9782 pre-loading insn(s). */
9786 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9787 ^ (HOST_WIDE_INT) 0x80000000)
9788 - (HOST_WIDE_INT) 0x80000000);
9790 gcc_assert (hi + lo == offset);
9794 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9796 /* Get the base address; addsi3 knows how to handle constants
9797 that require more than one insn. */
9798 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9804 /* Operands[2] may overlap operands[0] (though it won't overlap
9805 operands[1]), that's why we asked for a DImode reg -- so we can
9806 use the bit that does not overlap. */
9807 if (REGNO (operands[2]) == REGNO (operands[0]))
9808 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9810 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
9812 emit_insn (gen_zero_extendqisi2 (scratch,
9813 gen_rtx_MEM (QImode,
9814 plus_constant (base,
9816 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
9817 gen_rtx_MEM (QImode,
9818 plus_constant (base,
9820 if (!BYTES_BIG_ENDIAN)
9821 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
9822 gen_rtx_IOR (SImode,
9825 gen_rtx_SUBREG (SImode, operands[0], 0),
9829 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
9830 gen_rtx_IOR (SImode,
9831 gen_rtx_ASHIFT (SImode, scratch,
9833 gen_rtx_SUBREG (SImode, operands[0], 0)));
9836 /* Handle storing a half-word to memory during reload by synthesizing as two
9837 byte stores. Take care not to clobber the input values until after we
9838 have moved them somewhere safe. This code assumes that if the DImode
9839 scratch in operands[2] overlaps either the input value or output address
9840 in some way, then that value must die in this insn (we absolutely need
9841 two scratch registers for some corner cases). */
9843 arm_reload_out_hi (rtx *operands)
9845 rtx ref = operands[0];
9846 rtx outval = operands[1];
9848 HOST_WIDE_INT offset = 0;
9850 if (GET_CODE (ref) == SUBREG)
9852 offset = SUBREG_BYTE (ref);
9853 ref = SUBREG_REG (ref);
9856 if (GET_CODE (ref) == REG)
9858 /* We have a pseudo which has been spilt onto the stack; there
9859 are two cases here: the first where there is a simple
9860 stack-slot replacement and a second where the stack-slot is
9861 out of range, or is used as a subreg. */
9862 if (reg_equiv_mem[REGNO (ref)])
9864 ref = reg_equiv_mem[REGNO (ref)];
9865 base = find_replacement (&XEXP (ref, 0));
9868 /* The slot is out of range, or was dressed up in a SUBREG. */
9869 base = reg_equiv_address[REGNO (ref)];
9872 base = find_replacement (&XEXP (ref, 0));
9874 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
9876 /* Handle the case where the address is too complex to be offset by 1. */
9877 if (GET_CODE (base) == MINUS
9878 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9880 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9882 /* Be careful not to destroy OUTVAL. */
9883 if (reg_overlap_mentioned_p (base_plus, outval))
9885 /* Updating base_plus might destroy outval, see if we can
9886 swap the scratch and base_plus. */
9887 if (!reg_overlap_mentioned_p (scratch, outval))
9890 scratch = base_plus;
9895 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
9897 /* Be conservative and copy OUTVAL into the scratch now,
9898 this should only be necessary if outval is a subreg
9899 of something larger than a word. */
9900 /* XXX Might this clobber base? I can't see how it can,
9901 since scratch is known to overlap with OUTVAL, and
9902 must be wider than a word. */
9903 emit_insn (gen_movhi (scratch_hi, outval));
9904 outval = scratch_hi;
9908 emit_set_insn (base_plus, base);
9911 else if (GET_CODE (base) == PLUS)
9913 /* The addend must be CONST_INT, or we would have dealt with it above. */
9914 HOST_WIDE_INT hi, lo;
9916 offset += INTVAL (XEXP (base, 1));
9917 base = XEXP (base, 0);
9919 /* Rework the address into a legal sequence of insns. */
9920 /* Valid range for lo is -4095 -> 4095 */
9923 : -((-offset) & 0xfff));
9925 /* Corner case, if lo is the max offset then we would be out of range
9926 once we have added the additional 1 below, so bump the msb into the
9927 pre-loading insn(s). */
9931 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9932 ^ (HOST_WIDE_INT) 0x80000000)
9933 - (HOST_WIDE_INT) 0x80000000);
9935 gcc_assert (hi + lo == offset);
9939 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9941 /* Be careful not to destroy OUTVAL. */
9942 if (reg_overlap_mentioned_p (base_plus, outval))
9944 /* Updating base_plus might destroy outval, see if we
9945 can swap the scratch and base_plus. */
9946 if (!reg_overlap_mentioned_p (scratch, outval))
9949 scratch = base_plus;
9954 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
9956 /* Be conservative and copy outval into scratch now,
9957 this should only be necessary if outval is a
9958 subreg of something larger than a word. */
9959 /* XXX Might this clobber base? I can't see how it
9960 can, since scratch is known to overlap with
9962 emit_insn (gen_movhi (scratch_hi, outval));
9963 outval = scratch_hi;
9967 /* Get the base address; addsi3 knows how to handle constants
9968 that require more than one insn. */
9969 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9975 if (BYTES_BIG_ENDIAN)
9977 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
9978 plus_constant (base, offset + 1)),
9979 gen_lowpart (QImode, outval)));
9980 emit_insn (gen_lshrsi3 (scratch,
9981 gen_rtx_SUBREG (SImode, outval, 0),
9983 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
9984 gen_lowpart (QImode, scratch)));
9988 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
9989 gen_lowpart (QImode, outval)));
9990 emit_insn (gen_lshrsi3 (scratch,
9991 gen_rtx_SUBREG (SImode, outval, 0),
9993 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
9994 plus_constant (base, offset + 1)),
9995 gen_lowpart (QImode, scratch)));
9999 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10000 (padded to the size of a word) should be passed in a register. */
10003 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10005 if (TARGET_AAPCS_BASED)
10006 return must_pass_in_stack_var_size (mode, type);
10008 return must_pass_in_stack_var_size_or_pad (mode, type);
10012 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10013 Return true if an argument passed on the stack should be padded upwards,
10014 i.e. if the least-significant byte has useful data.
10015 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10016 aggregate types are placed in the lowest memory address. */
10019 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10021 if (!TARGET_AAPCS_BASED)
10022 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10024 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10031 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10032 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10033 byte of the register has useful data, and return the opposite if the
10034 most significant byte does.
10035 For AAPCS, small aggregates and small complex types are always padded
10039 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10040 tree type, int first ATTRIBUTE_UNUSED)
10042 if (TARGET_AAPCS_BASED
10043 && BYTES_BIG_ENDIAN
10044 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10045 && int_size_in_bytes (type) <= 4)
10048 /* Otherwise, use default padding. */
10049 return !BYTES_BIG_ENDIAN;
10053 /* Print a symbolic form of X to the debug file, F. */
10055 arm_print_value (FILE *f, rtx x)
10057 switch (GET_CODE (x))
10060 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10064 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10072 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10074 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10075 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10083 fprintf (f, "\"%s\"", XSTR (x, 0));
10087 fprintf (f, "`%s'", XSTR (x, 0));
10091 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10095 arm_print_value (f, XEXP (x, 0));
10099 arm_print_value (f, XEXP (x, 0));
10101 arm_print_value (f, XEXP (x, 1));
10109 fprintf (f, "????");
10114 /* Routines for manipulation of the constant pool. */
10116 /* Arm instructions cannot load a large constant directly into a
10117 register; they have to come from a pc relative load. The constant
10118 must therefore be placed in the addressable range of the pc
10119 relative load. Depending on the precise pc relative load
10120 instruction the range is somewhere between 256 bytes and 4k. This
10121 means that we often have to dump a constant inside a function, and
10122 generate code to branch around it.
10124 It is important to minimize this, since the branches will slow
10125 things down and make the code larger.
10127 Normally we can hide the table after an existing unconditional
10128 branch so that there is no interruption of the flow, but in the
10129 worst case the code looks like this:
10147 We fix this by performing a scan after scheduling, which notices
10148 which instructions need to have their operands fetched from the
10149 constant table and builds the table.
10151 The algorithm starts by building a table of all the constants that
10152 need fixing up and all the natural barriers in the function (places
10153 where a constant table can be dropped without breaking the flow).
10154 For each fixup we note how far the pc-relative replacement will be
10155 able to reach and the offset of the instruction into the function.
10157 Having built the table we then group the fixes together to form
10158 tables that are as large as possible (subject to addressing
10159 constraints) and emit each table of constants after the last
10160 barrier that is within range of all the instructions in the group.
10161 If a group does not contain a barrier, then we forcibly create one
10162 by inserting a jump instruction into the flow. Once the table has
10163 been inserted, the insns are then modified to reference the
10164 relevant entry in the pool.
10166 Possible enhancements to the algorithm (not implemented) are:
10168 1) For some processors and object formats, there may be benefit in
10169 aligning the pools to the start of cache lines; this alignment
10170 would need to be taken into account when calculating addressability
10173 /* These typedefs are located at the start of this file, so that
10174 they can be used in the prototypes there. This comment is to
10175 remind readers of that fact so that the following structures
10176 can be understood more easily.
10178 typedef struct minipool_node Mnode;
10179 typedef struct minipool_fixup Mfix; */
10181 struct minipool_node
10183 /* Doubly linked chain of entries. */
10186 /* The maximum offset into the code that this entry can be placed. While
10187 pushing fixes for forward references, all entries are sorted in order
10188 of increasing max_address. */
10189 HOST_WIDE_INT max_address;
10190 /* Similarly for an entry inserted for a backwards ref. */
10191 HOST_WIDE_INT min_address;
10192 /* The number of fixes referencing this entry. This can become zero
10193 if we "unpush" an entry. In this case we ignore the entry when we
10194 come to emit the code. */
10196 /* The offset from the start of the minipool. */
10197 HOST_WIDE_INT offset;
10198 /* The value in table. */
10200 /* The mode of value. */
10201 enum machine_mode mode;
10202 /* The size of the value. With iWMMXt enabled
10203 sizes > 4 also imply an alignment of 8-bytes. */
10207 struct minipool_fixup
10211 HOST_WIDE_INT address;
10213 enum machine_mode mode;
10217 HOST_WIDE_INT forwards;
10218 HOST_WIDE_INT backwards;
10221 /* Fixes less than a word need padding out to a word boundary. */
10222 #define MINIPOOL_FIX_SIZE(mode) \
10223 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
10225 static Mnode * minipool_vector_head;
10226 static Mnode * minipool_vector_tail;
10227 static rtx minipool_vector_label;
10228 static int minipool_pad;
10230 /* The linked list of all minipool fixes required for this function. */
10231 Mfix * minipool_fix_head;
10232 Mfix * minipool_fix_tail;
10233 /* The fix entry for the current minipool, once it has been placed. */
10234 Mfix * minipool_barrier;
10236 /* Determines if INSN is the start of a jump table. Returns the end
10237 of the TABLE or NULL_RTX. */
10239 is_jump_table (rtx insn)
10243 if (GET_CODE (insn) == JUMP_INSN
10244 && JUMP_LABEL (insn) != NULL
10245 && ((table = next_real_insn (JUMP_LABEL (insn)))
10246 == next_real_insn (insn))
10248 && GET_CODE (table) == JUMP_INSN
10249 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10250 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10256 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10257 #define JUMP_TABLES_IN_TEXT_SECTION 0
/* Return the number of bytes INSN's jump table occupies in the text
   section (zero when tables are emitted elsewhere).  */
10260 static HOST_WIDE_INT
10261 get_jump_table_size (rtx insn)
10263 /* ADDR_VECs only take room if read-only data does into the text
10265 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10267 rtx body = PATTERN (insn);
/* ADDR_DIFF_VEC stores its entries in operand 1; ADDR_VEC in operand 0. */
10268 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10269 HOST_WIDE_INT size;
10270 HOST_WIDE_INT modesize;
/* Table size = entry size (mode of the vector) times entry count.  */
10272 modesize = GET_MODE_SIZE (GET_MODE (body));
10273 size = modesize * XVECLEN (body, elt);
10277 /* Round up size of TBB table to a halfword boundary. */
10278 size = (size + 1) & ~(HOST_WIDE_INT)1;
10281 /* No padding necessary for TBH. */
10284 /* Add two bytes for alignment on Thumb. */
10289 gcc_unreachable ();
10297 /* Move a minipool fix MP from its current location to before MAX_MP.
10298 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10299 constraints may need updating. */
10301 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10302 HOST_WIDE_INT max_address)
10304 /* The code below assumes these are different. */
10305 gcc_assert (mp != max_mp);
10307 if (max_mp == NULL)
/* Not moving; just tighten MP's constraint if the new reference is
   more restrictive.  */
10309 if (max_address < mp->max_address)
10310 mp->max_address = max_address;
/* MP will sit before MAX_MP, so it can be at most MAX_MP's limit
   minus MP's own size.  */
10314 if (max_address > max_mp->max_address - mp->fix_size)
10315 mp->max_address = max_mp->max_address - mp->fix_size;
10317 mp->max_address = max_address;
10319 /* Unlink MP from its current position. Since max_mp is non-null,
10320 mp->prev must be non-null. */
10321 mp->prev->next = mp->next;
10322 if (mp->next != NULL)
10323 mp->next->prev = mp->prev;
10325 minipool_vector_tail = mp->prev;
10327 /* Re-insert it before MAX_MP. */
10329 mp->prev = max_mp->prev;
10332 if (mp->prev != NULL)
10333 mp->prev->next = mp;
10335 minipool_vector_head = mp;
10338 /* Save the new entry. */
10341 /* Scan over the preceding entries and adjust their addresses as
/* Propagate the tightened constraint backwards through the list so
   every earlier entry still fits in front of MP.  */
10343 while (mp->prev != NULL
10344 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10346 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10353 /* Add a constant to the minipool for a forward reference. Returns the
10354 node added or NULL if the constant will not fit in this pool. */
10356 add_minipool_forward_ref (Mfix *fix)
10358 /* If set, max_mp is the first pool_entry that has a lower
10359 constraint than the one we are trying to add. */
10360 Mnode * max_mp = NULL;
/* Furthest code address from which FIX's insn can still reach the
   pool entry, allowing for pool padding.  */
10361 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10364 /* If the minipool starts before the end of FIX->INSN then this FIX
10365 can not be placed into the current pool. Furthermore, adding the
10366 new constant pool entry may cause the pool to start FIX_SIZE bytes
10368 if (minipool_vector_head &&
10369 (fix->address + get_attr_length (fix->insn)
10370 >= minipool_vector_head->max_address - fix->fix_size))
10373 /* Scan the pool to see if a constant with the same value has
10374 already been added. While we are doing this, also note the
10375 location where we must insert the constant if it doesn't already
10377 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
/* Reuse an existing entry when the value matches exactly (code,
   mode, and for labels the label number must agree too).  */
10379 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10380 && fix->mode == mp->mode
10381 && (GET_CODE (fix->value) != CODE_LABEL
10382 || (CODE_LABEL_NUMBER (fix->value)
10383 == CODE_LABEL_NUMBER (mp->value)))
10384 && rtx_equal_p (fix->value, mp->value)
10386 /* More than one fix references this entry. */
10388 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10391 /* Note the insertion point if necessary. */
10393 && mp->max_address > max_address)
10396 /* If we are inserting an 8-bytes aligned quantity and
10397 we have not already found an insertion point, then
10398 make sure that all such 8-byte aligned quantities are
10399 placed at the start of the pool. */
10400 if (ARM_DOUBLEWORD_ALIGN
10402 && fix->fix_size >= 8
10403 && mp->fix_size < 8)
10406 max_address = mp->max_address;
10410 /* The value is not currently in the minipool, so we need to create
10411 a new entry for it. If MAX_MP is NULL, the entry will be put on
10412 the end of the list since the placement is less constrained than
10413 any existing entry. Otherwise, we insert the new fix before
10414 MAX_MP and, if necessary, adjust the constraints on the other
10417 mp->fix_size = fix->fix_size;
10418 mp->mode = fix->mode;
10419 mp->value = fix->value;
10421 /* Not yet required for a backwards ref. */
10422 mp->min_address = -65536;
10424 if (max_mp == NULL)
/* Append at the tail; create the pool label on first insertion.  */
10426 mp->max_address = max_address;
10428 mp->prev = minipool_vector_tail;
10430 if (mp->prev == NULL)
10432 minipool_vector_head = mp;
10433 minipool_vector_label = gen_label_rtx ();
10436 mp->prev->next = mp;
10438 minipool_vector_tail = mp;
/* Insert before MAX_MP, capping the new entry's constraint so it
   still fits in front of MAX_MP.  */
10442 if (max_address > max_mp->max_address - mp->fix_size)
10443 mp->max_address = max_mp->max_address - mp->fix_size;
10445 mp->max_address = max_address;
10448 mp->prev = max_mp->prev;
10450 if (mp->prev != NULL)
10451 mp->prev->next = mp;
10453 minipool_vector_head = mp;
10456 /* Save the new entry. */
10459 /* Scan over the preceding entries and adjust their addresses as
/* Tighten earlier entries' max_address to keep them reachable.  */
10461 while (mp->prev != NULL
10462 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10464 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
/* Backward-reference counterpart of move_minipool_fix_forward_ref:
   move MP to after MIN_MP (or just update its min_address constraint
   when MIN_MP is NULL).  */
10472 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10473 HOST_WIDE_INT min_address)
10475 HOST_WIDE_INT offset;
10477 /* The code below assumes these are different. */
10478 gcc_assert (mp != min_mp);
10480 if (min_mp == NULL)
/* Not moving; only raise MP's minimum-address constraint.  */
10482 if (min_address > mp->min_address)
10483 mp->min_address = min_address;
10487 /* We will adjust this below if it is too loose. */
10488 mp->min_address = min_address;
10490 /* Unlink MP from its current position. Since min_mp is non-null,
10491 mp->next must be non-null. */
10492 mp->next->prev = mp->prev;
10493 if (mp->prev != NULL)
10494 mp->prev->next = mp->next;
10496 minipool_vector_head = mp->next;
10498 /* Reinsert it after MIN_MP. */
10500 mp->next = min_mp->next;
10502 if (mp->next != NULL)
10503 mp->next->prev = mp;
10505 minipool_vector_tail = mp;
/* Recompute all pool offsets and propagate min_address constraints
   forwards through the list.  Entries with zero refcount take no
   space (they will be skipped at emission time).  */
10511 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10513 mp->offset = offset;
10514 if (mp->refcount > 0)
10515 offset += mp->fix_size;
10517 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10518 mp->next->min_address = mp->min_address + mp->fix_size;
10524 /* Add a constant to the minipool for a backward reference. Returns the
10525 node added or NULL if the constant will not fit in this pool.
10527 Note that the code for insertion for a backwards reference can be
10528 somewhat confusing because the calculated offsets for each fix do
10529 not take into account the size of the pool (which is still under
10532 add_minipool_backward_ref (Mfix *fix)
10534 /* If set, min_mp is the last pool_entry that has a lower constraint
10535 than the one we are trying to add. */
10536 Mnode *min_mp = NULL;
10537 /* This can be negative, since it is only a constraint. */
10538 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10541 /* If we can't reach the current pool from this insn, or if we can't
10542 insert this entry at the end of the pool without pushing other
10543 fixes out of range, then we don't try. This ensures that we
10544 can't fail later on. */
10545 if (min_address >= minipool_barrier->address
10546 || (minipool_vector_tail->min_address + fix->fix_size
10547 >= minipool_barrier->address))
10550 /* Scan the pool to see if a constant with the same value has
10551 already been added. While we are doing this, also note the
10552 location where we must insert the constant if it doesn't already
/* Note: scanned tail-to-head, the reverse of the forward case.  */
10554 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10556 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10557 && fix->mode == mp->mode
10558 && (GET_CODE (fix->value) != CODE_LABEL
10559 || (CODE_LABEL_NUMBER (fix->value)
10560 == CODE_LABEL_NUMBER (mp->value)))
10561 && rtx_equal_p (fix->value, mp->value)
10562 /* Check that there is enough slack to move this entry to the
10563 end of the table (this is conservative). */
10564 && (mp->max_address
10565 > (minipool_barrier->address
10566 + minipool_vector_tail->offset
10567 + minipool_vector_tail->fix_size)))
10570 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10573 if (min_mp != NULL)
/* Once the insertion point is known, every earlier entry shifts up
   by the size of the fix we are about to add.  */
10574 mp->min_address += fix->fix_size;
10577 /* Note the insertion point if necessary. */
10578 if (mp->min_address < min_address)
10580 /* For now, we do not allow the insertion of 8-byte alignment
10581 requiring nodes anywhere but at the start of the pool. */
10582 if (ARM_DOUBLEWORD_ALIGN
10583 && fix->fix_size >= 8 && mp->fix_size < 8)
10588 else if (mp->max_address
10589 < minipool_barrier->address + mp->offset + fix->fix_size)
10591 /* Inserting before this entry would push the fix beyond
10592 its maximum address (which can happen if we have
10593 re-located a forwards fix); force the new fix to come
10595 if (ARM_DOUBLEWORD_ALIGN
10596 && fix->fix_size >= 8 && mp->fix_size < 8)
10601 min_address = mp->min_address + fix->fix_size;
10604 /* Do not insert a non-8-byte aligned quantity before 8-byte
10605 aligned quantities. */
10606 else if (ARM_DOUBLEWORD_ALIGN
10607 && fix->fix_size < 8
10608 && mp->fix_size >= 8)
10611 min_address = mp->min_address + fix->fix_size;
10616 /* We need to create a new entry. */
10618 mp->fix_size = fix->fix_size;
10619 mp->mode = fix->mode;
10620 mp->value = fix->value;
/* Backward refs are only constrained below; allow generous forward
   slack from the barrier.  */
10622 mp->max_address = minipool_barrier->address + 65536;
10624 mp->min_address = min_address;
10626 if (min_mp == NULL)
/* Prepend at the head; create the pool label on first insertion.  */
10629 mp->next = minipool_vector_head;
10631 if (mp->next == NULL)
10633 minipool_vector_tail = mp;
10634 minipool_vector_label = gen_label_rtx ();
10637 mp->next->prev = mp;
10639 minipool_vector_head = mp;
10643 mp->next = min_mp->next;
10647 if (mp->next != NULL)
10648 mp->next->prev = mp;
10650 minipool_vector_tail = mp;
10653 /* Save the new entry. */
10661 /* Scan over the following entries and adjust their offsets. */
10662 while (mp->next != NULL)
10664 if (mp->next->min_address < mp->min_address + mp->fix_size)
10665 mp->next->min_address = mp->min_address + mp->fix_size;
/* Zero-refcount entries contribute no space to the pool.  */
10668 mp->next->offset = mp->offset + mp->fix_size;
10670 mp->next->offset = mp->offset;
/* Record BARRIER as the barrier for the current pool and assign each
   live pool entry its byte offset from the pool's start.  */
10679 assign_minipool_offsets (Mfix *barrier)
10681 HOST_WIDE_INT offset = 0;
10684 minipool_barrier = barrier;
10686 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10688 mp->offset = offset;
/* Entries whose refcount dropped to zero are not emitted, so they
   take no space.  */
10690 if (mp->refcount > 0)
10691 offset += mp->fix_size;
10695 /* Output the literal table */
10697 dump_minipool (rtx scan)
/* Use 8-byte pool alignment if any live entry requires it.  */
10703 if (ARM_DOUBLEWORD_ALIGN)
10704 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10705 if (mp->refcount > 0 && mp->fix_size >= 8)
10712 fprintf (dump_file,
10713 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
10714 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
/* Emit: anonymous label, alignment directive, then the pool label
   that all the fixed-up insns reference.  */
10716 scan = emit_label_after (gen_label_rtx (), scan);
10717 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
10718 scan = emit_label_after (minipool_vector_label, scan);
10720 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
10722 if (mp->refcount > 0)
10726 fprintf (dump_file,
10727 ";; Offset %u, min %ld, max %ld ",
10728 (unsigned) mp->offset, (unsigned long) mp->min_address,
10729 (unsigned long) mp->max_address);
10730 arm_print_value (dump_file, mp->value);
10731 fputc ('\n', dump_file);
/* Pick the consttable pattern matching the entry's size; each is
   conditional on the target providing that pattern.  */
10734 switch (mp->fix_size)
10736 #ifdef HAVE_consttable_1
10738 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
10742 #ifdef HAVE_consttable_2
10744 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
10748 #ifdef HAVE_consttable_4
10750 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
10754 #ifdef HAVE_consttable_8
10756 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
10760 #ifdef HAVE_consttable_16
10762 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
10767 gcc_unreachable ();
/* Reset the pool for the next round, then close the table with an
   end marker and a barrier.  */
10775 minipool_vector_head = minipool_vector_tail = NULL;
10776 scan = emit_insn_after (gen_consttable_end (), scan);
10777 scan = emit_barrier_after (scan);
10780 /* Return the cost of forcibly inserting a barrier after INSN. */
10782 arm_barrier_cost (rtx insn)
10784 /* Basing the location of the pool on the loop depth is preferable,
10785 but at the moment, the basic block information seems to be
10786 corrupt by this stage of the compilation. */
10787 int base_cost = 50;
10788 rtx next = next_nonnote_insn (insn);
/* Placing the pool just before a label is preferred (lower cost).  */
10790 if (next != NULL && GET_CODE (next) == CODE_LABEL)
10793 switch (GET_CODE (insn))
10796 /* It will always be better to place the table before the label, rather
10805 return base_cost - 10;
10808 return base_cost + 10;
10812 /* Find the best place in the insn stream in the range
10813 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
10814 Create the barrier by inserting a jump and add a new fix entry for
10817 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
10819 HOST_WIDE_INT count = 0;
10821 rtx from = fix->insn;
10822 /* The instruction after which we will insert the jump. */
10823 rtx selected = NULL;
10825 /* The address at which the jump instruction will be placed. */
10826 HOST_WIDE_INT selected_address;
10828 HOST_WIDE_INT max_count = max_address - fix->address;
10829 rtx label = gen_label_rtx ();
/* Start with the cost of placing the barrier right after FIX.  */
10831 selected_cost = arm_barrier_cost (from);
10832 selected_address = fix->address;
/* Walk forward, tracking byte offset COUNT, looking for the
   cheapest in-range insertion point (<= keeps the latest minimum).  */
10834 while (from && count < max_count)
10839 /* This code shouldn't have been called if there was a natural barrier
10841 gcc_assert (GET_CODE (from) != BARRIER);
10843 /* Count the length of this insn. */
10844 count += get_attr_length (from);
10846 /* If there is a jump table, add its length. */
10847 tmp = is_jump_table (from);
10850 count += get_jump_table_size (tmp);
10852 /* Jump tables aren't in a basic block, so base the cost on
10853 the dispatch insn. If we select this location, we will
10854 still put the pool after the table. */
10855 new_cost = arm_barrier_cost (from);
10857 if (count < max_count
10858 && (!selected || new_cost <= selected_cost))
10861 selected_cost = new_cost;
10862 selected_address = fix->address + count;
10865 /* Continue after the dispatch table. */
10866 from = NEXT_INSN (tmp);
10870 new_cost = arm_barrier_cost (from);
10872 if (count < max_count
10873 && (!selected || new_cost <= selected_cost))
10876 selected_cost = new_cost;
10877 selected_address = fix->address + count;
10880 from = NEXT_INSN (from);
10883 /* Make sure that we found a place to insert the jump. */
10884 gcc_assert (selected);
10886 /* Create a new JUMP_INSN that branches around a barrier. */
10887 from = emit_jump_insn_after (gen_jump (label), selected);
10888 JUMP_LABEL (from) = label;
10889 barrier = emit_barrier_after (from);
10890 emit_label_after (label, barrier);
10892 /* Create a minipool barrier entry for the new barrier. */
10893 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
10894 new_fix->insn = barrier;
10895 new_fix->address = selected_address;
/* Splice the new barrier fix into the fix chain right after FIX.  */
10896 new_fix->next = fix->next;
10897 fix->next = new_fix;
10902 /* Record that there is a natural barrier in the insn stream at
10905 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
10907 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
10910 fix->address = address;
/* Append to the global fix chain (barriers share the chain with
   ordinary fixes).  */
10913 if (minipool_fix_head != NULL)
10914 minipool_fix_tail->next = fix;
10916 minipool_fix_head = fix;
10918 minipool_fix_tail = fix;
10921 /* Record INSN, which will need fixing up to load a value from the
10922 minipool. ADDRESS is the offset of the insn since the start of the
10923 function; LOC is a pointer to the part of the insn which requires
10924 fixing; VALUE is the constant that must be loaded, which is of type
10927 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
10928 enum machine_mode mode, rtx value)
10930 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
10933 fix->address = address;
10936 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
10937 fix->value = value;
/* Reach of this insn's pool addressing mode, from the insn attrs.  */
10938 fix->forwards = get_attr_pool_range (insn);
10939 fix->backwards = get_attr_neg_pool_range (insn);
10940 fix->minipool = NULL;
10942 /* If an insn doesn't have a range defined for it, then it isn't
10943 expecting to be reworked by this code. Better to stop now than
10944 to generate duff assembly code. */
10945 gcc_assert (fix->forwards || fix->backwards);
10947 /* If an entry requires 8-byte alignment then assume all constant pools
10948 require 4 bytes of padding. Trying to do this later on a per-pool
10949 basis is awkward because existing pool entries have to be modified. */
10950 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
10955 fprintf (dump_file,
10956 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
10957 GET_MODE_NAME (mode),
10958 INSN_UID (insn), (unsigned long) address,
10959 -1 * (long)fix->backwards, (long)fix->forwards);
10960 arm_print_value (dump_file, fix->value);
10961 fprintf (dump_file, "\n");
10964 /* Add it to the chain of fixes. */
10967 if (minipool_fix_head != NULL)
10968 minipool_fix_tail->next = fix;
10970 minipool_fix_head = fix;
10972 minipool_fix_tail = fix;
10975 /* Return the cost of synthesizing a 64-bit constant VAL inline.
10976 Returns the number of insns needed, or 99 if we don't know how to
10979 arm_const_double_inline_cost (rtx val)
10981 rtx lowpart, highpart;
10982 enum machine_mode mode;
10984 mode = GET_MODE (val);
10986 if (mode == VOIDmode)
10989 gcc_assert (GET_MODE_SIZE (mode) == 8);
/* Split VAL into its two 32-bit halves.  */
10991 lowpart = gen_lowpart (SImode, val);
10992 highpart = gen_highpart_mode (SImode, mode, val);
10994 gcc_assert (GET_CODE (lowpart) == CONST_INT);
10995 gcc_assert (GET_CODE (highpart) == CONST_INT);
/* Cost is the sum of synthesizing each half independently
   (arm_gen_constant in counting-only mode).  */
10997 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
10998 NULL_RTX, NULL_RTX, 0, 0)
10999 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11000 NULL_RTX, NULL_RTX, 0, 0));
11003 /* Return true if it is worthwhile to split a 64-bit constant into two
11004 32-bit operations. This is the case if optimizing for size, or
11005 if we have load delay slots, or if one 32-bit part can be done with
11006 a single data operation. */
11008 arm_const_double_by_parts (rtx val)
11010 enum machine_mode mode = GET_MODE (val);
11013 if (optimize_size || arm_ld_sched)
11016 if (mode == VOIDmode)
/* Check whether either half (or its complement) is a valid ARM
   immediate, i.e. loadable with a single MOV/MVN.  */
11019 part = gen_highpart_mode (SImode, mode, val);
11021 gcc_assert (GET_CODE (part) == CONST_INT);
11023 if (const_ok_for_arm (INTVAL (part))
11024 || const_ok_for_arm (~INTVAL (part)))
11027 part = gen_lowpart (SImode, val);
11029 gcc_assert (GET_CODE (part) == CONST_INT);
11031 if (const_ok_for_arm (INTVAL (part))
11032 || const_ok_for_arm (~INTVAL (part)))
11038 /* Scan INSN and note any of its operands that need fixing.
11039 If DO_PUSHES is false we do not actually push any of the fixups
11040 needed. The function returns TRUE if any fixups were needed/pushed.
11041 This is used by arm_memory_load_p() which needs to know about loads
11042 of constants that will be converted into minipool loads. */
11044 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11046 bool result = false;
11049 extract_insn (insn);
11051 if (!constrain_operands (1))
11052 fatal_insn_not_found (insn);
11054 if (recog_data.n_alternatives == 0)
11057 /* Fill in recog_op_alt with information about the constraints of
11059 preprocess_constraints ();
11061 for (opno = 0; opno < recog_data.n_operands; opno++)
11063 /* Things we need to fix can only occur in inputs. */
11064 if (recog_data.operand_type[opno] != OP_IN)
11067 /* If this alternative is a memory reference, then any mention
11068 of constants in this alternative is really to fool reload
11069 into allowing us to accept one there. We need to fix them up
11070 now so that we output the right code. */
11071 if (recog_op_alt[opno][which_alternative].memory_ok)
11073 rtx op = recog_data.operand[opno];
/* Bare constant operand: push a fixup for it directly.  */
11075 if (CONSTANT_P (op))
11078 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11079 recog_data.operand_mode[opno], op);
/* Reference to GCC's own constant pool: redirect it through the
   minipool instead.  */
11082 else if (GET_CODE (op) == MEM
11083 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11084 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11088 rtx cop = avoid_constant_pool_reference (op);
11090 /* Casting the address of something to a mode narrower
11091 than a word can cause avoid_constant_pool_reference()
11092 to return the pool reference itself. That's no good to
11093 us here. Lets just hope that we can use the
11094 constant pool value directly. */
11096 cop = get_pool_constant (XEXP (op, 0));
11098 push_minipool_fix (insn, address,
11099 recog_data.operand_loc[opno],
11100 recog_data.operand_mode[opno], cop);
11111 /* Gcc puts the pool in the wrong place for ARM, since we can only
11112 load addresses a limited distance around the pc. We do some
11113 special munging to move the constant pool values to the correct
11114 point in the code. */
/* NOTE(review): the function header is not visible here; from context
   this is presumably the machine-dependent reorg pass (arm_reorg) —
   confirm against the full file.  */
11119 HOST_WIDE_INT address = 0;
11122 minipool_fix_head = minipool_fix_tail = NULL;
11124 /* The first insn must always be a note, or the code below won't
11125 scan it properly. */
11126 insn = get_insns ();
11127 gcc_assert (GET_CODE (insn) == NOTE);
11130 /* Scan all the insns and record the operands that will need fixing. */
11131 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11133 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11134 && (arm_cirrus_insn_p (insn)
11135 || GET_CODE (insn) == JUMP_INSN
11136 || arm_memory_load_p (insn)))
11137 cirrus_reorg (insn);
11139 if (GET_CODE (insn) == BARRIER)
11140 push_minipool_barrier (insn, address);
11141 else if (INSN_P (insn))
11145 note_invalid_constants (insn, address, true);
11146 address += get_attr_length (insn);
11148 /* If the insn is a vector jump, add the size of the table
11149 and skip the table. */
11150 if ((table = is_jump_table (insn)) != NULL)
11152 address += get_jump_table_size (table);
11158 fix = minipool_fix_head;
11160 /* Now scan the fixups and perform the required changes. */
11165 Mfix * last_added_fix;
11166 Mfix * last_barrier = NULL;
11169 /* Skip any further barriers before the next fix. */
11170 while (fix && GET_CODE (fix->insn) == BARRIER)
11173 /* No more fixes. */
11177 last_added_fix = NULL;
/* Greedily add forward references to the current pool until one no
   longer fits; remember the last natural barrier seen in range.  */
11179 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11181 if (GET_CODE (ftmp->insn) == BARRIER)
11183 if (ftmp->address >= minipool_vector_head->max_address)
11186 last_barrier = ftmp;
11188 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11191 last_added_fix = ftmp; /* Keep track of the last fix added. */
11194 /* If we found a barrier, drop back to that; any fixes that we
11195 could have reached but come after the barrier will now go in
11196 the next mini-pool. */
11197 if (last_barrier != NULL)
11199 /* Reduce the refcount for those fixes that won't go into this
11201 for (fdel = last_barrier->next;
11202 fdel && fdel != ftmp;
11205 fdel->minipool->refcount--;
11206 fdel->minipool = NULL;
11209 ftmp = last_barrier;
11213 /* ftmp is first fix that we can't fit into this pool and
11214 there no natural barriers that we could use. Insert a
11215 new barrier in the code somewhere between the previous
11216 fix and this one, and arrange to jump around it. */
11217 HOST_WIDE_INT max_address;
11219 /* The last item on the list of fixes must be a barrier, so
11220 we can never run off the end of the list of fixes without
11221 last_barrier being set. */
11224 max_address = minipool_vector_head->max_address;
11225 /* Check that there isn't another fix that is in range that
11226 we couldn't fit into this pool because the pool was
11227 already too large: we need to put the pool before such an
11228 instruction. The pool itself may come just after the
11229 fix because create_fix_barrier also allows space for a
11230 jump instruction. */
11231 if (ftmp->address < max_address)
11232 max_address = ftmp->address + 1;
11234 last_barrier = create_fix_barrier (last_added_fix, max_address);
11237 assign_minipool_offsets (last_barrier);
/* Retry remaining fixes as backward references into the pool that
   has just been placed.  */
11241 if (GET_CODE (ftmp->insn) != BARRIER
11242 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11249 /* Scan over the fixes we have identified for this pool, fixing them
11250 up and adding the constants to the pool itself. */
11251 for (this_fix = fix; this_fix && ftmp != this_fix;
11252 this_fix = this_fix->next)
11253 if (GET_CODE (this_fix->insn) != BARRIER)
/* Rewrite the operand as a pc-relative load:
   pool-label + entry-offset.  */
11256 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11257 minipool_vector_label),
11258 this_fix->minipool->offset);
11259 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11262 dump_minipool (last_barrier->insn);
11266 /* From now on we must synthesize any constants that we can't handle
11267 directly. This can happen if the RTL gets split during final
11268 instruction generation. */
11269 after_arm_reorg = 1;
11271 /* Free the minipool memory. */
11272 obstack_free (&minipool_obstack, minipool_startobj);
11275 /* Routines to output assembly language. */
11277 /* If the rtx is the correct value then return the string of the number.
11278 In this way we can ensure that valid double constants are generated even
11279 when cross compiling. */
11281 fp_immediate_constant (rtx x)
11286 if (!fp_consts_inited)
/* Look X up in the table of the 8 FPA immediate constants and return
   the matching pre-formatted string; any other value is a bug.  */
11289 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11290 for (i = 0; i < 8; i++)
11291 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11292 return strings_fp[i];
11294 gcc_unreachable ();
11297 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11298 static const char *
11299 fp_const_from_val (REAL_VALUE_TYPE *r)
11303 if (!fp_consts_inited)
/* Same 8-entry table lookup as fp_immediate_constant.  */
11306 for (i = 0; i < 8; i++)
11307 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11308 return strings_fp[i];
11310 gcc_unreachable ();
11313 /* Output the operands of a LDM/STM instruction to STREAM.
11314 MASK is the ARM register set mask of which only bits 0-15 are important.
11315 REG is the base register, either the frame pointer or the stack pointer,
11316 INSTR is the possibly suffixed load or store instruction.
11317 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11320 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11321 unsigned long mask, int rfe)
11324 bool not_first = FALSE;
/* RFE only makes sense when PC is in the register list.  */
11326 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11327 fputc ('\t', stream);
11328 asm_fprintf (stream, instr, reg);
11329 fputc ('{', stream);
/* Emit the comma-separated register list for each bit set in MASK.  */
11331 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11332 if (mask & (1 << i))
11335 fprintf (stream, ", ");
11337 asm_fprintf (stream, "%r", i);
/* The "^" suffix requests the spsr->cpsr copy.  */
11342 fprintf (stream, "}^\n");
11344 fprintf (stream, "}\n");
11348 /* Output a FLDMD instruction to STREAM.
11349 BASE if the register containing the address.
11350 REG and COUNT specify the register range.
11351 Extra registers may be added to avoid hardware bugs.
11353 We output FLDMD even for ARMv5 VFP implementations. Although
11354 FLDMD is technically not supported until ARMv6, it is believed
11355 that all VFP implementations support its use in this context. */
11358 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11362 /* Workaround ARM10 VFPr1 bug. */
11363 if (count == 2 && !arm_arch6)
11370 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11371 load into multiple parts if we have to handle more than 16 registers. */
/* Recursive split: first 16 registers, then the remainder.  */
11374 vfp_output_fldmd (stream, base, reg, 16);
11375 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11379 fputc ('\t', stream);
/* Writeback form: base register updated after the load ("!").  */
11380 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11382 for (i = reg; i < reg + count; i++)
11385 fputs (", ", stream);
11386 asm_fprintf (stream, "d%d", i);
11388 fputs ("}\n", stream);
11393 /* Output the assembly for a store multiple. */
11396 vfp_output_fstmd (rtx * operands)
/* Build the "fstmfdd" register list into PATTERN, then emit it.  */
11403 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11404 p = strlen (pattern);
11406 gcc_assert (GET_CODE (operands[1]) == REG);
/* D-register number of the first stored register: VFP registers are
   numbered in pairs of SImode hard regs.  */
11408 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11409 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11411 p += sprintf (&pattern[p], ", d%d", base + i);
11413 strcpy (&pattern[p], "}");
11415 output_asm_insn (pattern, operands);
11420 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11421 number of bytes pushed. */
11424 vfp_emit_fstmd (int base_reg, int count)
11431 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11432 register pairs are stored by a store multiple insn. We avoid this
11433 by pushing an extra pair. */
11434 if (count == 2 && !arm_arch6)
11436 if (base_reg == LAST_VFP_REGNUM - 3)
11441 /* FSTMD may not store more than 16 doubleword registers at once. Split
11442 larger stores into multiple parts (up to a maximum of two, in
11447 /* NOTE: base_reg is an internal register number, so each D register
11449 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11450 saved += vfp_emit_fstmd (base_reg, 16);
/* Build the PARALLEL for the push and a SEQUENCE describing the
   stack effect for the unwinder (REG_FRAME_RELATED_EXPR).  */
11454 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11455 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11457 reg = gen_rtx_REG (DFmode, base_reg);
11460 XVECEXP (par, 0, 0)
11461 = gen_rtx_SET (VOIDmode,
11462 gen_frame_mem (BLKmode,
11463 gen_rtx_PRE_DEC (BLKmode,
11464 stack_pointer_rtx)),
11465 gen_rtx_UNSPEC (BLKmode,
11466 gen_rtvec (1, reg),
11467 UNSPEC_PUSH_MULT));
/* Dwarf entry 0: sp := sp - 8*count.  */
11469 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11470 plus_constant (stack_pointer_rtx, -(count * 8)));
11471 RTX_FRAME_RELATED_P (tmp) = 1;
11472 XVECEXP (dwarf, 0, 0) = tmp;
11474 tmp = gen_rtx_SET (VOIDmode,
11475 gen_frame_mem (DFmode, stack_pointer_rtx),
11477 RTX_FRAME_RELATED_P (tmp) = 1;
11478 XVECEXP (dwarf, 0, 1) = tmp;
/* Remaining registers: USE entries in the push PARALLEL, plus one
   frame-related store per register in the dwarf SEQUENCE.  */
11480 for (i = 1; i < count; i++)
11482 reg = gen_rtx_REG (DFmode, base_reg);
11484 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11486 tmp = gen_rtx_SET (VOIDmode,
11487 gen_frame_mem (DFmode,
11488 plus_constant (stack_pointer_rtx,
11491 RTX_FRAME_RELATED_P (tmp) = 1;
11492 XVECEXP (dwarf, 0, i + 1) = tmp;
11495 par = emit_insn (par);
11496 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11497 RTX_FRAME_RELATED_P (par) = 1;
11502 /* Emit a call instruction with pattern PAT. ADDR is the address of
11503 the call target. */
11506 arm_emit_call_insn (rtx pat, rtx addr)
11510 insn = emit_call_insn (pat);
11512 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11513 If the call might use such an entry, add a use of the PIC register
11514 to the instruction's CALL_INSN_FUNCTION_USAGE. */
/* Only non-local symbols can go through a PLT entry.  */
11515 if (TARGET_VXWORKS_RTP
11517 && GET_CODE (addr) == SYMBOL_REF
11518 && (SYMBOL_REF_DECL (addr)
11519 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11520 : !SYMBOL_REF_LOCAL_P (addr)))
11522 require_pic_register ();
11523 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11527 /* Output a 'call' insn. */
11529 output_call (rtx *operands)
11531 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11533 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11534 if (REGNO (operands[0]) == LR_REGNUM)
11536 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11537 output_asm_insn ("mov%?\t%0, %|lr", operands);
/* Pre-blx sequence: set lr to the return address manually, then
   branch to the target.  */
11540 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
/* bx is needed for interworking (or available from ARMv4T); plain
   mov pc otherwise.  */
11542 if (TARGET_INTERWORK || arm_arch4t)
11543 output_asm_insn ("bx%?\t%0", operands);
11545 output_asm_insn ("mov%?\t%|pc, %0", operands);
11550 /* Output a 'call' insn that is a reference in memory. */
11552 output_call_mem (rtx *operands)
11554 if (TARGET_INTERWORK && !arm_arch5)
/* Load the target into ip first, then do the manual lr/bx call
   sequence.  */
11556 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11557 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11558 output_asm_insn ("bx%?\t%|ip", operands);
11560 else if (regno_use_in (LR_REGNUM, operands[0]))
11562 /* LR is used in the memory address. We load the address in the
11563 first instruction. It's safe to use IP as the target of the
11564 load since the call will kill it anyway. */
11565 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11567 output_asm_insn ("blx%?\t%|ip", operands);
11570 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11572 output_asm_insn ("bx%?\t%|ip", operands);
11574 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
/* Simple case: load the target straight into pc after setting lr.  */
11579 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11580 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11587 /* Output a move from arm registers to an fpa register.
11588 OPERANDS[0] is an fpa register.
11589 OPERANDS[1] is the first register of an arm register triple
11589 (three consecutive SImode regs holding an XFmode value). */
11591 output_mov_long_double_fpa_from_arm (rtx *operands)
11593 int arm_reg0 = REGNO (operands[1]);
/* IP cannot be part of the triple: the stm/ldf sequence goes through
   the stack and must not clobber the scratch register.  */
11596 gcc_assert (arm_reg0 != IP_REGNUM);
11598 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11599 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11600 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
/* Push the three core registers, then pop 12 bytes into the FPA reg.  */
11602 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11603 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11608 /* Output a move from an fpa register to arm registers.
11609 OPERANDS[0] is the first register of an arm register triple.
11610 OPERANDS[1] is an fpa register. */
11612 output_mov_long_double_arm_from_fpa (rtx *operands)
11614 int arm_reg0 = REGNO (operands[0]);
11617 gcc_assert (arm_reg0 != IP_REGNUM);
11619 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11620 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11621 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
/* Store the FPA value (12 bytes) onto the stack, then pop it into the
   three consecutive core registers.  */
11623 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11624 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11628 /* Output a move from arm registers to arm registers of a long double
11629 OPERANDS[0] is the destination.
11630 OPERANDS[1] is the source.  Three SImode moves are emitted; the copy
11630 direction is chosen so an overlapping source is never clobbered
11630 before it is read. */
11632 output_mov_long_double_arm_from_arm (rtx *operands)
11634 /* We have to be careful here because the two might overlap. */
11635 int dest_start = REGNO (operands[0]);
11636 int src_start = REGNO (operands[1]);
11640 if (dest_start < src_start)
/* Destination below source: copy upwards.  */
11642 for (i = 0; i < 3; i++)
11644 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11645 ops[1] = gen_rtx_REG (SImode, src_start + i);
11646 output_asm_insn ("mov%?\t%0, %1", ops);
/* Destination at or above source: copy downwards.  */
11651 for (i = 2; i >= 0; i--)
11653 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11654 ops[1] = gen_rtx_REG (SImode, src_start + i);
11655 output_asm_insn ("mov%?\t%0, %1", ops);
/* Emit a movw/movt-style pair setting DEST to SRC: either a 16-bit
   immediate followed by an optional high-half insert, or a
   HIGH/LO_SUM pair for symbolic values.  */
11663 arm_emit_movpair (rtx dest, rtx src)
11665 /* If the src is an immediate, simplify it. */
11666 if (CONST_INT_P (src))
11668 HOST_WIDE_INT val = INTVAL (src);
/* Low half first; only emit the high-half insert when non-zero.  */
11669 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
11670 if ((val >> 16) & 0x0000ffff)
11671 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
11673 GEN_INT ((val >> 16) & 0x0000ffff));
11676 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
11677 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
11680 /* Output a move from arm registers to an fpa register.
11681 OPERANDS[0] is an fpa register.
11682 OPERANDS[1] is the first register of an arm register pair. */
11684 output_mov_double_fpa_from_arm (rtx *operands)
11686 int arm_reg0 = REGNO (operands[1])
11689 gcc_assert (arm_reg0 != IP_REGNUM);
11691 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11692 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
/* Push the register pair, then pop 8 bytes into the FPA register.  */
11693 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
11694 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
11698 /* Output a move from an fpa register to arm registers.
11699 OPERANDS[0] is the first register of an arm register pair.
11700 OPERANDS[1] is an fpa register. */
11702 output_mov_double_arm_from_fpa (rtx *operands)
11704 int arm_reg0 = REGNO (operands[0]);
11707 gcc_assert (arm_reg0 != IP_REGNUM);
11709 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11710 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
/* Store the 8-byte FPA value to the stack, then pop it into the pair.  */
11711 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
11712 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
11716 /* Output a move between double words.
11717 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
11718 or MEM<-REG and all MEMs must be offsettable addresses.
11718 Chooses between ldrd/strd, ldm/stm and ldr/str pairs depending on
11718 the addressing mode, TARGET_LDRD, and register-overlap hazards. */
11720 output_move_double (rtx *operands)
11722 enum rtx_code code0 = GET_CODE (operands[0]);
11723 enum rtx_code code1 = GET_CODE (operands[1]);
/* Load case: REG <- MEM.  reg0 is the low destination register;
   otherops[0] is pre-set to its pair (reg0 + 1).  */
11728 unsigned int reg0 = REGNO (operands[0]);
11730 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
11732 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
11734 switch (GET_CODE (XEXP (operands[1], 0)))
/* fix_cm3_ldrd: avoid ldrd when the base register equals the first
   destination register (Cortex-M3 errata workaround).  */
11738 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
11739 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
11741 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11745 gcc_assert (TARGET_LDRD);
11746 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
11751 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
11753 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
11758 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
11760 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
11764 gcc_assert (TARGET_LDRD);
11765 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
11770 /* Autoincrement addressing modes should never have overlapping
11771 base and destination registers, and overlapping index registers
11772 are already prohibited, so this doesn't need to worry about
11774 otherops[0] = operands[0];
11775 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
11776 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
11778 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
11780 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
11782 /* Registers overlap so split out the increment. */
11783 output_asm_insn ("add%?\t%1, %1, %2", otherops);
11784 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
11788 /* Use a single insn if we can.
11789 FIXME: IWMMXT allows offsets larger than ldrd can
11790 handle, fix these up with a pair of ldr. */
11792 || GET_CODE (otherops[2]) != CONST_INT
11793 || (INTVAL (otherops[2]) > -256
11794 && INTVAL (otherops[2]) < 256)
11795 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
11798 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11799 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11805 /* Use a single insn if we can.
11806 FIXME: IWMMXT allows offsets larger than ldrd can handle,
11807 fix these up with a pair of ldr. */
11809 || GET_CODE (otherops[2]) != CONST_INT
11810 || (INTVAL (otherops[2]) > -256
11811 && INTVAL (otherops[2]) < 256))
11812 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
11815 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11816 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
11823 /* We might be able to use ldrd %0, %1 here. However the range is
11824 different to ldr/adr, and it is broken on some ARMv7-M
11825 implementations. */
11826 /* Use the second register of the pair to avoid problematic
11828 otherops[1] = operands[1];
11829 output_asm_insn ("adr%?\t%0, %1", otherops);
11830 operands[1] = otherops[0];
11832 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
11834 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
11837 /* ??? This needs checking for thumb2. */
11839 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
11840 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
11842 otherops[0] = operands[0];
11843 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
11844 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
11846 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
/* Small constant offsets without ldrd: select an ldm variant whose
   implicit offset (-8/-4/+4) matches the constant.  */
11848 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
11850 switch ((int) INTVAL (otherops[2]))
11853 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
11858 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
11863 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
11867 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
11868 operands[1] = otherops[0];
11870 && (GET_CODE (otherops[2]) == REG
11872 || (GET_CODE (otherops[2]) == CONST_INT
11873 && INTVAL (otherops[2]) > -256
11874 && INTVAL (otherops[2]) < 256)))
11876 if (reg_overlap_mentioned_p (operands[0],
11880 /* Swap base and index registers over to
11881 avoid a conflict. */
11883 otherops[1] = otherops[2];
11886 /* If both registers conflict, it will usually
11887 have been fixed by a splitter. */
11888 if (reg_overlap_mentioned_p (operands[0], otherops[2])
11889 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
11891 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11892 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
11896 otherops[0] = operands[0];
11897 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
/* Offset out of ldrd range: materialise the address first with
   add/sub, then ldrd/ldm from the computed base.  */
11902 if (GET_CODE (otherops[2]) == CONST_INT)
11904 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
11905 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
11907 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11910 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11913 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
11916 return "ldr%(d%)\t%0, [%1]";
11918 return "ldm%(ia%)\t%1, %M0";
/* Fallback: two single-word loads, ordered to avoid clobbering an
   overlapping base register before the second load.  */
11922 otherops[1] = adjust_address (operands[1], SImode, 4);
11923 /* Take care of overlapping base/data reg. */
11924 if (reg_mentioned_p (operands[0], operands[1]))
11926 output_asm_insn ("ldr%?\t%0, %1", otherops);
11927 output_asm_insn ("ldr%?\t%0, %1", operands);
11931 output_asm_insn ("ldr%?\t%0, %1", operands);
11932 output_asm_insn ("ldr%?\t%0, %1", otherops);
/* Store case: MEM <- REG.  */
11939 /* Constraints should ensure this. */
11940 gcc_assert (code0 == MEM && code1 == REG);
11941 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
11943 switch (GET_CODE (XEXP (operands[0], 0)))
11947 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
11949 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
11953 gcc_assert (TARGET_LDRD);
11954 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
11959 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
11961 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
11966 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
11968 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
11972 gcc_assert (TARGET_LDRD);
11973 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
11978 otherops[0] = operands[1];
11979 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
11980 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
11982 /* IWMMXT allows offsets larger than ldrd can handle,
11983 fix these up with a pair of ldr. */
11985 && GET_CODE (otherops[2]) == CONST_INT
11986 && (INTVAL(otherops[2]) <= -256
11987 || INTVAL(otherops[2]) >= 256))
11989 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
11991 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11992 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11996 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11997 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12000 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12001 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12003 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12007 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12008 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12010 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12013 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12019 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12025 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12030 && (GET_CODE (otherops[2]) == REG
12032 || (GET_CODE (otherops[2]) == CONST_INT
12033 && INTVAL (otherops[2]) > -256
12034 && INTVAL (otherops[2]) < 256)))
12036 otherops[0] = operands[1];
12037 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12038 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
/* Fallback: two single-word stores (low word, then high word at +4).  */
12044 otherops[0] = adjust_address (operands[0], SImode, 4);
12045 otherops[1] = operands[1];
12046 output_asm_insn ("str%?\t%1, %0", operands);
12047 output_asm_insn ("str%?\t%H1, %0", otherops);
12054 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12055 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12058 output_move_quad (rtx *operands)
12060 if (REG_P (operands[0]))
12062 /* Load, or reg->reg move. */
12064 if (MEM_P (operands[1]))
12066 switch (GET_CODE (XEXP (operands[1], 0)))
12069 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
/* Label reference: materialise the address with adr, then ldm
   through the first destination register.  */
12074 output_asm_insn ("adr%?\t%0, %1", operands);
12075 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12079 gcc_unreachable ();
12087 gcc_assert (REG_P (operands[1]));
12089 dest = REGNO (operands[0]);
12090 src = REGNO (operands[1]);
12092 /* This seems pretty dumb, but hopefully GCC won't try to do it
12095 for (i = 0; i < 4; i++)
/* Reg->reg: four SImode moves, direction chosen so an overlapping
   source is not clobbered before it is read.  */
12097 ops[0] = gen_rtx_REG (SImode, dest + i);
12098 ops[1] = gen_rtx_REG (SImode, src + i);
12099 output_asm_insn ("mov%?\t%0, %1", ops);
12102 for (i = 3; i >= 0; i--)
12104 ops[0] = gen_rtx_REG (SImode, dest + i);
12105 ops[1] = gen_rtx_REG (SImode, src + i);
12106 output_asm_insn ("mov%?\t%0, %1", ops);
/* Store case: MEM <- REG; only a plain register base is accepted.  */
12112 gcc_assert (MEM_P (operands[0]));
12113 gcc_assert (REG_P (operands[1]));
12114 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12116 switch (GET_CODE (XEXP (operands[0], 0)))
12119 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12123 gcc_unreachable ();
12130 /* Output a VFP load or store instruction.  OPERANDS[0]/[1] are the
12130 destination and source; exactly one is a VFP register and the other
12130 a MEM accepted by the VFP addressing modes below. */
12133 output_move_vfp (rtx *operands)
12135 rtx reg, mem, addr, ops[2];
12136 int load = REG_P (operands[0]);
12137 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12138 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12141 enum machine_mode mode;
/* Index trick: operands[load] is the MEM, operands[!load] the reg.  */
12143 reg = operands[!load];
12144 mem = operands[load];
12146 mode = GET_MODE (reg);
12148 gcc_assert (REG_P (reg));
12149 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12150 gcc_assert (mode == SFmode
12154 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12155 gcc_assert (MEM_P (mem));
12157 addr = XEXP (mem, 0);
/* Pick a template by addressing mode: pre-decrement, post-increment,
   or plain base.  The %s/%c holes are filled by sprintf below.  */
12159 switch (GET_CODE (addr))
12162 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12163 ops[0] = XEXP (addr, 0);
12168 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12169 ops[0] = XEXP (addr, 0);
12174 templ = "f%s%c%%?\t%%%s0, %%1%s";
12180 sprintf (buff, templ,
12181 load ? "ld" : "st",
/* Append an "@ int" comment when moving an integer-mode value.  */
12184 integer_p ? "\t%@ int" : "");
12185 output_asm_insn (buff, ops);
12190 /* Output a Neon quad-word load or store, or a load or store for
12191 larger structure modes.
12193 WARNING: The ordering of elements is weird in big-endian mode,
12194 because we use VSTM, as required by the EABI. GCC RTL defines
12195 element ordering based on in-memory order. This can differ
12196 from the architectural ordering of elements within a NEON register.
12197 The intrinsics defined in arm_neon.h use the NEON register element
12198 ordering, not the GCC RTL element ordering.
12200 For example, the in-memory ordering of a big-endian quadword
12201 vector with 16-bit elements when stored from register pair {d0,d1}
12202 will be (lowest address first, d0[N] is NEON register element N):
12204 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12206 When necessary, quadword registers (dN, dN+1) are moved to ARM
12207 registers from rN in the order:
12209 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12211 So that STM/LDM can be used on vectors in ARM registers, and the
12212 same memory layout will result as if VSTM/VLDM were used. */
12215 output_move_neon (rtx *operands)
12217 rtx reg, mem, addr, ops[2];
12218 int regno, load = REG_P (operands[0]);
12221 enum machine_mode mode;
12223 reg = operands[!load];
12224 mem = operands[load];
12226 mode = GET_MODE (reg);
12228 gcc_assert (REG_P (reg));
12229 regno = REGNO (reg);
12230 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12231 || NEON_REGNO_OK_FOR_QUAD (regno));
12232 gcc_assert (VALID_NEON_DREG_MODE (mode)
12233 || VALID_NEON_QREG_MODE (mode)
12234 || VALID_NEON_STRUCT_MODE (mode));
12235 gcc_assert (MEM_P (mem));
12237 addr = XEXP (mem, 0);
12239 /* Strip off const from addresses like (const (plus (...))). */
12240 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12241 addr = XEXP (addr, 0);
12243 switch (GET_CODE (addr))
12246 templ = "v%smia%%?\t%%0!, %%h1";
12247 ops[0] = XEXP (addr, 0);
12252 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12253 templ = "v%smdb%%?\t%%0!, %%h1";
12254 ops[0] = XEXP (addr, 0);
12259 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12260 gcc_unreachable ();
/* Address with an offset: fall back to one vldr/vstr per D register.
   nregs counts D-register-sized (8-byte) pieces.  */
12265 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12268 for (i = 0; i < nregs; i++)
12270 /* We're only using DImode here because it's a convenient size. */
12271 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12272 ops[1] = adjust_address (mem, SImode, 8 * i);
/* Defer any piece whose destination overlaps the address; at most
   one such overlap can exist, and it is emitted last.  */
12273 if (reg_overlap_mentioned_p (ops[0], mem))
12275 gcc_assert (overlap == -1);
12280 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12281 output_asm_insn (buff, ops);
12286 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12287 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12288 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12289 output_asm_insn (buff, ops);
12296 templ = "v%smia%%?\t%%m0, %%h1";
12301 sprintf (buff, templ, load ? "ld" : "st");
12302 output_asm_insn (buff, ops);
12307 /* Output an ADD r, s, #n where n may be too big for one instruction.
12308 If adding zero to one register, output nothing.
12308 Negative constants are emitted as SUB of the absolute value. */
12310 output_add_immediate (rtx *operands)
12312 HOST_WIDE_INT n = INTVAL (operands[2]);
12314 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12317 output_multi_immediate (operands,
12318 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12321 output_multi_immediate (operands,
12322 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12329 /* Output a multiple immediate operation.
12330 OPERANDS is the vector of operands referred to in the output patterns.
12331 INSTR1 is the output pattern to use for the first constant.
12332 INSTR2 is the output pattern to use for subsequent constants.
12333 IMMED_OP is the index of the constant slot in OPERANDS.
12334 N is the constant value.  The constant is emitted in 8-bit chunks
12334 aligned on even bit positions, as ARM immediates require. */
12335 static const char *
12336 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12337 int immed_op, HOST_WIDE_INT n)
12339 #if HOST_BITS_PER_WIDE_INT > 32
12345 /* Quick and easy output. */
12346 operands[immed_op] = const0_rtx;
12347 output_asm_insn (instr1, operands);
12352 const char * instr = instr1;
12354 /* Note that n is never zero here (which would give no output). */
12355 for (i = 0; i < 32; i += 2)
/* Emit each non-empty byte-sized slice of N at rotation I.  */
12359 operands[immed_op] = GEN_INT (n & (255 << i));
12360 output_asm_insn (instr, operands);
12370 /* Return the assembler mnemonic for a shifter operation CODE
12370 (e.g. ASHIFT -> ARM_LSL_NAME). */
12371 static const char *
12372 arm_shift_nmem(enum rtx_code code)
12377 return ARM_LSL_NAME;
12393 /* Return the appropriate ARM instruction for the operation code.
12394 The returned result should not be overwritten. OP is the rtx of the
12395 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12395 was shifted (for MINUS this selects "rsb" instead of "sub"). */
12398 arithmetic_instr (rtx op, int shift_first_arg)
12400 switch (GET_CODE (op))
12406 return shift_first_arg ? "rsb" : "sub";
12421 return arm_shift_nmem(GET_CODE(op));
12424 gcc_unreachable ();
12428 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12429 for the operation code. The returned result should not be overwritten.
12430 OP is the rtx code of the shift.
12431 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
12431 shift amount otherwise. */
12433 static const char *
12434 shift_op (rtx op, HOST_WIDE_INT *amountp)
12437 enum rtx_code code = GET_CODE (op);
12439 switch (GET_CODE (XEXP (op, 1)))
12447 *amountp = INTVAL (XEXP (op, 1));
12451 gcc_unreachable ();
/* ROTATE by constant K is rewritten as ROTATERT by 32-K.  */
12457 gcc_assert (*amountp != -1);
12458 *amountp = 32 - *amountp;
12461 /* Fall through. */
12467 mnem = arm_shift_nmem(code);
/* MULT by a power of two is emitted as an LSL by the log2.  */
12471 /* We never have to worry about the amount being other than a
12472 power of 2, since this case can never be reloaded from a reg. */
12473 gcc_assert (*amountp != -1);
12474 *amountp = int_log2 (*amountp);
12475 return ARM_LSL_NAME;
12478 gcc_unreachable ();
12481 if (*amountp != -1)
12483 /* This is not 100% correct, but follows from the desire to merge
12484 multiplication by a power of 2 with the recognizer for a
12485 shift. >=32 is not a valid shift for "lsl", so we must try and
12486 output a shift that produces the correct arithmetical result.
12487 Using lsr #32 is identical except for the fact that the carry bit
12488 is not set correctly if we set the flags; but we never use the
12489 carry bit from such an operation, so we can ignore that. */
12490 if (code == ROTATERT)
12491 /* Rotate is just modulo 32. */
12493 else if (*amountp != (*amountp & 31))
12495 if (code == ASHIFT)
12500 /* Shifts of 0 are no-ops. */
12508 /* Obtain the shift from the POWER of two.  POWER must have exactly
12508 one bit set; asserts that the bit position is <= 31. */
12510 static HOST_WIDE_INT
12511 int_log2 (HOST_WIDE_INT power)
12513 HOST_WIDE_INT shift = 0;
12515 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12517 gcc_assert (shift <= 31);
12524 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12525 because /bin/as is horribly restrictive. The judgement about
12526 whether or not each character is 'printable' (and can be output as
12527 is) or not (and must be printed with an octal escape) must be made
12528 with reference to the *host* character set -- the situation is
12529 similar to that discussed in the comments above pp_c_char in
12530 c-pretty-print.c. */
12532 #define MAX_ASCII_LEN 51
12535 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12538 int len_so_far = 0;
12540 fputs ("\t.ascii\t\"", stream);
12542 for (i = 0; i < len; i++)
/* Start a fresh .ascii directive once the current one gets long.  */
12546 if (len_so_far >= MAX_ASCII_LEN)
12548 fputs ("\"\n\t.ascii\t\"", stream);
/* Backslash and double-quote must themselves be escaped.  */
12554 if (c == '\\' || c == '\"')
12556 putc ('\\', stream);
/* Non-printable characters are emitted as 3-digit octal escapes.  */
12564 fprintf (stream, "\\%03o", c);
12569 fputs ("\"\n", stream);
12572 /* Compute the register save mask for registers 0 through 12
12573 inclusive. This code is used by arm_compute_save_reg_mask.
12573 Returns a bitmask with bit N set when register N must be saved. */
12575 static unsigned long
12576 arm_compute_save_reg0_reg12_mask (void)
12578 unsigned long func_type = arm_current_func_type ();
12579 unsigned long save_reg_mask = 0;
12582 if (IS_INTERRUPT (func_type))
12584 unsigned int max_reg;
12585 /* Interrupt functions must not corrupt any registers,
12586 even call clobbered ones. If this is a leaf function
12587 we can just examine the registers used by the RTL, but
12588 otherwise we have to assume that whatever function is
12589 called might clobber anything, and so we have to save
12590 all the call-clobbered registers as well. */
12591 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12592 /* FIQ handlers have registers r8 - r12 banked, so
12593 we only need to check r0 - r7.  Normal ISRs only
12594 bank r14 and r15, so we must check up to r12.
12595 r13 is the stack pointer which is always preserved,
12596 so we do not need to consider it here. */
12601 for (reg = 0; reg <= max_reg; reg++)
12602 if (df_regs_ever_live_p (reg)
12603 || (! current_function_is_leaf && call_used_regs[reg]))
12604 save_reg_mask |= (1 << reg);
12606 /* Also save the pic base register if necessary. */
12608 && !TARGET_SINGLE_PIC_BASE
12609 && arm_pic_register != INVALID_REGNUM
12610 && crtl->uses_pic_offset_table)
12611 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12615 /* In the normal case we only need to save those registers
12616 which are call saved and which are used by this function. */
12617 for (reg = 0; reg <= 11; reg++)
12618 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12619 save_reg_mask |= (1 << reg);
12621 /* Handle the frame pointer as a special case. */
12622 if (frame_pointer_needed)
12623 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12625 /* If we aren't loading the PIC register,
12626 don't stack it even though it may be live. */
12628 && !TARGET_SINGLE_PIC_BASE
12629 && arm_pic_register != INVALID_REGNUM
12630 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12631 || crtl->uses_pic_offset_table)
12632 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12634 /* The prologue will copy SP into R0, so save it. */
12635 if (IS_STACKALIGN (func_type))
12636 save_reg_mask |= 1;
12639 /* Save registers so the exception handler can modify them. */
12640 if (crtl->calls_eh_return)
12646 reg = EH_RETURN_DATA_REGNO (i);
12647 if (reg == INVALID_REGNUM)
12649 save_reg_mask |= 1 << reg;
12653 return save_reg_mask;
12657 /* Compute the number of bytes used to store the static chain register on the
12658 stack, above the stack frame. We need to know this accurately to get the
12659 alignment of the rest of the stack frame correct.
12659 Returns 4 only for an ARM-mode nested function with an APCS frame
12659 whose r3 is live and which has no pretend args; otherwise 0. */
12661 static int arm_compute_static_chain_stack_bytes (void)
12663 unsigned long func_type = arm_current_func_type ();
12664 int static_chain_stack_bytes = 0;
12666 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
12667 IS_NESTED (func_type) &&
12668 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
12669 static_chain_stack_bytes = 4;
12671 return static_chain_stack_bytes;
12675 /* Compute a bit mask of which registers need to be
12676 saved on the stack for the current function.
12677 This is used by arm_get_frame_offsets, which may add extra registers. */
12679 static unsigned long
12680 arm_compute_save_reg_mask (void)
12682 unsigned int save_reg_mask = 0;
12683 unsigned long func_type = arm_current_func_type ();
12686 if (IS_NAKED (func_type))
12687 /* This should never really happen. */
12690 /* If we are creating a stack frame, then we must save the frame pointer,
12691 IP (which will hold the old stack pointer), LR and the PC. */
12692 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12694 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
12697 | (1 << PC_REGNUM);
12699 /* Volatile functions do not return, so there
12700 is no need to save any other registers. */
12701 if (IS_VOLATILE (func_type))
12702 return save_reg_mask;
12704 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
12706 /* Decide if we need to save the link register.
12707 Interrupt routines have their own banked link register,
12708 so they never need to save it.
12709 Otherwise if we do not use the link register we do not need to save
12710 it. If we are pushing other registers onto the stack however, we
12711 can save an instruction in the epilogue by pushing the link register
12712 now and then popping it back into the PC. This incurs extra memory
12713 accesses though, so we only do it when optimizing for size, and only
12714 if we know that we will not need a fancy return sequence. */
12715 if (df_regs_ever_live_p (LR_REGNUM)
12718 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12719 && !crtl->calls_eh_return))
12720 save_reg_mask |= 1 << LR_REGNUM;
12722 if (cfun->machine->lr_save_eliminated)
12723 save_reg_mask &= ~ (1 << LR_REGNUM);
/* iWMMXt requires the stack to be 8-byte aligned before any iWMMXt
   register saves; add a filler core register if the count is odd.  */
12725 if (TARGET_REALLY_IWMMXT
12726 && ((bit_count (save_reg_mask)
12727 + ARM_NUM_INTS (crtl->args.pretend_args_size +
12728 arm_compute_static_chain_stack_bytes())
12731 /* The total number of registers that are going to be pushed
12732 onto the stack is odd. We need to ensure that the stack
12733 is 64-bit aligned before we start to save iWMMXt registers,
12734 and also before we start to create locals. (A local variable
12735 might be a double or long long which we will load/store using
12736 an iWMMXt instruction). Therefore we need to push another
12737 ARM register, so that the stack will be 64-bit aligned. We
12738 try to avoid using the arg registers (r0 -r3) as they might be
12739 used to pass values in a tail call. */
12740 for (reg = 4; reg <= 12; reg++)
12741 if ((save_reg_mask & (1 << reg)) == 0)
12745 save_reg_mask |= (1 << reg);
/* No free register in r4-r12: fall back to r3 and forbid sibcalls,
   since r3 may carry an outgoing argument in a tail call.  */
12748 cfun->machine->sibcall_blocked = 1;
12749 save_reg_mask |= (1 << 3);
12753 /* We may need to push an additional register for use initializing the
12754 PIC base register. */
12755 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
12756 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
12758 reg = thumb_find_work_register (1 << 4);
12759 if (!call_used_regs[reg])
12760 save_reg_mask |= (1 << reg);
12763 return save_reg_mask;
12767 /* Compute a bit mask of which registers need to be
12768 saved on the stack for the current function.  Thumb-1 variant of
12768 arm_compute_save_reg_mask. */
12769 static unsigned long
12770 thumb1_compute_save_reg_mask (void)
12772 unsigned long mask;
12776 for (reg = 0; reg < 12; reg ++)
12777 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12781 && !TARGET_SINGLE_PIC_BASE
12782 && arm_pic_register != INVALID_REGNUM
12783 && crtl->uses_pic_offset_table)
12784 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12786 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
12787 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
12788 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12790 /* LR will also be pushed if any lo regs are pushed. */
12791 if (mask & 0xff || thumb_force_lr_save ())
12792 mask |= (1 << LR_REGNUM);
12794 /* Make sure we have a low work register if we need one.
12795 We will need one if we are going to push a high register,
12796 but we are not currently intending to push a low register. */
12797 if ((mask & 0xff) == 0
12798 && ((mask & 0x0f00) || TARGET_BACKTRACE))
12800 /* Use thumb_find_work_register to choose which register
12801 we will use. If the register is live then we will
12802 have to push it. Use LAST_LO_REGNUM as our fallback
12803 choice for the register to select. */
12804 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
12805 /* Make sure the register returned by thumb_find_work_register is
12806 not part of the return value. */
12807 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
12808 reg = LAST_LO_REGNUM;
12810 if (! call_used_regs[reg])
12814 /* The 504 below is 8 bytes less than 512 because there are two possible
12815 alignment words. We can't tell here if they will be present or not so we
12816 have to play it safe and assume that they are. */
12817 if ((CALLER_INTERWORKING_SLOT_SIZE +
12818 ROUND_UP_WORD (get_frame_size ()) +
12819 crtl->outgoing_args_size) >= 504)
12821 /* This is the same as the code in thumb1_expand_prologue() which
12822 determines which register to use for stack decrement. */
12823 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
12824 if (mask & (1 << reg))
12827 if (reg > LAST_LO_REGNUM)
12829 /* Make sure we have a register available for stack decrement. */
12830 mask |= 1 << LAST_LO_REGNUM;
12838 /* Return the number of bytes required to save VFP registers.
12838 Registers are saved in pairs (8 bytes); an extra pair may be
12838 counted to work around the ARM10 VFPr1 FSTMX bug. */
12840 arm_get_vfp_saved_size (void)
12842 unsigned int regno;
12847 /* Space for saved VFP registers. */
12848 if (TARGET_HARD_FLOAT && TARGET_VFP)
12851 for (regno = FIRST_VFP_REGNUM;
12852 regno < LAST_VFP_REGNUM;
/* A pair can be skipped only when neither half needs saving.  */
12855 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
12856 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
12860 /* Workaround ARM10 VFPr1 bug. */
12861 if (count == 2 && !arm_arch6)
12863 saved += count * 8;
12872 if (count == 2 && !arm_arch6)
12874 saved += count * 8;
12881 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
12882 everything bar the final return instruction. */
12884 output_return_instruction (rtx operand, int really_return, int reverse)
12886 char conditional[10];
12889 unsigned long live_regs_mask;
12890 unsigned long func_type;
12891 arm_stack_offsets *offsets;
12893 func_type = arm_current_func_type ();
/* Naked functions: prologue and epilogue are provided by the programmer,
   so emit nothing here.  */
12895 if (IS_NAKED (func_type))
12898 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
12900 /* If this function was declared non-returning, and we have
12901 found a tail call, then we have to trust that the called
12902 function won't return. */
12907 /* Otherwise, trap an attempted return by aborting. */
12909 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
12911 assemble_external_libcall (ops[1]);
12912 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
12918 gcc_assert (!cfun->calls_alloca || really_return);
/* Build the condition suffix applied to every instruction emitted below;
   REVERSE selects the inverted condition ('D') over the direct one ('d').  */
12920 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
12922 cfun->machine->return_used_this_function = 1;
12924 offsets = arm_get_frame_offsets ();
12925 live_regs_mask = offsets->saved_regs_mask;
12927 if (live_regs_mask)
12929 const char * return_reg;
12931 /* If we do not have any special requirements for function exit
12932 (e.g. interworking) then we can load the return address
12933 directly into the PC. Otherwise we must load it into LR. */
12935 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
12936 return_reg = reg_names[PC_REGNUM];
12938 return_reg = reg_names[LR_REGNUM];
12940 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
12942 /* There are three possible reasons for the IP register
12943 being saved. 1) a stack frame was created, in which case
12944 IP contains the old stack pointer, or 2) an ISR routine
12945 corrupted it, or 3) it was saved to align the stack on
12946 iWMMXt. In case 1, restore IP into SP, otherwise just
12948 if (frame_pointer_needed)
12950 live_regs_mask &= ~ (1 << IP_REGNUM);
12951 live_regs_mask |= (1 << SP_REGNUM);
12954 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
12957 /* On some ARM architectures it is faster to use LDR rather than
12958 LDM to load a single register. On other architectures, the
12959 cost is the same. In 26 bit mode, or for exception handlers,
12960 we have to use LDM to load the PC so that the CPSR is also
12962 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
12963 if (live_regs_mask == (1U << reg))
12966 if (reg <= LAST_ARM_REGNUM
12967 && (reg != LR_REGNUM
12969 || ! IS_INTERRUPT (func_type)))
/* Single register: a post-incremented LDR pops it directly.  */
12971 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
12972 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
12979 /* Generate the load multiple instruction to restore the
12980 registers. Note we can get here, even if
12981 frame_pointer_needed is true, but only if sp already
12982 points to the base of the saved core registers. */
12983 if (live_regs_mask & (1 << SP_REGNUM))
12985 unsigned HOST_WIDE_INT stack_adjust;
12987 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
12988 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
12990 if (stack_adjust && arm_arch5 && TARGET_ARM)
12991 if (TARGET_UNIFIED_ASM)
12992 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
12994 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
12997 /* If we can't use ldmib (SA110 bug),
12998 then try to pop r3 instead. */
13000 live_regs_mask |= 1 << 3;
13002 if (TARGET_UNIFIED_ASM)
13003 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13005 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13009 if (TARGET_UNIFIED_ASM)
13010 sprintf (instr, "pop%s\t{", conditional);
13012 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
/* Append the live registers to the register list built up in INSTR.  */
13014 p = instr + strlen (instr);
13016 for (reg = 0; reg <= SP_REGNUM; reg++)
13017 if (live_regs_mask & (1 << reg))
13019 int l = strlen (reg_names[reg]);
13025 memcpy (p, ", ", 2);
13029 memcpy (p, "%|", 2);
13030 memcpy (p + 2, reg_names[reg], l);
13034 if (live_regs_mask & (1 << LR_REGNUM))
13036 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13037 /* If returning from an interrupt, restore the CPSR. */
13038 if (IS_INTERRUPT (func_type))
13045 output_asm_insn (instr, & operand);
13047 /* See if we need to generate an extra instruction to
13048 perform the actual function return. */
13050 && func_type != ARM_FT_INTERWORKED
13051 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13053 /* The return has already been handled
13054 by loading the LR into the PC. */
/* Emit the explicit return, chosen by the function's type.  */
13061 switch ((int) ARM_FUNC_TYPE (func_type))
13065 /* ??? This is wrong for unified assembly syntax. */
13066 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13069 case ARM_FT_INTERWORKED:
13070 sprintf (instr, "bx%s\t%%|lr", conditional);
13073 case ARM_FT_EXCEPTION:
13074 /* ??? This is wrong for unified assembly syntax. */
13075 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13079 /* Use bx if it's available. */
13080 if (arm_arch5 || arm_arch4t)
13081 sprintf (instr, "bx%s\t%%|lr", conditional);
13083 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13087 output_asm_insn (instr, & operand);
13093 /* Write the function name into the code section, directly preceding
13094 the function prologue.
13096 Code will be output similar to this:
13098 .ascii "arm_poke_function_name", 0
13101 .word 0xff000000 + (t1 - t0)
13102 arm_poke_function_name
13104 stmfd sp!, {fp, ip, lr, pc}
13107 When performing a stack backtrace, code can inspect the value
13108 of 'pc' stored at 'fp' + 0. If the trace function then looks
13109 at location pc - 12 and the top 8 bits are set, then we know
13110 that there is a function name embedded immediately preceding this
13111 location and has length ((pc[-3]) & 0xff000000).
13113 We assume that pc is declared as a pointer to an unsigned long.
13115 It is of no benefit to output the function name if we are assembling
13116 a leaf function. These function types will not contain a stack
13117 backtrace structure, therefore it is not possible to determine the
13120 arm_poke_function_name (FILE *stream, const char *name)
13122 unsigned long alignlength;
13123 unsigned long length;
/* Include the trailing NUL in the emitted string, then round the
   length up to a whole number of words.  */
13126 length = strlen (name) + 1;
13127 alignlength = ROUND_UP_WORD (length);
13129 ASM_OUTPUT_ASCII (stream, name, length);
13130 ASM_OUTPUT_ALIGN (stream, 2);
/* Emit the marker word: 0xff000000 in the top byte (the backtrace
   scanner's tag) plus the word-aligned name length.  */
13131 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13132 assemble_aligned_integer (UNITS_PER_WORD, x);
13135 /* Place some comments into the assembler stream
13136 describing the current function. */
13138 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13140 unsigned long func_type;
/* Thumb-1 functions are handled by their own routine.  */
13144 thumb1_output_function_prologue (f, frame_size);
13148 /* Sanity check. */
13149 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13151 func_type = arm_current_func_type ();
/* Annotate the assembly with the kind of function being compiled.  */
13153 switch ((int) ARM_FUNC_TYPE (func_type))
13156 case ARM_FT_NORMAL:
13158 case ARM_FT_INTERWORKED:
13159 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13162 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13165 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13167 case ARM_FT_EXCEPTION:
13168 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
/* Additional attribute-driven notes.  */
13172 if (IS_NAKED (func_type))
13173 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13175 if (IS_VOLATILE (func_type))
13176 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13178 if (IS_NESTED (func_type))
13179 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13180 if (IS_STACKALIGN (func_type))
13181 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
/* Summarize the frame layout for anyone reading the assembly.  */
13183 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13185 crtl->args.pretend_args_size, frame_size);
13187 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13188 frame_pointer_needed,
13189 cfun->machine->uses_anonymous_args);
13191 if (cfun->machine->lr_save_eliminated)
13192 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13194 if (crtl->calls_eh_return)
13195 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
/* Emit the assembly for a function epilogue.  SIBLING is non-NULL when
   this is a sibcall (tail call) epilogue, in which case the final return
   instruction is omitted (see REALLY_RETURN below).  */
13200 arm_output_epilogue (rtx sibling)
13203 unsigned long saved_regs_mask;
13204 unsigned long func_type;
13205 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13206 frame that is $fp + 4 for a non-variadic function. */
13207 int floats_offset = 0;
13209 FILE * f = asm_out_file;
13210 unsigned int lrm_count = 0;
13211 int really_return = (sibling == NULL);
13213 arm_stack_offsets *offsets;
13215 /* If we have already generated the return instruction
13216 then it is futile to generate anything else. */
13217 if (use_return_insn (FALSE, sibling) &&
13218 (cfun->machine->return_used_this_function != 0))
13221 func_type = arm_current_func_type ();
13223 if (IS_NAKED (func_type))
13224 /* Naked functions don't have epilogues. */
13227 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13231 /* A volatile function should never return. Call abort. */
13232 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13233 assemble_external_libcall (op);
13234 output_asm_insn ("bl\t%a0", &op);
13239 /* If we are throwing an exception, then we really must be doing a
13240 return, so we can't tail-call. */
13241 gcc_assert (!crtl->calls_eh_return || really_return);
13243 offsets = arm_get_frame_offsets ();
13244 saved_regs_mask = offsets->saved_regs_mask;
13247 lrm_count = bit_count (saved_regs_mask);
13249 floats_offset = offsets->saved_args;
13250 /* Compute how far away the floats will be. */
13251 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13252 if (saved_regs_mask & (1 << reg))
13253 floats_offset += 4;
/* APCS-frame ARM-mode epilogue: restore relative to the frame pointer.  */
13255 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13257 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13258 int vfp_offset = offsets->frame;
13260 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13262 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13263 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13265 floats_offset += 12;
13266 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13267 reg, FP_REGNUM, floats_offset - vfp_offset);
/* Otherwise restore the FPA registers in runs using lfm.  */
13272 start_reg = LAST_FPA_REGNUM;
13274 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13276 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13278 floats_offset += 12;
13280 /* We can't unstack more than four registers at once. */
13281 if (start_reg - reg == 3)
13283 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13284 reg, FP_REGNUM, floats_offset - vfp_offset);
13285 start_reg = reg - 1;
13290 if (reg != start_reg)
13291 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13292 reg + 1, start_reg - reg,
13293 FP_REGNUM, floats_offset - vfp_offset);
13294 start_reg = reg - 1;
13298 /* Just in case the last register checked also needs unstacking. */
13299 if (reg != start_reg)
13300 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13301 reg + 1, start_reg - reg,
13302 FP_REGNUM, floats_offset - vfp_offset);
13305 if (TARGET_HARD_FLOAT && TARGET_VFP)
13309 /* The fldmd insns do not have base+offset addressing
13310 modes, so we use IP to hold the address. */
13311 saved_size = arm_get_vfp_saved_size ();
13313 if (saved_size > 0)
13315 floats_offset += saved_size;
13316 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13317 FP_REGNUM, floats_offset - vfp_offset);
/* Restore the live VFP double registers in contiguous runs.  */
13319 start_reg = FIRST_VFP_REGNUM;
13320 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13322 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13323 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13325 if (start_reg != reg)
13326 vfp_output_fldmd (f, IP_REGNUM,
13327 (start_reg - FIRST_VFP_REGNUM) / 2,
13328 (reg - start_reg) / 2);
13329 start_reg = reg + 2;
13332 if (start_reg != reg)
13333 vfp_output_fldmd (f, IP_REGNUM,
13334 (start_reg - FIRST_VFP_REGNUM) / 2,
13335 (reg - start_reg) / 2);
13340 /* The frame pointer is guaranteed to be non-double-word aligned.
13341 This is because it is set to (old_stack_pointer - 4) and the
13342 old_stack_pointer was double word aligned. Thus the offset to
13343 the iWMMXt registers to be loaded must also be non-double-word
13344 sized, so that the resultant address *is* double-word aligned.
13345 We can ignore floats_offset since that was already included in
13346 the live_regs_mask. */
13347 lrm_count += (lrm_count % 2 ? 2 : 1);
13349 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13350 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13352 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13353 reg, FP_REGNUM, lrm_count * 4);
13358 /* saved_regs_mask should contain the IP, which at the time of stack
13359 frame generation actually contains the old stack pointer. So a
13360 quick way to unwind the stack is just pop the IP register directly
13361 into the stack pointer. */
13362 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13363 saved_regs_mask &= ~ (1 << IP_REGNUM);
13364 saved_regs_mask |= (1 << SP_REGNUM);
13366 /* There are two registers left in saved_regs_mask - LR and PC. We
13367 only need to restore the LR register (the return address), but to
13368 save time we can load it directly into the PC, unless we need a
13369 special function exit sequence, or we are not really returning. */
13371 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13372 && !crtl->calls_eh_return)
13373 /* Delete the LR from the register mask, so that the LR on
13374 the stack is loaded into the PC in the register mask. */
13375 saved_regs_mask &= ~ (1 << LR_REGNUM);
13377 saved_regs_mask &= ~ (1 << PC_REGNUM);
13379 /* We must use SP as the base register, because SP is one of the
13380 registers being restored. If an interrupt or page fault
13381 happens in the ldm instruction, the SP might or might not
13382 have been restored. That would be bad, as then SP will no
13383 longer indicate the safe area of stack, and we can get stack
13384 corruption. Using SP as the base register means that it will
13385 be reset correctly to the original value, should an interrupt
13386 occur. If the stack pointer already points at the right
13387 place, then omit the subtraction. */
13388 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13389 || cfun->calls_alloca)
13390 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13391 4 * bit_count (saved_regs_mask));
13392 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13394 if (IS_INTERRUPT (func_type))
13395 /* Interrupt handlers will have pushed the
13396 IP onto the stack, so restore it now. */
13397 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13401 /* This branch is executed for ARM mode (non-apcs frames) and
13402 Thumb-2 mode. Frame layout is essentially the same for those
13403 cases, except that in ARM mode frame pointer points to the
13404 first saved register, while in Thumb-2 mode the frame pointer points
13405 to the last saved register.
13407 It is possible to make frame pointer point to last saved
13408 register in both cases, and remove some conditionals below.
13409 That means that fp setup in prologue would be just "mov fp, sp"
13410 and sp restore in epilogue would be just "mov sp, fp", whereas
13411 now we have to use add/sub in those cases. However, the value
13412 of that would be marginal, as both mov and add/sub are 32-bit
13413 in ARM mode, and it would require extra conditionals
13414 in arm_expand_prologue to distinguish ARM-apcs-frame case
13415 (where frame pointer is required to point at first register)
13416 and ARM-non-apcs-frame. Therefore, such change is postponed
13417 until real need arise. */
13418 unsigned HOST_WIDE_INT amount;
13420 /* Restore stack pointer if necessary. */
13421 if (TARGET_ARM && frame_pointer_needed)
13423 operands[0] = stack_pointer_rtx;
13424 operands[1] = hard_frame_pointer_rtx;
13426 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13427 output_add_immediate (operands);
13431 if (frame_pointer_needed)
13433 /* For Thumb-2 restore sp from the frame pointer.
13434 Operand restrictions mean we have to increment FP, then copy
13436 amount = offsets->locals_base - offsets->saved_regs;
13437 operands[0] = hard_frame_pointer_rtx;
13441 unsigned long count;
13442 operands[0] = stack_pointer_rtx;
13443 amount = offsets->outgoing_args - offsets->saved_regs;
13444 /* pop call clobbered registers if it avoids a
13445 separate stack adjustment. */
13446 count = offsets->saved_regs - offsets->saved_args;
13449 && !crtl->calls_eh_return
13450 && bit_count(saved_regs_mask) * 4 == count
13451 && !IS_INTERRUPT (func_type)
13452 && !crtl->tail_call_emit)
13454 unsigned long mask;
/* Candidate scratch registers: everything below the return-value
   registers that is not already being restored.  */
13455 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13457 mask &= ~saved_regs_mask;
/* Trim the scratch mask until popping it exactly absorbs AMOUNT.  */
13459 while (bit_count (mask) * 4 > amount)
13461 while ((mask & (1 << reg)) == 0)
13463 mask &= ~(1 << reg);
13465 if (bit_count (mask) * 4 == amount) {
13467 saved_regs_mask |= mask;
13474 operands[1] = operands[0];
13475 operands[2] = GEN_INT (amount);
13476 output_add_immediate (operands);
13478 if (frame_pointer_needed)
13479 asm_fprintf (f, "\tmov\t%r, %r\n",
13480 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
/* Restore floating-point registers from the top of the stack.  */
13483 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13485 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13486 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13487 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13492 start_reg = FIRST_FPA_REGNUM;
13494 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13496 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13498 if (reg - start_reg == 3)
13500 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13501 start_reg, SP_REGNUM);
13502 start_reg = reg + 1;
13507 if (reg != start_reg)
13508 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13509 start_reg, reg - start_reg,
13512 start_reg = reg + 1;
13516 /* Just in case the last register checked also needs unstacking. */
13517 if (reg != start_reg)
13518 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13519 start_reg, reg - start_reg, SP_REGNUM);
13522 if (TARGET_HARD_FLOAT && TARGET_VFP)
13524 start_reg = FIRST_VFP_REGNUM;
13525 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13527 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13528 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13530 if (start_reg != reg)
13531 vfp_output_fldmd (f, SP_REGNUM,
13532 (start_reg - FIRST_VFP_REGNUM) / 2,
13533 (reg - start_reg) / 2);
13534 start_reg = reg + 2;
13537 if (start_reg != reg)
13538 vfp_output_fldmd (f, SP_REGNUM,
13539 (start_reg - FIRST_VFP_REGNUM) / 2,
13540 (reg - start_reg) / 2);
13543 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13544 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13545 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13547 /* If we can, restore the LR into the PC. */
13548 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13549 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13550 && !IS_STACKALIGN (func_type)
13552 && crtl->args.pretend_args_size == 0
13553 && saved_regs_mask & (1 << LR_REGNUM)
13554 && !crtl->calls_eh_return)
13556 saved_regs_mask &= ~ (1 << LR_REGNUM);
13557 saved_regs_mask |= (1 << PC_REGNUM);
13558 rfe = IS_INTERRUPT (func_type);
13563 /* Load the registers off the stack. If we only have one register
13564 to load use the LDR instruction - it is faster. For Thumb-2
13565 always use pop and the assembler will pick the best instruction.*/
13566 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13567 && !IS_INTERRUPT(func_type))
13569 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13571 else if (saved_regs_mask)
13573 if (saved_regs_mask & (1 << SP_REGNUM))
13574 /* Note - write back to the stack register is not enabled
13575 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13576 in the list of registers and if we add writeback the
13577 instruction becomes UNPREDICTABLE. */
13578 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13580 else if (TARGET_ARM)
13581 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13584 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13587 if (crtl->args.pretend_args_size)
13589 /* Unwind the pre-pushed regs. */
13590 operands[0] = operands[1] = stack_pointer_rtx;
13591 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13592 output_add_immediate (operands);
13596 /* We may have already restored PC directly from the stack. */
13597 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13600 /* Stack adjustment for exception handler. */
13601 if (crtl->calls_eh_return)
13602 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13603 ARM_EH_STACKADJ_REGNUM);
13605 /* Generate the return instruction. */
13606 switch ((int) ARM_FUNC_TYPE (func_type))
13610 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13613 case ARM_FT_EXCEPTION:
13614 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13617 case ARM_FT_INTERWORKED:
13618 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13622 if (IS_STACKALIGN (func_type))
13624 /* See comment in arm_expand_prologue. */
13625 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13627 if (arm_arch5 || arm_arch4t)
13628 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13630 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
/* Target hook run after the body of every function has been emitted.
   Emits Thumb-1 call-via-register thunks and performs per-function
   sanity checks / state resets.  */
13638 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13639 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13641 arm_stack_offsets *offsets;
13647 /* Emit any call-via-reg trampolines that are needed for v4t support
13648 of call_reg and call_value_reg type insns. */
13649 for (regno = 0; regno < LR_REGNUM; regno++)
13651 rtx label = cfun->machine->call_via[regno];
13655 switch_to_section (function_section (current_function_decl));
13656 targetm.asm_out.internal_label (asm_out_file, "L",
13657 CODE_LABEL_NUMBER (label));
/* The thunk body is simply "bx <regno>".  */
13658 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13662 /* ??? Probably not safe to set this here, since it assumes that a
13663 function will be emitted as assembly immediately after we generate
13664 RTL for it. This does not happen for inline functions. */
13665 cfun->machine->return_used_this_function = 0;
13667 else /* TARGET_32BIT */
13669 /* We need to take into account any stack-frame rounding. */
13670 offsets = arm_get_frame_offsets ();
13672 gcc_assert (!use_return_insn (FALSE, NULL)
13673 || (cfun->machine->return_used_this_function != 0)
13674 || offsets->saved_regs == offsets->outgoing_args
13675 || frame_pointer_needed);
13677 /* Reset the ARM-specific per-function variables. */
13678 after_arm_reorg = 0;
13682 /* Generate and emit an insn that we will recognize as a push_multi.
13683 Unfortunately, since this insn does not reflect very well the actual
13684 semantics of the operation, we need to annotate the insn for the benefit
13685 of DWARF2 frame unwind information. */
13687 emit_multi_reg_push (unsigned long mask)
13690 int num_dwarf_regs;
13694 int dwarf_par_index;
/* Count the registers selected by MASK.  */
13697 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13698 if (mask & (1 << i))
13701 gcc_assert (num_regs && num_regs <= 16);
13703 /* We don't record the PC in the dwarf frame information. */
13704 num_dwarf_regs = num_regs;
13705 if (mask & (1 << PC_REGNUM))
13708 /* For the body of the insn we are going to generate an UNSPEC in
13709 parallel with several USEs. This allows the insn to be recognized
13710 by the push_multi pattern in the arm.md file. The insn looks
13711 something like this:
13714 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
13715 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
13716 (use (reg:SI 11 fp))
13717 (use (reg:SI 12 ip))
13718 (use (reg:SI 14 lr))
13719 (use (reg:SI 15 pc))
13722 For the frame note however, we try to be more explicit and actually
13723 show each register being stored into the stack frame, plus a (single)
13724 decrement of the stack pointer. We do it this way in order to be
13725 friendly to the stack unwinding code, which only wants to see a single
13726 stack decrement per instruction. The RTL we generate for the note looks
13727 something like this:
13730 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
13731 (set (mem:SI (reg:SI sp)) (reg:SI r4))
13732 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
13733 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
13734 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
13737 This sequence is used both by the code to support stack unwinding for
13738 exceptions handlers and the code to generate dwarf2 frame debugging. */
13740 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
13741 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
13742 dwarf_par_index = 1;
/* Find the first register in MASK; it supplies the UNSPEC at
   element 0 of the PARALLEL.  */
13744 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13746 if (mask & (1 << i))
13748 reg = gen_rtx_REG (SImode, i);
13750 XVECEXP (par, 0, 0)
13751 = gen_rtx_SET (VOIDmode,
13752 gen_frame_mem (BLKmode,
13753 gen_rtx_PRE_DEC (BLKmode,
13754 stack_pointer_rtx)),
13755 gen_rtx_UNSPEC (BLKmode,
13756 gen_rtvec (1, reg),
13757 UNSPEC_PUSH_MULT));
13759 if (i != PC_REGNUM)
13761 tmp = gen_rtx_SET (VOIDmode,
13762 gen_frame_mem (SImode, stack_pointer_rtx),
13764 RTX_FRAME_RELATED_P (tmp) = 1;
13765 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
/* Remaining registers become USEs in the PARALLEL and explicit
   stores at increasing SP offsets in the DWARF note.  */
13773 for (j = 1, i++; j < num_regs; i++)
13775 if (mask & (1 << i))
13777 reg = gen_rtx_REG (SImode, i);
13779 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
13781 if (i != PC_REGNUM)
13784 = gen_rtx_SET (VOIDmode,
13785 gen_frame_mem (SImode,
13786 plus_constant (stack_pointer_rtx,
13789 RTX_FRAME_RELATED_P (tmp) = 1;
13790 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
13797 par = emit_insn (par);
/* Element 0 of the note is the single SP decrement (4 bytes/reg).  */
13799 tmp = gen_rtx_SET (VOIDmode,
13801 plus_constant (stack_pointer_rtx, -4 * num_regs));
13802 RTX_FRAME_RELATED_P (tmp) = 1;
13803 XVECEXP (dwarf, 0, 0) = tmp;
13805 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13810 /* Calculate the size of the return value that is passed in registers. */
13812 arm_size_return_regs (void)
13814 enum machine_mode mode;
/* Prefer the mode of the actual return rtx; fall back to the
   declared result mode of the current function.  */
13816 if (crtl->return_rtx != 0)
13817 mode = GET_MODE (crtl->return_rtx);
13819 mode = DECL_MODE (DECL_RESULT (current_function_decl));
13821 return GET_MODE_SIZE (mode);
/* Emit a multi-register store of COUNT XFmode (FPA) registers starting at
   BASE_REG, pushing them below SP, and attach an explicit DWARF frame
   note (one store per register plus a single SP decrement), mirroring
   the scheme used by emit_multi_reg_push above.  */
13825 emit_sfm (int base_reg, int count)
13832 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13833 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
/* First register carries the UNSPEC_PUSH_MULT pattern.  */
13835 reg = gen_rtx_REG (XFmode, base_reg++);
13837 XVECEXP (par, 0, 0)
13838 = gen_rtx_SET (VOIDmode,
13839 gen_frame_mem (BLKmode,
13840 gen_rtx_PRE_DEC (BLKmode,
13841 stack_pointer_rtx)),
13842 gen_rtx_UNSPEC (BLKmode,
13843 gen_rtvec (1, reg),
13844 UNSPEC_PUSH_MULT));
13845 tmp = gen_rtx_SET (VOIDmode,
13846 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
13847 RTX_FRAME_RELATED_P (tmp) = 1;
13848 XVECEXP (dwarf, 0, 1) = tmp;
/* Remaining registers are USEs in the insn, explicit stores in the note.  */
13850 for (i = 1; i < count; i++)
13852 reg = gen_rtx_REG (XFmode, base_reg++);
13853 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13855 tmp = gen_rtx_SET (VOIDmode,
13856 gen_frame_mem (XFmode,
13857 plus_constant (stack_pointer_rtx,
13860 RTX_FRAME_RELATED_P (tmp) = 1;
13861 XVECEXP (dwarf, 0, i + 1) = tmp;
/* Single SP decrement for the whole push: 12 bytes per XFmode reg.  */
13864 tmp = gen_rtx_SET (VOIDmode,
13866 plus_constant (stack_pointer_rtx, -12 * count));
13868 RTX_FRAME_RELATED_P (tmp) = 1;
13869 XVECEXP (dwarf, 0, 0) = tmp;
13871 par = emit_insn (par);
13872 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13878 /* Return true if the current function needs to save/restore LR. */
13881 thumb_force_lr_save (void)
/* LR must be saved unless its save was explicitly eliminated and the
   function is a leaf with no far jumps and no other use of LR.  */
13883 return !cfun->machine->lr_save_eliminated
13884 && (!leaf_function_p ()
13885 || thumb_far_jump_used_p ()
13886 || df_regs_ever_live_p (LR_REGNUM));
13890 /* Compute the distance from register FROM to register TO.
13891 These can be the arg pointer (26), the soft frame pointer (25),
13892 the stack pointer (13) or the hard frame pointer (11).
13893 In thumb mode r7 is used as the soft frame pointer, if needed.
13894 Typical stack layout looks like this:
13896 old stack pointer -> | |
13899 | | saved arguments for
13900 | | vararg functions
13903 hard FP & arg pointer -> | | \
13911 soft frame pointer -> | | /
13916 locals base pointer -> | | /
13921 current stack pointer -> | | /
13924 For a given function some or all of these stack components
13925 may not be needed, giving rise to the possibility of
13926 eliminating some of the registers.
13928 The values returned by this function must reflect the behavior
13929 of arm_expand_prologue() and arm_compute_save_reg_mask().
13931 The sign of the number returned reflects the direction of stack
13932 growth, so the values are positive for all eliminations except
13933 from the soft frame pointer to the hard frame pointer.
13935 SFP may point just inside the local variables block to ensure correct
13939 /* Calculate stack offsets. These are used to calculate register elimination
13940 offsets and in prologue/epilogue code. Also calculates which registers
13941 should be saved. */
13943 static arm_stack_offsets *
13944 arm_get_frame_offsets (void)
13946 struct arm_stack_offsets *offsets;
13947 unsigned long func_type;
13951 HOST_WIDE_INT frame_size;
/* The offsets are cached per-function in cfun->machine.  */
13954 offsets = &cfun->machine->stack_offsets;
13956 /* We need to know if we are a leaf function. Unfortunately, it
13957 is possible to be called after start_sequence has been called,
13958 which causes get_insns to return the insns for the sequence,
13959 not the function, which will cause leaf_function_p to return
13960 the incorrect result.
13962 to know about leaf functions once reload has completed, and the
13963 frame size cannot be changed after that time, so we can safely
13964 use the cached value. */
13966 if (reload_completed)
13969 /* Initially this is the size of the local variables. It will be translated
13970 into an offset once we have determined the size of preceding data. */
13971 frame_size = ROUND_UP_WORD (get_frame_size ());
13973 leaf = leaf_function_p ();
13975 /* Space for variadic functions. */
13976 offsets->saved_args = crtl->args.pretend_args_size;
13978 /* In Thumb mode this is incorrect, but never used. */
13979 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
13980 arm_compute_static_chain_stack_bytes();
13984 unsigned int regno;
/* 32-bit path: core save mask plus coprocessor register space.  */
13986 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
13987 core_saved = bit_count (offsets->saved_regs_mask) * 4;
13988 saved = core_saved;
13990 /* We know that SP will be doubleword aligned on entry, and we must
13991 preserve that condition at any subroutine call. We also require the
13992 soft frame pointer to be doubleword aligned. */
13994 if (TARGET_REALLY_IWMMXT)
13996 /* Check for the call-saved iWMMXt registers. */
13997 for (regno = FIRST_IWMMXT_REGNUM;
13998 regno <= LAST_IWMMXT_REGNUM;
14000 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14004 func_type = arm_current_func_type ();
14005 if (! IS_VOLATILE (func_type))
14007 /* Space for saved FPA registers. */
14008 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14009 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14012 /* Space for saved VFP registers. */
14013 if (TARGET_HARD_FLOAT && TARGET_VFP)
14014 saved += arm_get_vfp_saved_size ();
14017 else /* TARGET_THUMB1 */
14019 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14020 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14021 saved = core_saved;
14022 if (TARGET_BACKTRACE)
14026 /* Saved registers include the stack frame. */
14027 offsets->saved_regs = offsets->saved_args + saved +
14028 arm_compute_static_chain_stack_bytes();
14029 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14030 /* A leaf function does not need any stack alignment if it has nothing
14032 if (leaf && frame_size == 0)
14034 offsets->outgoing_args = offsets->soft_frame;
14035 offsets->locals_base = offsets->soft_frame;
14039 /* Ensure SFP has the correct alignment. */
14040 if (ARM_DOUBLEWORD_ALIGN
14041 && (offsets->soft_frame & 7))
14043 offsets->soft_frame += 4;
14044 /* Try to align stack by pushing an extra reg. Don't bother doing this
14045 when there is a stack frame as the alignment will be rolled into
14046 the normal stack adjustment. */
14047 if (frame_size + crtl->outgoing_args_size == 0)
14051 /* If it is safe to use r3, then do so. This sometimes
14052 generates better code on Thumb-2 by avoiding the need to
14053 use 32-bit push/pop instructions. */
14054 if (!crtl->tail_call_emit
14055 && arm_size_return_regs () <= 12)
/* Otherwise scan for any call-saved register not already saved.  */
14060 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14062 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14071 offsets->saved_regs += 4;
14072 offsets->saved_regs_mask |= (1 << reg);
14077 offsets->locals_base = offsets->soft_frame + frame_size;
14078 offsets->outgoing_args = (offsets->locals_base
14079 + crtl->outgoing_args_size);
14081 if (ARM_DOUBLEWORD_ALIGN)
14083 /* Ensure SP remains doubleword aligned. */
14084 if (offsets->outgoing_args & 7)
14085 offsets->outgoing_args += 4;
14086 gcc_assert (!(offsets->outgoing_args & 7));
14093 /* Calculate the relative offsets for the different stack pointers. Positive
14094 offsets are in the direction of stack growth. */
14097 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14099 arm_stack_offsets *offsets;
14101 offsets = arm_get_frame_offsets ();
14103 /* OK, now we have enough information to compute the distances.
14104 There must be an entry in these switch tables for each pair
14105 of registers in ELIMINABLE_REGS, even if some of the entries
14106 seem to be redundant or useless. */
14109 case ARG_POINTER_REGNUM:
14112 case THUMB_HARD_FRAME_POINTER_REGNUM:
14115 case FRAME_POINTER_REGNUM:
14116 /* This is the reverse of the soft frame pointer
14117 to hard frame pointer elimination below. */
14118 return offsets->soft_frame - offsets->saved_args;
14120 case ARM_HARD_FRAME_POINTER_REGNUM:
14121 /* This is only non-zero in the case where the static chain register
14122 is stored above the frame. */
14123 return offsets->frame - offsets->saved_args - 4;
14125 case STACK_POINTER_REGNUM:
14126 /* If nothing has been pushed on the stack at all
14127 then this will return -4. This *is* correct! */
14128 return offsets->outgoing_args - (offsets->saved_args + 4);
14131 gcc_unreachable ();
14133 gcc_unreachable ();
/* Eliminations from the soft frame pointer.  */
14135 case FRAME_POINTER_REGNUM:
14138 case THUMB_HARD_FRAME_POINTER_REGNUM:
14141 case ARM_HARD_FRAME_POINTER_REGNUM:
14142 /* The hard frame pointer points to the top entry in the
14143 stack frame. The soft frame pointer to the bottom entry
14144 in the stack frame. If there is no stack frame at all,
14145 then they are identical. */
14147 return offsets->frame - offsets->soft_frame;
14149 case STACK_POINTER_REGNUM:
14150 return offsets->outgoing_args - offsets->soft_frame;
14153 gcc_unreachable ();
14155 gcc_unreachable ();
14158 /* You cannot eliminate from the stack pointer.
14159 In theory you could eliminate from the hard frame
14160 pointer to the stack pointer, but this will never
14161 happen, since if a stack frame is not needed the
14162 hard frame pointer will never be used. */
14163 gcc_unreachable ();
14167 /* Given FROM and TO register numbers, say whether this elimination is
14168 allowed. Frame pointer elimination is automatically handled.
14170 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14171 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14172 pointer, we must eliminate FRAME_POINTER_REGNUM into
14173 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14174 ARG_POINTER_REGNUM. */
/* NOTE(review): the final arm of the conditional chain (the accepting
   "true" result) is not visible in this excerpt.  Each ternary below
   rejects one specific disallowed FROM/TO combination:
   - ARG_POINTER -> FRAME_POINTER is never valid;
   - eliminating into SP is invalid when a frame pointer is required;
   - the ARM-mode FP cannot be the target under Thumb, and vice versa. */
14177 arm_can_eliminate (const int from, const int to)
14179 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14180 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14181 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14182 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14186 /* Emit RTL to save coprocessor registers on function entry. Returns the
14187 number of bytes pushed. */
14190 arm_save_coproc_regs(void)
14192 int saved_size = 0;
14194 unsigned start_reg;
/* Pass 1: iWMMXt registers are stored one at a time through a
   pre-decrement of SP in V2SImode. */
14197 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14198 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14200 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
14201 insn = gen_rtx_MEM (V2SImode, insn);
14202 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14203 RTX_FRAME_RELATED_P (insn) = 1;
14207 /* Save any floating point call-saved registers used by this
/* Pass 2: FPA registers.  Under FPUTYPE_FPA_EMU2 each register is stored
   individually in XFmode; otherwise live runs are stored with SFM
   (a full run is four registers, start_reg - reg == 3), at 12 bytes
   per register as the saved_size accounting below shows. */
14209 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
14211 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14212 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14214 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
14215 insn = gen_rtx_MEM (XFmode, insn);
14216 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14217 RTX_FRAME_RELATED_P (insn) = 1;
14223 start_reg = LAST_FPA_REGNUM;
14225 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14227 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14229 if (start_reg - reg == 3)
14231 insn = emit_sfm (reg, 4);
14232 RTX_FRAME_RELATED_P (insn) = 1;
14234 start_reg = reg - 1;
14239 if (start_reg != reg)
14241 insn = emit_sfm (reg + 1, start_reg - reg);
14242 RTX_FRAME_RELATED_P (insn) = 1;
14243 saved_size += (start_reg - reg) * 12;
14245 start_reg = reg - 1;
14249 if (start_reg != reg)
14251 insn = emit_sfm (reg + 1, start_reg - reg);
14252 saved_size += (start_reg - reg) * 12;
14253 RTX_FRAME_RELATED_P (insn) = 1;
/* Pass 3: VFP registers, scanned in even/odd pairs; contiguous live
   runs are flushed with vfp_emit_fstmd, which returns the byte count. */
14256 if (TARGET_HARD_FLOAT && TARGET_VFP)
14258 start_reg = FIRST_VFP_REGNUM;
14260 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14262 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14263 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14265 if (start_reg != reg)
14266 saved_size += vfp_emit_fstmd (start_reg,
14267 (reg - start_reg) / 2);
14268 start_reg = reg + 2;
14271 if (start_reg != reg)
14272 saved_size += vfp_emit_fstmd (start_reg,
14273 (reg - start_reg) / 2);
14279 /* Set the Thumb frame pointer from the stack pointer. */
/* NOTE(review): several control-flow lines of this function are missing
   from this excerpt; the alternatives below appear to select between
   adding AMOUNT to SP directly into FP and materialising AMOUNT in FP
   first and then adding SP — TODO confirm against the full source. */
14282 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14284 HOST_WIDE_INT amount;
14287 amount = offsets->outgoing_args - offsets->locals_base;
14289 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14290 stack_pointer_rtx, GEN_INT (amount)));
14293 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14294 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14295 expects the first two operands to be the same. */
14298 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14300 hard_frame_pointer_rtx))
14304 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14305 hard_frame_pointer_rtx,
14306 stack_pointer_rtx));
/* Tell the DWARF machinery the new frame pointer is SP + AMOUNT. */
14308 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14309 plus_constant (stack_pointer_rtx, amount));
14310 RTX_FRAME_RELATED_P (dwarf) = 1;
14311 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14314 RTX_FRAME_RELATED_P (insn) = 1;
14317 /* Generate the prologue instructions for entry into an ARM or Thumb-2
/* NOTE(review): this excerpt is heavily elided (missing declarations,
   braces and some else-branches).  Comments below annotate only what the
   visible lines establish. */
14320 arm_expand_prologue (void)
14325 unsigned long live_regs_mask;
14326 unsigned long func_type;
14328 int saved_pretend_args = 0;
14329 int saved_regs = 0;
14330 unsigned HOST_WIDE_INT args_to_push;
14331 arm_stack_offsets *offsets;
14333 func_type = arm_current_func_type ();
14335 /* Naked functions don't have prologues. */
14336 if (IS_NAKED (func_type))
14339 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14340 args_to_push = crtl->args.pretend_args_size;
14342 /* Compute which register we will have to save onto the stack. */
14343 offsets = arm_get_frame_offsets ();
14344 live_regs_mask = offsets->saved_regs_mask;
14346 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14348 if (IS_STACKALIGN (func_type))
14353 /* Handle a word-aligned stack pointer. We generate the following:
14358 <save and restore r0 in normal prologue/epilogue>
14362 The unwinder doesn't need to know about the stack realignment.
14363 Just tell it we saved SP in r0. */
14364 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14366 r0 = gen_rtx_REG (SImode, 0);
14367 r1 = gen_rtx_REG (SImode, 1);
14368 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14369 compiler won't choke. */
14370 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14371 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14372 insn = gen_movsi (r0, stack_pointer_rtx);
14373 RTX_FRAME_RELATED_P (insn) = 1;
14374 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
/* Round SP down to a multiple of 8 via r1 (mask off the low 3 bits). */
14376 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14377 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14380 /* For APCS frames, if IP register is clobbered
14381 when creating frame, save that register in a special
14383 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14385 if (IS_INTERRUPT (func_type))
14387 /* Interrupt functions must not corrupt any registers.
14388 Creating a frame pointer however, corrupts the IP
14389 register, so we must push it first. */
14390 insn = emit_multi_reg_push (1 << IP_REGNUM);
14392 /* Do not set RTX_FRAME_RELATED_P on this insn.
14393 The dwarf stack unwinding code only wants to see one
14394 stack decrement per function, and this is not it. If
14395 this instruction is labeled as being part of the frame
14396 creation sequence then dwarf2out_frame_debug_expr will
14397 die when it encounters the assignment of IP to FP
14398 later on, since the use of SP here establishes SP as
14399 the CFA register and not IP.
14401 Anyway this instruction is not really part of the stack
14402 frame creation although it is part of the prologue. */
14404 else if (IS_NESTED (func_type))
14406 /* The Static chain register is the same as the IP register
14407 used as a scratch register during stack frame creation.
14408 To get around this need to find somewhere to store IP
14409 whilst the frame is being created. We try the following
14412 1. The last argument register.
14413 2. A slot on the stack above the frame. (This only
14414 works if the function is not a varargs function).
14415 3. Register r3, after pushing the argument registers
14418 Note - we only need to tell the dwarf2 backend about the SP
14419 adjustment in the second variant; the static chain register
14420 doesn't need to be unwound, as it doesn't contain a value
14421 inherited from the caller. */
14423 if (df_regs_ever_live_p (3) == false)
14424 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14425 else if (args_to_push == 0)
14429 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14432 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14433 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14436 /* Just tell the dwarf backend that we adjusted SP. */
14437 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14438 plus_constant (stack_pointer_rtx,
14440 RTX_FRAME_RELATED_P (insn) = 1;
14441 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14445 /* Store the args on the stack. */
14446 if (cfun->machine->uses_anonymous_args)
14447 insn = emit_multi_reg_push
14448 ((0xf0 >> (args_to_push / 4)) & 0xf);
14451 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14452 GEN_INT (- args_to_push)));
14454 RTX_FRAME_RELATED_P (insn) = 1;
14456 saved_pretend_args = 1;
14457 fp_offset = args_to_push;
14460 /* Now reuse r3 to preserve IP. */
14461 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14465 insn = emit_set_insn (ip_rtx,
14466 plus_constant (stack_pointer_rtx, fp_offset));
14467 RTX_FRAME_RELATED_P (insn) = 1;
14472 /* Push the argument registers, or reserve space for them. */
14473 if (cfun->machine->uses_anonymous_args)
14474 insn = emit_multi_reg_push
14475 ((0xf0 >> (args_to_push / 4)) & 0xf);
14478 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14479 GEN_INT (- args_to_push)));
14480 RTX_FRAME_RELATED_P (insn) = 1;
14483 /* If this is an interrupt service routine, and the link register
14484 is going to be pushed, and we're not generating extra
14485 push of IP (needed when frame is needed and frame layout if apcs),
14486 subtracting four from LR now will mean that the function return
14487 can be done with a single instruction. */
14488 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14489 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14490 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14493 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14495 emit_set_insn (lr, plus_constant (lr, -4));
14498 if (live_regs_mask)
14500 saved_regs += bit_count (live_regs_mask) * 4;
14501 if (optimize_size && !frame_pointer_needed
14502 && saved_regs == offsets->saved_regs - offsets->saved_args)
14504 /* If no coprocessor registers are being pushed and we don't have
14505 to worry about a frame pointer then push extra registers to
14506 create the stack frame. This is done is a way that does not
14507 alter the frame layout, so is independent of the epilogue. */
14511 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14513 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14514 if (frame && n * 4 >= frame)
14517 live_regs_mask |= (1 << n) - 1;
14518 saved_regs += frame;
14521 insn = emit_multi_reg_push (live_regs_mask);
14522 RTX_FRAME_RELATED_P (insn) = 1;
/* Volatile (noreturn) functions need not preserve coprocessor state. */
14525 if (! IS_VOLATILE (func_type))
14526 saved_regs += arm_save_coproc_regs ();
14528 if (frame_pointer_needed && TARGET_ARM)
14530 /* Create the new frame pointer. */
14531 if (TARGET_APCS_FRAME)
14533 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14534 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14535 RTX_FRAME_RELATED_P (insn) = 1;
14537 if (IS_NESTED (func_type))
14539 /* Recover the static chain register. */
14540 if (!df_regs_ever_live_p (3)
14541 || saved_pretend_args)
14542 insn = gen_rtx_REG (SImode, 3);
14543 else /* if (crtl->args.pretend_args_size == 0) */
14545 insn = plus_constant (hard_frame_pointer_rtx, 4);
14546 insn = gen_frame_mem (SImode, insn);
14548 emit_set_insn (ip_rtx, insn);
14549 /* Add a USE to stop propagate_one_insn() from barfing. */
14550 emit_insn (gen_prologue_use (ip_rtx));
14555 insn = GEN_INT (saved_regs - 4);
14556 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14557 stack_pointer_rtx, insn));
14558 RTX_FRAME_RELATED_P (insn) = 1;
14562 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14564 /* This add can produce multiple insns for a large constant, so we
14565 need to get tricky. */
14566 rtx last = get_last_insn ();
14568 amount = GEN_INT (offsets->saved_args + saved_regs
14569 - offsets->outgoing_args);
14571 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
/* Mark every insn emitted for the (possibly multi-insn) SP adjustment
   as frame-related, walking from just after LAST up to INSN. */
14575 last = last ? NEXT_INSN (last) : get_insns ();
14576 RTX_FRAME_RELATED_P (last) = 1;
14578 while (last != insn);
14580 /* If the frame pointer is needed, emit a special barrier that
14581 will prevent the scheduler from moving stores to the frame
14582 before the stack adjustment. */
14583 if (frame_pointer_needed)
14584 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14585 hard_frame_pointer_rtx));
14589 if (frame_pointer_needed && TARGET_THUMB2)
14590 thumb_set_frame_pointer (offsets);
14592 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14594 unsigned long mask;
14596 mask = live_regs_mask;
14597 mask &= THUMB2_WORK_REGS;
14598 if (!IS_NESTED (func_type))
14599 mask |= (1 << IP_REGNUM);
14600 arm_load_pic_register (mask);
14603 /* If we are profiling, make sure no instructions are scheduled before
14604 the call to mcount. Similarly if the user has requested no
14605 scheduling in the prolog. Similarly if we want non-call exceptions
14606 using the EABI unwinder, to prevent faulting instructions from being
14607 swapped with a stack adjustment. */
14608 if (crtl->profile || !TARGET_SCHED_PROLOG
14609 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
14610 emit_insn (gen_blockage ());
14612 /* If the link register is being kept alive, with the return address in it,
14613 then make sure that it does not get reused by the ce2 pass. */
14614 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
14615 cfun->machine->lr_save_eliminated = 1;
14618 /* Print condition code to STREAM. Helper function for arm_print_operand. */
14620 arm_print_condition (FILE *stream)
/* States 3 and 4 of the condition-code FSM mean we are inside a
   conditionalised instruction sequence, so emit arm_current_cc. */
14622 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
14624 /* Branch conversion is not implemented for Thumb-2. */
14627 output_operand_lossage ("predicated Thumb instruction");
14630 if (current_insn_predicate != NULL)
14632 output_operand_lossage
14633 ("predicated instruction in conditional sequence");
14637 fputs (arm_condition_codes[arm_current_cc], stream);
/* Otherwise, a COND_EXEC predicate on the current insn supplies the
   condition to print. */
14639 else if (current_insn_predicate)
14641 enum arm_cond_code code;
14645 output_operand_lossage ("predicated Thumb instruction");
14649 code = get_arm_condition_code (current_insn_predicate);
14650 fputs (arm_condition_codes[code], stream);
14655 /* If CODE is 'd', then the X is a condition operand and the instruction
14656 should only be executed if the condition is true.
14657 if CODE is 'D', then the X is a condition operand and the instruction
14658 should only be executed if the condition is false: however, if the mode
14659 of the comparison is CCFPEmode, then always execute the instruction -- we
14660 do this because in these circumstances !GE does not necessarily imply LT;
14661 in these cases the instruction pattern will take care to make sure that
14662 an instruction containing %d will follow, thereby undoing the effects of
14663 doing this instruction unconditionally.
14664 If CODE is 'N' then X is a floating point operand that must be negated
14666 If CODE is 'B' then output a bitwise inverted value of X (a const int).
14667 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
/* NOTE(review): this excerpt has lost most of the `switch (code)` framing
   and the case labels themselves; each run of statements below is the body
   of one operand-modifier case.  Comments added here only restate what the
   visible statements do. */
14669 arm_print_operand (FILE *stream, rtx x, int code)
14674 fputs (ASM_COMMENT_START, stream);
14678 fputs (user_label_prefix, stream);
14682 fputs (REGISTER_PREFIX, stream);
14686 arm_print_condition (stream);
14690 /* Nothing in unified syntax, otherwise the current condition code. */
14691 if (!TARGET_UNIFIED_ASM)
14692 arm_print_condition (stream);
14696 /* The current condition code in unified syntax, otherwise nothing. */
14697 if (TARGET_UNIFIED_ASM)
14698 arm_print_condition (stream);
14702 /* The current condition code for a condition code setting instruction.
14703 Preceded by 's' in unified syntax, otherwise followed by 's'. */
14704 if (TARGET_UNIFIED_ASM)
14706 fputc('s', stream);
14707 arm_print_condition (stream);
14711 arm_print_condition (stream);
14712 fputc('s', stream);
14717 /* If the instruction is conditionally executed then print
14718 the current condition code, otherwise print 's'. */
14719 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
14720 if (current_insn_predicate)
14721 arm_print_condition (stream);
14723 fputc('s', stream);
14726 /* %# is a "break" sequence. It doesn't output anything, but is used to
14727 separate e.g. operand numbers from following text, if that text consists
14728 of further digits which we don't want to be part of the operand
/* 'N': print the negated floating-point constant. */
14736 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14737 r = REAL_VALUE_NEGATE (r);
14738 fprintf (stream, "%s", fp_const_from_val (&r));
14742 /* An integer or symbol address without a preceding # sign. */
14744 switch (GET_CODE (x))
14747 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14751 output_addr_const (stream, x);
14755 gcc_unreachable ();
/* 'B': bitwise-inverted constant (sign-extended), or a textual '~'. */
14760 if (GET_CODE (x) == CONST_INT)
14763 val = ARM_SIGN_EXTEND (~INTVAL (x));
14764 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
14768 putc ('~', stream);
14769 output_addr_const (stream, x);
14774 /* The low 16 bits of an immediate constant. */
14775 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
14779 fprintf (stream, "%s", arithmetic_instr (x, 1));
14782 /* Truncate Cirrus shift counts. */
14784 if (GET_CODE (x) == CONST_INT)
14786 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
14789 arm_print_operand (stream, x, 0);
14793 fprintf (stream, "%s", arithmetic_instr (x, 0));
/* Shift operand: print ", <op> " followed by the register or a '#'
   immediate shift amount. */
14801 if (!shift_operator (x, SImode))
14803 output_operand_lossage ("invalid shift operand");
14807 shift = shift_op (x, &val);
14811 fprintf (stream, ", %s ", shift);
14813 arm_print_operand (stream, XEXP (x, 1), 0);
14815 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
14820 /* An explanation of the 'Q', 'R' and 'H' register operands:
14822 In a pair of registers containing a DI or DF value the 'Q'
14823 operand returns the register number of the register containing
14824 the least significant part of the value. The 'R' operand returns
14825 the register number of the register containing the most
14826 significant part of the value.
14828 The 'H' operand returns the higher of the two register numbers.
14829 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
14830 same as the 'Q' operand, since the most significant part of the
14831 value is held in the lower number register. The reverse is true
14832 on systems where WORDS_BIG_ENDIAN is false.
14834 The purpose of these operands is to distinguish between cases
14835 where the endian-ness of the values is important (for example
14836 when they are added together), and cases where the endian-ness
14837 is irrelevant, but the order of register operations is important.
14838 For example when loading a value from memory into a register
14839 pair, the endian-ness does not matter. Provided that the value
14840 from the lower memory address is put into the lower numbered
14841 register, and the value from the higher address is put into the
14842 higher numbered register, the load will work regardless of whether
14843 the value being loaded is big-wordian or little-wordian. The
14844 order of the two register loads can matter however, if the address
14845 of the memory location is actually held in one of the registers
14846 being overwritten by the load. */
14848 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14850 output_operand_lossage ("invalid operand for code '%c'", code);
14854 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
14858 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14860 output_operand_lossage ("invalid operand for code '%c'", code);
14864 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
14868 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14870 output_operand_lossage ("invalid operand for code '%c'", code);
14874 asm_fprintf (stream, "%r", REGNO (x) + 1);
14878 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14880 output_operand_lossage ("invalid operand for code '%c'", code);
14884 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
14888 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14890 output_operand_lossage ("invalid operand for code '%c'", code);
14894 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
14898 asm_fprintf (stream, "%r",
14899 GET_CODE (XEXP (x, 0)) == REG
14900 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
14904 asm_fprintf (stream, "{%r-%r}",
14906 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
14909 /* Like 'M', but writing doubleword vector registers, for use by Neon
14913 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
14914 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
14916 asm_fprintf (stream, "{d%d}", regno);
14918 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
14923 /* CONST_TRUE_RTX means always -- that's the default. */
14924 if (x == const_true_rtx)
14927 if (!COMPARISON_P (x))
14929 output_operand_lossage ("invalid operand for code '%c'", code);
14933 fputs (arm_condition_codes[get_arm_condition_code (x)],
14938 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
14939 want to do that. */
14940 if (x == const_true_rtx)
14942 output_operand_lossage ("instruction never executed");
14945 if (!COMPARISON_P (x))
14947 output_operand_lossage ("invalid operand for code '%c'", code);
14951 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
14952 (get_arm_condition_code (x))],
14956 /* Cirrus registers can be accessed in a variety of ways:
14957 single floating point (f)
14958 double floating point (d)
14960 64bit integer (dx). */
14961 case 'W': /* Cirrus register in F mode. */
14962 case 'X': /* Cirrus register in D mode. */
14963 case 'Y': /* Cirrus register in FX mode. */
14964 case 'Z': /* Cirrus register in DX mode. */
14965 gcc_assert (GET_CODE (x) == REG
14966 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
14968 fprintf (stream, "mv%s%s",
14970 : code == 'X' ? "d"
14971 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
14975 /* Print cirrus register in the mode specified by the register's mode. */
14978 int mode = GET_MODE (x);
14980 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
14982 output_operand_lossage ("invalid operand for code '%c'", code);
14986 fprintf (stream, "mv%s%s",
14987 mode == DFmode ? "d"
14988 : mode == SImode ? "fx"
14989 : mode == DImode ? "dx"
14990 : "f", reg_names[REGNO (x)] + 2);
14996 if (GET_CODE (x) != REG
14997 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
14998 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
14999 /* Bad value for wCG register number. */
15001 output_operand_lossage ("invalid operand for code '%c'", code);
15006 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15009 /* Print an iWMMXt control register name. */
15011 if (GET_CODE (x) != CONST_INT
15013 || INTVAL (x) >= 16)
15014 /* Bad value for wC register number. */
15016 output_operand_lossage ("invalid operand for code '%c'", code);
15022 static const char * wc_reg_names [16] =
15024 "wCID", "wCon", "wCSSF", "wCASF",
15025 "wC4", "wC5", "wC6", "wC7",
15026 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15027 "wC12", "wC13", "wC14", "wC15"
15030 fprintf (stream, wc_reg_names [INTVAL (x)]);
15034 /* Print a VFP/Neon double precision or quad precision register name. */
15038 int mode = GET_MODE (x);
15039 int is_quad = (code == 'q');
15042 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15044 output_operand_lossage ("invalid operand for code '%c'", code);
15048 if (GET_CODE (x) != REG
15049 || !IS_VFP_REGNUM (REGNO (x)))
15051 output_operand_lossage ("invalid operand for code '%c'", code);
15056 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15057 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15059 output_operand_lossage ("invalid operand for code '%c'", code);
15063 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15064 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15068 /* These two codes print the low/high doubleword register of a Neon quad
15069 register, respectively. For pair-structure types, can also print
15070 low/high quadword registers. */
15074 int mode = GET_MODE (x);
15077 if ((GET_MODE_SIZE (mode) != 16
15078 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15080 output_operand_lossage ("invalid operand for code '%c'", code);
15085 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15087 output_operand_lossage ("invalid operand for code '%c'", code);
15091 if (GET_MODE_SIZE (mode) == 16)
15092 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15093 + (code == 'f' ? 1 : 0));
15095 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15096 + (code == 'f' ? 1 : 0));
15100 /* Print a VFPv3 floating-point constant, represented as an integer
15104 int index = vfp3_const_double_index (x);
15105 gcc_assert (index != -1);
15106 fprintf (stream, "%d", index);
15110 /* Print bits representing opcode features for Neon.
15112 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15113 and polynomials as unsigned.
15115 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15117 Bit 2 is 1 for rounding functions, 0 otherwise. */
15119 /* Identify the type as 's', 'u', 'p' or 'f'. */
15122 HOST_WIDE_INT bits = INTVAL (x);
15123 fputc ("uspf"[bits & 3], stream);
15127 /* Likewise, but signed and unsigned integers are both 'i'. */
15130 HOST_WIDE_INT bits = INTVAL (x);
15131 fputc ("iipf"[bits & 3], stream);
15135 /* As for 'T', but emit 'u' instead of 'p'. */
15138 HOST_WIDE_INT bits = INTVAL (x);
15139 fputc ("usuf"[bits & 3], stream);
15143 /* Bit 2: rounding (vs none). */
15146 HOST_WIDE_INT bits = INTVAL (x);
15147 fputs ((bits & 4) != 0 ? "r" : "", stream);
15151 /* Memory operand for vld1/vst1 instruction. */
15155 bool postinc = FALSE;
15156 gcc_assert (GET_CODE (x) == MEM);
15157 addr = XEXP (x, 0);
15158 if (GET_CODE (addr) == POST_INC)
15161 addr = XEXP (addr, 0);
15163 asm_fprintf (stream, "[%r]", REGNO (addr));
15165 fputs("!", stream);
15169 /* Register specifier for vld1.16/vst1.16. Translate the S register
15170 number into a D register number and element index. */
15173 int mode = GET_MODE (x);
15176 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15178 output_operand_lossage ("invalid operand for code '%c'", code);
15183 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15185 output_operand_lossage ("invalid operand for code '%c'", code);
15189 regno = regno - FIRST_VFP_REGNUM;
15190 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
/* Default (no modifier): print the operand according to its rtx code. */
15197 output_operand_lossage ("missing operand");
15201 switch (GET_CODE (x))
15204 asm_fprintf (stream, "%r", REGNO (x));
15208 output_memory_reference_mode = GET_MODE (x);
15209 output_address (XEXP (x, 0));
15216 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15217 sizeof (fpstr), 0, 1);
15218 fprintf (stream, "#%s", fpstr);
15221 fprintf (stream, "#%s", fp_immediate_constant (x));
15225 gcc_assert (GET_CODE (x) != NEG);
15226 fputc ('#', stream);
15227 if (GET_CODE (x) == HIGH)
15229 fputs (":lower16:", stream);
15233 output_addr_const (stream, x);
15239 /* Target hook for assembling integer objects. The ARM version needs to
15240 handle word-sized values specially. */
15242 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15244 enum machine_mode mode;
/* Aligned word-sized values are emitted as ".word", possibly with a
   GOT/GOTOFF PIC annotation. */
15246 if (size == UNITS_PER_WORD && aligned_p)
15248 fputs ("\t.word\t", asm_out_file);
15249 output_addr_const (asm_out_file, x);
15251 /* Mark symbols as position independent. We only do this in the
15252 .text segment, not in the .data segment. */
15253 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15254 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15256 /* See legitimize_pic_address for an explanation of the
15257 TARGET_VXWORKS_RTP check. */
15258 if (TARGET_VXWORKS_RTP
15259 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15260 fputs ("(GOT)", asm_out_file);
15262 fputs ("(GOTOFF)", asm_out_file);
15264 fputc ('\n', asm_out_file);
/* Vector constants are emitted element by element; the first element
   carries the full alignment, the rest only their natural size. */
15268 mode = GET_MODE (x);
15270 if (arm_vector_mode_supported_p (mode))
15274 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15276 units = CONST_VECTOR_NUNITS (x);
15277 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15279 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15280 for (i = 0; i < units; i++)
15282 rtx elt = CONST_VECTOR_ELT (x, i);
15284 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15287 for (i = 0; i < units; i++)
15289 rtx elt = CONST_VECTOR_ELT (x, i);
15290 REAL_VALUE_TYPE rval;
15292 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15295 (rval, GET_MODE_INNER (mode),
15296 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
/* Everything else falls back to the generic implementation. */
15302 return default_assemble_integer (x, size, aligned_p);
/* Shared worker for emitting a static constructor or destructor entry.
   On non-AAPCS targets it defers to the default .ctors/.dtors handling;
   on AAPCS targets it emits a .word with a (target1) relocation into
   .init_array/.fini_array (priority-suffixed when not the default). */
15306 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15310 if (!TARGET_AAPCS_BASED)
15313 default_named_section_asm_out_constructor
15314 : default_named_section_asm_out_destructor) (symbol, priority);
15318 /* Put these in the .init_array section, using a special relocation. */
15319 if (priority != DEFAULT_INIT_PRIORITY)
/* Section name gets a zero-padded 5-digit priority suffix so the linker
   sorts entries lexicographically by priority. */
15322 sprintf (buf, "%s.%.5u",
15323 is_ctor ? ".init_array" : ".fini_array",
15325 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15332 switch_to_section (s);
15333 assemble_align (POINTER_SIZE);
15334 fputs ("\t.word\t", asm_out_file);
15335 output_addr_const (asm_out_file, symbol);
15336 fputs ("(target1)\n", asm_out_file);
15339 /* Add a function to the list of static constructors. */
/* Thin wrapper: dispatch to the shared ctor/dtor emitter as a ctor. */
15342 arm_elf_asm_constructor (rtx symbol, int priority)
15344 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15347 /* Add a function to the list of static destructors. */
/* Thin wrapper: dispatch to the shared ctor/dtor emitter as a dtor. */
15350 arm_elf_asm_destructor (rtx symbol, int priority)
15352 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15355 /* A finite state machine takes care of noticing whether or not instructions
15356 can be conditionally executed, and thus decrease execution time and code
15357 size by deleting branch instructions. The fsm is controlled by
15358 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15360 /* The states of the fsm controlling condition codes are:
15361 0: normal, do nothing special
15362 1: make ASM_OUTPUT_OPCODE not output this instruction
15363 2: make ASM_OUTPUT_OPCODE not output this instruction
15364 3: make instructions conditional
15365 4: make instructions conditional
15367 State transitions (state->state by whom under condition):
15368 0 -> 1 final_prescan_insn if the `target' is a label
15369 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15370 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15371 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15372 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15373 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15374 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15375 (the target insn is arm_target_insn).
15377 If the jump clobbers the conditions then we use states 2 and 4.
15379 A similar thing can be done with conditional return insns.
15381 XXX In case the `target' is an unconditional branch, this conditionalising
15382 of the instructions always reduces code size, but not always execution
15383 time. But then, I want to reduce the code size to somewhere near what
15384 /bin/cc produces. */
15386 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15387 instructions. When a COND_EXEC instruction is seen the subsequent
15388 instructions are scanned so that multiple conditional instructions can be
15389 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15390 specify the length and true/false mask for the IT block. These will be
15391 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
15393 /* Returns the index of the ARM condition code string in
15394 `arm_condition_codes'. COMPARISON should be an rtx like
15395 `(eq (...) (...))'. */
15396 static enum arm_cond_code
15397 get_arm_condition_code (rtx comparison)
15399 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15400 enum arm_cond_code code;
15401 enum rtx_code comp_code = GET_CODE (comparison);
/* If operand 0 is not already a condition-code register, derive the CC
   mode this comparison would use.  */
15403 if (GET_MODE_CLASS (mode) != MODE_CC)
15404 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15405 XEXP (comparison, 1));
/* The CC_D* ("dominance") modes: pick a base condition here, then the
   shared code after the jump asserts COMP_CODE is EQ or NE and inverts
   the base condition for EQ.  */
15409 case CC_DNEmode: code = ARM_NE; goto dominance;
15410 case CC_DEQmode: code = ARM_EQ; goto dominance;
15411 case CC_DGEmode: code = ARM_GE; goto dominance;
15412 case CC_DGTmode: code = ARM_GT; goto dominance;
15413 case CC_DLEmode: code = ARM_LE; goto dominance;
15414 case CC_DLTmode: code = ARM_LT; goto dominance;
15415 case CC_DGEUmode: code = ARM_CS; goto dominance;
15416 case CC_DGTUmode: code = ARM_HI; goto dominance;
15417 case CC_DLEUmode: code = ARM_LS; goto dominance;
15418 case CC_DLTUmode: code = ARM_CC;
15421 gcc_assert (comp_code == EQ || comp_code == NE);
15423 if (comp_code == EQ)
15424 return ARM_INVERSE_CONDITION_CODE (code);
/* CC modes where only the N (sign) flag carries the result: GE/LT map
   onto PL/MI.  */
15430 case NE: return ARM_NE;
15431 case EQ: return ARM_EQ;
15432 case GE: return ARM_PL;
15433 case LT: return ARM_MI;
15434 default: gcc_unreachable ();
/* CC mode where only the Z flag is meaningful.  */
15440 case NE: return ARM_NE;
15441 case EQ: return ARM_EQ;
15442 default: gcc_unreachable ();
/* CC mode where only the N flag is meaningful; NE/EQ test the sign.  */
15448 case NE: return ARM_MI;
15449 case EQ: return ARM_PL;
15450 default: gcc_unreachable ();
15455 /* These encodings assume that AC=1 in the FPA system control
15456 byte. This allows us to handle all cases except UNEQ and
15460 case GE: return ARM_GE;
15461 case GT: return ARM_GT;
15462 case LE: return ARM_LS;
15463 case LT: return ARM_MI;
15464 case NE: return ARM_NE;
15465 case EQ: return ARM_EQ;
15466 case ORDERED: return ARM_VC;
15467 case UNORDERED: return ARM_VS;
15468 case UNLT: return ARM_LT;
15469 case UNLE: return ARM_LE;
15470 case UNGT: return ARM_HI;
15471 case UNGE: return ARM_PL;
15472 /* UNEQ and LTGT do not have a representation. */
15473 case UNEQ: /* Fall through. */
15474 case LTGT: /* Fall through. */
15475 default: gcc_unreachable ();
/* This cluster returns the sense-reversed condition for every ordered
   and unsigned comparison (GE->LE, GTU->CC, ...); presumably the CC
   mode here is one selected with swapped operands — confirm against
   the governing case label.  */
15481 case NE: return ARM_NE;
15482 case EQ: return ARM_EQ;
15483 case GE: return ARM_LE;
15484 case GT: return ARM_LT;
15485 case LE: return ARM_GE;
15486 case LT: return ARM_GT;
15487 case GEU: return ARM_LS;
15488 case GTU: return ARM_CC;
15489 case LEU: return ARM_CS;
15490 case LTU: return ARM_HI;
15491 default: gcc_unreachable ();
/* Carry-flag-only mode: LTU reads C set, GEU reads C clear.  */
15497 case LTU: return ARM_CS;
15498 case GEU: return ARM_CC;
15499 default: gcc_unreachable ();
/* The general CC mode: direct one-to-one mapping of RTL comparison
   codes onto ARM condition codes.  */
15505 case NE: return ARM_NE;
15506 case EQ: return ARM_EQ;
15507 case GE: return ARM_GE;
15508 case GT: return ARM_GT;
15509 case LE: return ARM_LE;
15510 case LT: return ARM_LT;
15511 case GEU: return ARM_CS;
15512 case GTU: return ARM_HI;
15513 case LEU: return ARM_LS;
15514 case LTU: return ARM_CC;
15515 default: gcc_unreachable ();
15518 default: gcc_unreachable ();
15522 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15525 thumb2_final_prescan_insn (rtx insn)
/* Starting from INSN (whose pattern is expected to be a COND_EXEC),
   scan forward and fold up to four conditionally executed insns whose
   predicate equals arm_current_cc or its exact inverse into one IT
   block.  The results are left in the globals arm_condexec_count,
   arm_condexec_mask and arm_condexec_masklen for
   thumb2_asm_output_opcode to consume.  */
15527 rtx first_insn = insn;
15528 rtx body = PATTERN (insn);
15530 enum arm_cond_code code;
15534 /* Remove the previous insn from the count of insns to be output. */
15535 if (arm_condexec_count)
15536 arm_condexec_count--;
15538 /* Nothing to do if we are already inside a conditional block. */
15539 if (arm_condexec_count)
15542 if (GET_CODE (body) != COND_EXEC)
15545 /* Conditional jumps are implemented directly. */
15546 if (GET_CODE (insn) == JUMP_INSN)
/* Start a new IT block with this insn's condition; the low mask bits
   cover this insn's ce_count sub-insns.  */
15549 predicate = COND_EXEC_TEST (body);
15550 arm_current_cc = get_arm_condition_code (predicate);
15552 n = get_attr_ce_count (insn);
15553 arm_condexec_count = 1;
15554 arm_condexec_mask = (1 << n) - 1;
15555 arm_condexec_masklen = n;
15556 /* See if subsequent instructions can be combined into the same block. */
15559 insn = next_nonnote_insn (insn);
15561 /* Jumping into the middle of an IT block is illegal, so a label or
15562 barrier terminates the block. */
15563 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
15566 body = PATTERN (insn);
15567 /* USE and CLOBBER aren't really insns, so just skip them. */
15568 if (GET_CODE (body) == USE
15569 || GET_CODE (body) == CLOBBER)
15572 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
15573 if (GET_CODE (body) != COND_EXEC)
15575 /* Allow up to 4 conditionally executed instructions in a block. */
15576 n = get_attr_ce_count (insn);
15577 if (arm_condexec_masklen + n > 4)
/* Accept the insn only if its condition is the block's condition
   (mask bits set -> 't') or its exact inverse (bits left clear ->
   'e'); any other condition ends the block.  */
15580 predicate = COND_EXEC_TEST (body);
15581 code = get_arm_condition_code (predicate);
15582 mask = (1 << n) - 1;
15583 if (arm_current_cc == code)
15584 arm_condexec_mask |= (mask << arm_condexec_masklen);
15585 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
15588 arm_condexec_count++;
15589 arm_condexec_masklen += n;
15591 /* A jump must be the last instruction in a conditional block. */
15592 if (GET_CODE(insn) == JUMP_INSN)
15595 /* Restore recog_data (getting the attributes of other insns can
15596 destroy this array, but final.c assumes that it remains intact
15597 across this call). */
15598 extract_constrain_insn_cached (first_insn);
15602 arm_final_prescan_insn (rtx insn)
/* Drive the condition-code FSM described in the big comment above: on
   seeing a conditional branch over a short run of unconditional insns,
   arrange for those insns to be emitted conditionally instead (states
   1-4), deleting the branch.  Results are communicated through the
   globals arm_ccfsm_state, arm_target_label/arm_target_insn and
   arm_current_cc.  */
15604 /* BODY will hold the body of INSN. */
15605 rtx body = PATTERN (insn);
15607 /* This will be 1 if trying to repeat the trick, and things need to be
15608 reversed if it appears to fail. */
15611 /* If we start with a return insn, we only succeed if we find another one. */
15612 int seeking_return = 0;
15614 /* START_INSN will hold the insn from where we start looking. This is the
15615 first insn after the following code_label if REVERSE is true. */
15616 rtx start_insn = insn;
15618 /* If in state 4, check if the target branch is reached, in order to
15619 change back to state 0. */
15620 if (arm_ccfsm_state == 4)
15622 if (insn == arm_target_insn)
15624 arm_target_insn = NULL;
15625 arm_ccfsm_state = 0;
15630 /* If in state 3, it is possible to repeat the trick, if this insn is an
15631 unconditional branch to a label, and immediately following this branch
15632 is the previous target label which is only used once, and the label this
15633 branch jumps to is not too far off. */
15634 if (arm_ccfsm_state == 3)
15636 if (simplejump_p (insn))
15638 start_insn = next_nonnote_insn (start_insn);
15639 if (GET_CODE (start_insn) == BARRIER)
15641 /* XXX Isn't this always a barrier? */
15642 start_insn = next_nonnote_insn (start_insn);
15644 if (GET_CODE (start_insn) == CODE_LABEL
15645 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15646 && LABEL_NUSES (start_insn) == 1)
15651 else if (GET_CODE (body) == RETURN)
15653 start_insn = next_nonnote_insn (start_insn);
15654 if (GET_CODE (start_insn) == BARRIER)
15655 start_insn = next_nonnote_insn (start_insn);
15656 if (GET_CODE (start_insn) == CODE_LABEL
15657 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15658 && LABEL_NUSES (start_insn) == 1)
15661 seeking_return = 1;
/* Normal entry: only conditional jumps (or the repeat cases above,
   signalled by REVERSE) are candidates for the trick.  */
15670 gcc_assert (!arm_ccfsm_state || reverse);
15671 if (GET_CODE (insn) != JUMP_INSN)
15674 /* This jump might be paralleled with a clobber of the condition codes
15675 the jump should always come first */
15676 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
15677 body = XVECEXP (body, 0, 0);
15680 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
15681 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
15684 int fail = FALSE, succeed = FALSE;
15685 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
15686 int then_not_else = TRUE;
15687 rtx this_insn = start_insn, label = 0;
15689 /* Register the insn jumped to. */
15692 if (!seeking_return)
15693 label = XEXP (SET_SRC (body), 0);
15695 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
15696 label = XEXP (XEXP (SET_SRC (body), 1), 0);
15697 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
15699 label = XEXP (XEXP (SET_SRC (body), 2), 0);
15700 then_not_else = FALSE;
15702 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
15703 seeking_return = 1;
15704 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
15706 seeking_return = 1;
15707 then_not_else = FALSE;
15710 gcc_unreachable ();
15712 /* See how many insns this branch skips, and what kind of insns. If all
15713 insns are okay, and the label or unconditional branch to the same
15714 label is not too far away, succeed. */
15715 for (insns_skipped = 0;
15716 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
15720 this_insn = next_nonnote_insn (this_insn);
15724 switch (GET_CODE (this_insn))
15727 /* Succeed if it is the target label, otherwise fail since
15728 control falls in from somewhere else. */
15729 if (this_insn == label)
15731 arm_ccfsm_state = 1;
15739 /* Succeed if the following insn is the target label.
15741 If return insns are used then the last insn in a function
15742 will be a barrier. */
15743 this_insn = next_nonnote_insn (this_insn);
15744 if (this_insn && this_insn == label)
15746 arm_ccfsm_state = 1;
15754 /* The AAPCS says that conditional calls should not be
15755 used since they make interworking inefficient (the
15756 linker can't transform BL<cond> into BLX). That's
15757 only a problem if the machine has BLX. */
15764 /* Succeed if the following insn is the target label, or
15765 if the following two insns are a barrier and the
15767 this_insn = next_nonnote_insn (this_insn);
15768 if (this_insn && GET_CODE (this_insn) == BARRIER)
15769 this_insn = next_nonnote_insn (this_insn);
15771 if (this_insn && this_insn == label
15772 && insns_skipped < max_insns_skipped)
15774 arm_ccfsm_state = 1;
15782 /* If this is an unconditional branch to the same label, succeed.
15783 If it is to another label, do nothing. If it is conditional,
15785 /* XXX Probably, the tests for SET and the PC are
15788 scanbody = PATTERN (this_insn);
15789 if (GET_CODE (scanbody) == SET
15790 && GET_CODE (SET_DEST (scanbody)) == PC)
15792 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
15793 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
15795 arm_ccfsm_state = 2;
15798 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
15801 /* Fail if a conditional return is undesirable (e.g. on a
15802 StrongARM), but still allow this if optimizing for size. */
15803 else if (GET_CODE (scanbody) == RETURN
15804 && !use_return_insn (TRUE, NULL)
15807 else if (GET_CODE (scanbody) == RETURN
15810 arm_ccfsm_state = 2;
15813 else if (GET_CODE (scanbody) == PARALLEL)
15815 switch (get_attr_conds (this_insn))
15825 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
15830 /* Instructions using or affecting the condition codes make it
15832 scanbody = PATTERN (this_insn);
15833 if (!(GET_CODE (scanbody) == SET
15834 || GET_CODE (scanbody) == PARALLEL)
15835 || get_attr_conds (this_insn) != CONDS_NOCOND)
15838 /* A conditional cirrus instruction must be followed by
15839 a non Cirrus instruction. However, since we
15840 conditionalize instructions in this function and by
15841 the time we get here we can't add instructions
15842 (nops), because shorten_branches() has already been
15843 called, we will disable conditionalizing Cirrus
15844 instructions to be safe. */
15845 if (GET_CODE (scanbody) != USE
15846 && GET_CODE (scanbody) != CLOBBER
15847 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
/* Scan succeeded: record where the conditionalised run ends (a label
   for states 1/3, the matching branch/return insn otherwise) and
   compute the condition under which the skipped insns execute.  */
15857 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
15858 arm_target_label = CODE_LABEL_NUMBER (label);
15861 gcc_assert (seeking_return || arm_ccfsm_state == 2);
15863 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
15865 this_insn = next_nonnote_insn (this_insn);
15866 gcc_assert (!this_insn
15867 || (GET_CODE (this_insn) != BARRIER
15868 && GET_CODE (this_insn) != CODE_LABEL));
15872 /* Oh, dear! we ran off the end.. give up. */
15873 extract_constrain_insn_cached (insn);
15874 arm_ccfsm_state = 0;
15875 arm_target_insn = NULL;
15878 arm_target_insn = this_insn;
15881 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
15884 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
15886 if (reverse || then_not_else)
15887 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
15890 /* Restore recog_data (getting the attributes of other insns can
15891 destroy this array, but final.c assumes that it remains intact
15892 across this call. */
15893 extract_constrain_insn_cached (insn);
15897 /* Output IT instructions. */
15899 thumb2_asm_output_opcode (FILE * stream)
/* If a pending IT block was recorded by thumb2_final_prescan_insn
   (arm_condexec_mask nonzero), emit its prefix before the first insn of
   the block.  The mask is then cleared so the prefix is printed exactly
   once per block.  */
15904 if (arm_condexec_mask)
/* One 't' (condition true) or 'e' (condition false/inverse) per insn
   in the block, taken from the mask bits low-to-high.  */
15906 for (n = 0; n < arm_condexec_masklen; n++)
15907 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
/* The leading "i" plus the t/e string spells the IT/ITT/ITE/...
   mnemonic, followed by the base condition.  */
15909 asm_fprintf(stream, "i%s\t%s\n\t", buff,
15910 arm_condition_codes[arm_current_cc]);
15911 arm_condexec_mask = 0;
15915 /* Returns true if REGNO is a valid register
15916 for holding a quantity of type MODE. */
15918 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
/* Return nonzero if hard register REGNO can hold a value of mode MODE.
   Each register file (CC, Thumb low regs, Cirrus, VFP/NEON, iWMMXt,
   core, fake pointers, FPA) is handled in turn.  */
15920 if (GET_MODE_CLASS (mode) == MODE_CC)
15921 return (regno == CC_REGNUM
15922 || (TARGET_HARD_FLOAT && TARGET_VFP
15923 && regno == VFPCC_REGNUM));
15926 /* For the Thumb we only allow values bigger than SImode in
15927 registers 0 - 6, so that there is always a second low
15928 register available to hold the upper part of the value.
15929 We probably we ought to ensure that the register is the
15930 start of an even numbered register pair. */
15931 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
15933 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
15934 && IS_CIRRUS_REGNUM (regno))
15935 /* We have outlawed SI values in Cirrus registers because they
15936 reside in the lower 32 bits, but SF values reside in the
15937 upper 32 bits. This causes gcc all sorts of grief. We can't
15938 even split the registers into pairs because Cirrus SI values
15939 get sign extended to 64bits-- aldyh. */
15940 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
15942 if (TARGET_HARD_FLOAT && TARGET_VFP
15943 && IS_VFP_REGNUM (regno))
15945 if (mode == SFmode || mode == SImode)
15946 return VFP_REGNO_OK_FOR_SINGLE (regno);
15948 if (mode == DFmode)
15949 return VFP_REGNO_OK_FOR_DOUBLE (regno);
15951 /* VFP registers can hold HFmode values, but there is no point in
15952 putting them there unless we have the NEON extensions for
15953 loading/storing them, too. */
15954 if (mode == HFmode)
15955 return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
/* NEON: D and Q vector modes, plus the opaque structure modes
   (TImode .. XImode) spanning 2, 3, 4, 6 or 8 D registers.  */
15958 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
15959 || (VALID_NEON_QREG_MODE (mode)
15960 && NEON_REGNO_OK_FOR_QUAD (regno))
15961 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
15962 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
15963 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
15964 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
15965 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
15970 if (TARGET_REALLY_IWMMXT)
15972 if (IS_IWMMXT_GR_REGNUM (regno))
15973 return mode == SImode;
15975 if (IS_IWMMXT_REGNUM (regno))
15976 return VALID_IWMMXT_REG_MODE (mode);
15979 /* We allow almost any value to be stored in the general registers.
15980 Restrict doubleword quantities to even register pairs so that we can
15981 use ldrd. Do not allow very large Neon structure opaque modes in
15982 general registers; they would use too many. */
15983 if (regno <= LAST_ARM_REGNUM)
15984 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
15985 && ARM_NUM_REGS (mode) <= 4;
15987 if (regno == FRAME_POINTER_REGNUM
15988 || regno == ARG_POINTER_REGNUM)
15989 /* We only allow integers in the fake hard registers. */
15990 return GET_MODE_CLASS (mode) == MODE_INT;
15992 /* The only registers left are the FPA registers
15993 which we only allow to hold FP values. */
15994 return (TARGET_HARD_FLOAT && TARGET_FPA
15995 && GET_MODE_CLASS (mode) == MODE_FLOAT
15996 && regno >= FIRST_FPA_REGNUM
15997 && regno <= LAST_FPA_REGNUM);
16000 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16001 not used in arm mode. */
16004 arm_regno_class (int regno)
/* Return the register class that hard register REGNO belongs to,
   differentiating Thumb-1/Thumb-2 from ARM mode where the class
   layout differs (see the comment above).  */
16008 if (regno == STACK_POINTER_REGNUM)
16010 if (regno == CC_REGNUM)
16017 if (TARGET_THUMB2 && regno < 8)
/* Core registers (and the fake frame/arg pointers): in Thumb-2 the
   low registers were classified above, so the rest are HI_REGS.  */
16020 if ( regno <= LAST_ARM_REGNUM
16021 || regno == FRAME_POINTER_REGNUM
16022 || regno == ARG_POINTER_REGNUM)
16023 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16025 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16026 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16028 if (IS_CIRRUS_REGNUM (regno))
16029 return CIRRUS_REGS;
16031 if (IS_VFP_REGNUM (regno))
16033 if (regno <= D7_VFP_REGNUM)
16034 return VFP_D0_D7_REGS;
16035 else if (regno <= LAST_LO_VFP_REGNUM)
16036 return VFP_LO_REGS;
16038 return VFP_HI_REGS;
16041 if (IS_IWMMXT_REGNUM (regno))
16042 return IWMMXT_REGS;
16044 if (IS_IWMMXT_GR_REGNUM (regno))
16045 return IWMMXT_GR_REGS;
16050 /* Handle a special case when computing the offset
16051 of an argument from the frame pointer. */
16053 arm_debugger_arg_offset (int value, rtx addr)
/* Compute the frame-pointer-relative offset of a stacked argument for
   debug output when dbxout_parms() could not (VALUE is its attempt,
   ADDR the argument's address rtx).  Returns an offset usable by the
   debugger; falls back to a warning and a magic value when the
   defining insn cannot be found.  */
16057 /* We are only interested if dbxout_parms() failed to compute the offset. */
16061 /* We can only cope with the case where the address is held in a register. */
16062 if (GET_CODE (addr) != REG)
16065 /* If we are using the frame pointer to point at the argument, then
16066 an offset of 0 is correct. */
16067 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16070 /* If we are using the stack pointer to point at the
16071 argument, then an offset of 0 is correct. */
16072 /* ??? Check this is consistent with thumb2 frame layout. */
16073 if ((TARGET_THUMB || !frame_pointer_needed)
16074 && REGNO (addr) == SP_REGNUM)
16077 /* Oh dear. The argument is pointed to by a register rather
16078 than being held in a register, or being stored at a known
16079 offset from the frame pointer. Since GDB only understands
16080 those two kinds of argument we must translate the address
16081 held in the register into an offset from the frame pointer.
16082 We do this by searching through the insns for the function
16083 looking to see where this register gets its value. If the
16084 register is initialized from the frame pointer plus an offset
16085 then we are in luck and we can continue, otherwise we give up.
16087 This code is exercised by producing debugging information
16088 for a function with arguments like this:
16090 double func (double a, double b, int c, double d) {return d;}
16092 Without this code the stab for parameter 'd' will be set to
16093 an offset of 0 from the frame pointer, rather than 8. */
16095 /* The if() statement says:
16097 If the insn is a normal instruction
16098 and if the insn is setting the value in a register
16099 and if the register being set is the register holding the address of the argument
16100 and if the address is computing by an addition
16101 that involves adding to a register
16102 which is the frame pointer
16107 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16109 if ( GET_CODE (insn) == INSN
16110 && GET_CODE (PATTERN (insn)) == SET
16111 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16112 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16113 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16114 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16115 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
/* Found "addr = hard_fp + const": that constant is the offset.  */
16118 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
/* No defining insn of the required shape was found; emit a diagnostic
   and fall back to a fixed guess.  */
16127 warning (0, "unable to compute real location of stacked parameter");
16128 value = 8; /* XXX magic hack */
16134 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16137 if ((MASK) & insn_flags) \
16138 add_builtin_function ((NAME), (TYPE), (CODE), \
16139 BUILT_IN_MD, NULL, NULL_TREE); \
16143 struct builtin_description
16145 const unsigned int mask;
16146 const enum insn_code icode;
16147 const char * const name;
16148 const enum arm_builtins code;
16149 const enum rtx_code comparison;
16150 const unsigned int flag;
16153 static const struct builtin_description bdesc_2arg[] =
16155 #define IWMMXT_BUILTIN(code, string, builtin) \
16156 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16157 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16159 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16160 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16161 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16162 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16163 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16164 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16165 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16166 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16167 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16168 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16169 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16170 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16171 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16172 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16173 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16174 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16175 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16176 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16177 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16178 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16179 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16180 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16181 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16182 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16183 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16184 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16185 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16186 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16187 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16188 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16189 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16190 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16191 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16192 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16193 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16194 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16195 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16196 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16197 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16198 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16199 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16200 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16201 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16202 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16203 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16204 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16205 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16206 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16207 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16208 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16209 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16210 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16211 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16212 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16213 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16214 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16215 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16216 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16218 #define IWMMXT_BUILTIN2(code, builtin) \
16219 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16221 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16222 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16223 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16224 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16225 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16226 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16227 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16228 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16229 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16230 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16231 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16232 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16233 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16234 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16235 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16236 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16237 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16238 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16239 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16240 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16241 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16242 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16243 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16244 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16245 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16246 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16247 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16248 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16249 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16250 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16251 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16252 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16255 static const struct builtin_description bdesc_1arg[] =
16257 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16258 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16259 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16260 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16261 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16262 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16263 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16264 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16265 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16266 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16267 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16268 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16269 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16270 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16271 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16272 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16273 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16274 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16277 /* Set up all the iWMMXt builtins. This is
16278 not called if TARGET_IWMMXT is zero. */
16281 arm_init_iwmmxt_builtins (void)
16283 const struct builtin_description * d;
16285 tree endlink = void_list_node;
16287 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16288 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16289 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16292 = build_function_type (integer_type_node,
16293 tree_cons (NULL_TREE, integer_type_node, endlink));
16294 tree v8qi_ftype_v8qi_v8qi_int
16295 = build_function_type (V8QI_type_node,
16296 tree_cons (NULL_TREE, V8QI_type_node,
16297 tree_cons (NULL_TREE, V8QI_type_node,
16298 tree_cons (NULL_TREE,
16301 tree v4hi_ftype_v4hi_int
16302 = build_function_type (V4HI_type_node,
16303 tree_cons (NULL_TREE, V4HI_type_node,
16304 tree_cons (NULL_TREE, integer_type_node,
16306 tree v2si_ftype_v2si_int
16307 = build_function_type (V2SI_type_node,
16308 tree_cons (NULL_TREE, V2SI_type_node,
16309 tree_cons (NULL_TREE, integer_type_node,
16311 tree v2si_ftype_di_di
16312 = build_function_type (V2SI_type_node,
16313 tree_cons (NULL_TREE, long_long_integer_type_node,
16314 tree_cons (NULL_TREE, long_long_integer_type_node,
16316 tree di_ftype_di_int
16317 = build_function_type (long_long_integer_type_node,
16318 tree_cons (NULL_TREE, long_long_integer_type_node,
16319 tree_cons (NULL_TREE, integer_type_node,
16321 tree di_ftype_di_int_int
16322 = build_function_type (long_long_integer_type_node,
16323 tree_cons (NULL_TREE, long_long_integer_type_node,
16324 tree_cons (NULL_TREE, integer_type_node,
16325 tree_cons (NULL_TREE,
16328 tree int_ftype_v8qi
16329 = build_function_type (integer_type_node,
16330 tree_cons (NULL_TREE, V8QI_type_node,
16332 tree int_ftype_v4hi
16333 = build_function_type (integer_type_node,
16334 tree_cons (NULL_TREE, V4HI_type_node,
16336 tree int_ftype_v2si
16337 = build_function_type (integer_type_node,
16338 tree_cons (NULL_TREE, V2SI_type_node,
16340 tree int_ftype_v8qi_int
16341 = build_function_type (integer_type_node,
16342 tree_cons (NULL_TREE, V8QI_type_node,
16343 tree_cons (NULL_TREE, integer_type_node,
16345 tree int_ftype_v4hi_int
16346 = build_function_type (integer_type_node,
16347 tree_cons (NULL_TREE, V4HI_type_node,
16348 tree_cons (NULL_TREE, integer_type_node,
16350 tree int_ftype_v2si_int
16351 = build_function_type (integer_type_node,
16352 tree_cons (NULL_TREE, V2SI_type_node,
16353 tree_cons (NULL_TREE, integer_type_node,
16355 tree v8qi_ftype_v8qi_int_int
16356 = build_function_type (V8QI_type_node,
16357 tree_cons (NULL_TREE, V8QI_type_node,
16358 tree_cons (NULL_TREE, integer_type_node,
16359 tree_cons (NULL_TREE,
16362 tree v4hi_ftype_v4hi_int_int
16363 = build_function_type (V4HI_type_node,
16364 tree_cons (NULL_TREE, V4HI_type_node,
16365 tree_cons (NULL_TREE, integer_type_node,
16366 tree_cons (NULL_TREE,
16369 tree v2si_ftype_v2si_int_int
16370 = build_function_type (V2SI_type_node,
16371 tree_cons (NULL_TREE, V2SI_type_node,
16372 tree_cons (NULL_TREE, integer_type_node,
16373 tree_cons (NULL_TREE,
16376 /* Miscellaneous. */
16377 tree v8qi_ftype_v4hi_v4hi
16378 = build_function_type (V8QI_type_node,
16379 tree_cons (NULL_TREE, V4HI_type_node,
16380 tree_cons (NULL_TREE, V4HI_type_node,
16382 tree v4hi_ftype_v2si_v2si
16383 = build_function_type (V4HI_type_node,
16384 tree_cons (NULL_TREE, V2SI_type_node,
16385 tree_cons (NULL_TREE, V2SI_type_node,
16387 tree v2si_ftype_v4hi_v4hi
16388 = build_function_type (V2SI_type_node,
16389 tree_cons (NULL_TREE, V4HI_type_node,
16390 tree_cons (NULL_TREE, V4HI_type_node,
16392 tree v2si_ftype_v8qi_v8qi
16393 = build_function_type (V2SI_type_node,
16394 tree_cons (NULL_TREE, V8QI_type_node,
16395 tree_cons (NULL_TREE, V8QI_type_node,
16397 tree v4hi_ftype_v4hi_di
16398 = build_function_type (V4HI_type_node,
16399 tree_cons (NULL_TREE, V4HI_type_node,
16400 tree_cons (NULL_TREE,
16401 long_long_integer_type_node,
16403 tree v2si_ftype_v2si_di
16404 = build_function_type (V2SI_type_node,
16405 tree_cons (NULL_TREE, V2SI_type_node,
16406 tree_cons (NULL_TREE,
16407 long_long_integer_type_node,
16409 tree void_ftype_int_int
16410 = build_function_type (void_type_node,
16411 tree_cons (NULL_TREE, integer_type_node,
16412 tree_cons (NULL_TREE, integer_type_node,
16415 = build_function_type (long_long_unsigned_type_node, endlink);
16417 = build_function_type (long_long_integer_type_node,
16418 tree_cons (NULL_TREE, V8QI_type_node,
16421 = build_function_type (long_long_integer_type_node,
16422 tree_cons (NULL_TREE, V4HI_type_node,
16425 = build_function_type (long_long_integer_type_node,
16426 tree_cons (NULL_TREE, V2SI_type_node,
16428 tree v2si_ftype_v4hi
16429 = build_function_type (V2SI_type_node,
16430 tree_cons (NULL_TREE, V4HI_type_node,
16432 tree v4hi_ftype_v8qi
16433 = build_function_type (V4HI_type_node,
16434 tree_cons (NULL_TREE, V8QI_type_node,
16437 tree di_ftype_di_v4hi_v4hi
16438 = build_function_type (long_long_unsigned_type_node,
16439 tree_cons (NULL_TREE,
16440 long_long_unsigned_type_node,
16441 tree_cons (NULL_TREE, V4HI_type_node,
16442 tree_cons (NULL_TREE,
16446 tree di_ftype_v4hi_v4hi
16447 = build_function_type (long_long_unsigned_type_node,
16448 tree_cons (NULL_TREE, V4HI_type_node,
16449 tree_cons (NULL_TREE, V4HI_type_node,
16452 /* Normal vector binops. */
16453 tree v8qi_ftype_v8qi_v8qi
16454 = build_function_type (V8QI_type_node,
16455 tree_cons (NULL_TREE, V8QI_type_node,
16456 tree_cons (NULL_TREE, V8QI_type_node,
16458 tree v4hi_ftype_v4hi_v4hi
16459 = build_function_type (V4HI_type_node,
16460 tree_cons (NULL_TREE, V4HI_type_node,
16461 tree_cons (NULL_TREE, V4HI_type_node,
16463 tree v2si_ftype_v2si_v2si
16464 = build_function_type (V2SI_type_node,
16465 tree_cons (NULL_TREE, V2SI_type_node,
16466 tree_cons (NULL_TREE, V2SI_type_node,
16468 tree di_ftype_di_di
16469 = build_function_type (long_long_unsigned_type_node,
16470 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16471 tree_cons (NULL_TREE,
16472 long_long_unsigned_type_node,
16475 /* Add all builtins that are more or less simple operations on two
16477 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16479 /* Use one of the operands; the target can have a different mode for
16480 mask-generating compares. */
16481 enum machine_mode mode;
16487 mode = insn_data[d->icode].operand[1].mode;
16492 type = v8qi_ftype_v8qi_v8qi;
16495 type = v4hi_ftype_v4hi_v4hi;
16498 type = v2si_ftype_v2si_v2si;
16501 type = di_ftype_di_di;
16505 gcc_unreachable ();
16508 def_mbuiltin (d->mask, d->name, type, d->code);
16511 /* Add the remaining MMX insns with somewhat more complicated types. */
16512 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16513 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16514 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16516 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16517 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16518 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16519 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16520 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16521 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16523 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16524 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16525 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16526 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16527 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16528 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16530 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16531 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16532 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16533 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16534 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16535 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16537 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16538 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16539 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16540 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16541 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16542 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16544 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16546 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16547 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16548 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16549 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16551 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16552 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16553 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16554 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16555 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16556 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16557 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16558 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16559 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16561 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16562 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16563 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16565 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16566 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16567 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16569 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16570 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16571 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16572 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16573 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16574 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16576 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16577 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16578 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16579 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16580 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16581 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16582 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16583 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16584 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16585 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16586 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
16587 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
16589 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
16590 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
16591 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
16592 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
16594 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
16595 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
16596 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
16597 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
16598 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
16599 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
16600 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
/* Register the TLS support builtin __builtin_thread_pointer, a function
   of type "void * (void)" mapped to ARM_BUILTIN_THREAD_POINTER.
   NOTE(review): this listing elides lines of the original file (the
   return type, local declarations, the library-name argument of
   add_builtin_function, and the closing brace) -- confirm against the
   full arm.c before editing.  */
16604 arm_init_tls_builtins (void)
16608 ftype = build_function_type (ptr_type_node, void_list_node);
16609 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
16610 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
      /* Mark the builtin non-throwing and read-only so the optimizers may
	 CSE calls to it and need not treat it as having side effects.  */
16612 TREE_NOTHROW (decl) = 1;
16613 TREE_READONLY (decl) = 1;
/* Bits describing which NEON vector modes a builtin variant supports.
   NOTE(review): the enumerator list (T_V8QI ... T_MAX) is elided in this
   listing; only the opening line is visible here.  */
16616 enum neon_builtin_type_bits {
      /* Map each lower-case mode name to its T_* bit; used via UP() below
	 so the VARn macros can accept bare mode names.  */
      /* 64-bit (D-register) modes.  */
16632 #define v8qi_UP T_V8QI
16633 #define v4hi_UP T_V4HI
16634 #define v2si_UP T_V2SI
16635 #define v2sf_UP T_V2SF
      /* 128-bit (Q-register) modes.  */
16637 #define v16qi_UP T_V16QI
16638 #define v8hi_UP T_V8HI
16639 #define v4si_UP T_V4SI
16640 #define v4sf_UP T_V4SF
16641 #define v2di_UP T_V2DI
      /* UP (x) pastes "_UP" onto a mode name to fetch its T_* bit.  */
16646 #define UP(X) X##_UP
      /* Fragments of the neon_itype classification enum; most enumerators
	 are elided in this listing.  */
16681 NEON_LOADSTRUCTLANE,
16683 NEON_STORESTRUCTLANE,
      /* Tail of the record describing one NEON builtin family.
	 NOTE(review): the leading fields (struct tag, name, mode-bit mask)
	 are elided in this listing.  */
16692 const neon_itype itype;	/* Operand-shape class (NEON_BINOP etc.).  */
16694 const enum insn_code codes[T_MAX];	/* One insn code per supported mode.  */
16695 const unsigned int num_vars;	/* Number of entries used in CODES.  */
16696 unsigned int base_fcode;	/* First function code; filled in at init.  */
16697 } neon_builtin_datum;
/* CF (N, X) names the insn code for NEON pattern N with mode suffix X.  */
16699 #define CF(N,X) CODE_FOR_neon_##N##X
      /* VARn (T, N, A...) expands to the initializer fields of one
	 neon_builtin_datum: the name string, its itype NEON_##T, the
	 supported-mode bit mask, the insn-code array, the variant count n,
	 and a zero base_fcode (assigned later during initialization).
	 NOTE(review): some continuation lines of VAR7 and VAR8 are elided
	 in this listing -- do not edit these bodies without the full file.  */
16701 #define VAR1(T, N, A) \
16702 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
16703 #define VAR2(T, N, A, B) \
16704 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
16705 #define VAR3(T, N, A, B, C) \
16706 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
16707 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
16708 #define VAR4(T, N, A, B, C, D) \
16709 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
16710 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
16711 #define VAR5(T, N, A, B, C, D, E) \
16712 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
16713 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
16714 #define VAR6(T, N, A, B, C, D, E, F) \
16715 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
16716 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
16717 #define VAR7(T, N, A, B, C, D, E, F, G) \
16718 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
16719 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16721 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
16722 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16724 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16725 CF (N, G), CF (N, H) }, 8, 0
16726 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
16727 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16728 | UP (H) | UP (I), \
16729 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16730 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
16731 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
16732 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16733 | UP (H) | UP (I) | UP (J), \
16734 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16735 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
16737 /* The mode entries in the following table correspond to the "key" type of the
16738 instruction variant, i.e. equivalent to that which would be specified after
16739 the assembler mnemonic, which usually refers to the last vector operand.
16740 (Signed/unsigned/polynomial types are not differentiated between though, and
16741 are all mapped onto the same mode for a given element size.) The modes
16742 listed per instruction should be the same as those defined for that
16743 instruction's pattern in neon.md.
16744 WARNING: Variants should be listed in the same increasing order as
16745 neon_builtin_type_bits. */
/* NOTE(review): the array's opening "{" (original line 16748) and the
   closing "};" are elided in this listing; entries below are kept
   verbatim.  */
16747 static neon_builtin_datum neon_builtin_data[] =
16749 { VAR10 (BINOP, vadd,
16750 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16751 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
16752 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
16753 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16754 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16755 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
16756 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16757 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16758 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
16759 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16760 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
16761 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
16762 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
16763 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
16764 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
16765 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
16766 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
16767 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
16768 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
16769 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
16770 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
16771 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
/* Shift and shift-accumulate families.  */
16772 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16773 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16774 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16775 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
16776 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
16777 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
16778 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16779 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16780 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16781 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
16782 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
/* Subtraction, comparison, absolute-difference, min/max families.  */
16783 { VAR10 (BINOP, vsub,
16784 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16785 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
16786 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
16787 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16788 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16789 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
16790 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16791 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16792 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16793 { VAR2 (BINOP, vcage, v2sf, v4sf) },
16794 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
16795 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16796 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16797 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
16798 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16799 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
16800 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16801 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16802 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
16803 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16804 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16805 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
16806 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
16807 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
16808 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
16809 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16810 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16811 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16812 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16813 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16814 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16815 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16816 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16817 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
16818 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
16819 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
16820 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16821 /* FIXME: vget_lane supports more variants than this! */
16822 { VAR10 (GETLANE, vget_lane,
16823 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16824 { VAR10 (SETLANE, vset_lane,
16825 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16826 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
16827 { VAR10 (DUP, vdup_n,
16828 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16829 { VAR10 (DUPLANE, vdup_lane,
16830 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16831 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
16832 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
16833 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
16834 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
16835 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
16836 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
16837 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
/* Lane- and scalar-operand multiply/accumulate families.  */
16838 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16839 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16840 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
16841 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
16842 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16843 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
16844 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
16845 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16846 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16847 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
16848 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
16849 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16850 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
16851 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
16852 { VAR10 (BINOP, vext,
16853 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16854 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16855 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
16856 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
16857 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
16858 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
16859 { VAR10 (SELECT, vbsl,
16860 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16861 { VAR1 (VTBL, vtbl1, v8qi) },
16862 { VAR1 (VTBL, vtbl2, v8qi) },
16863 { VAR1 (VTBL, vtbl3, v8qi) },
16864 { VAR1 (VTBL, vtbl4, v8qi) },
16865 { VAR1 (VTBX, vtbx1, v8qi) },
16866 { VAR1 (VTBX, vtbx2, v8qi) },
16867 { VAR1 (VTBX, vtbx3, v8qi) },
16868 { VAR1 (VTBX, vtbx4, v8qi) },
16869 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16870 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16871 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
/* Reinterpret casts between D-register (and between Q-register) types.  */
16872 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
16873 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
16874 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
16875 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
16876 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
16877 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
16878 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
16879 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
16880 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
16881 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
/* Load/store families, including structure (vld2/3/4) variants.  */
16882 { VAR10 (LOAD1, vld1,
16883 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16884 { VAR10 (LOAD1LANE, vld1_lane,
16885 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16886 { VAR10 (LOAD1, vld1_dup,
16887 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16888 { VAR10 (STORE1, vst1,
16889 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16890 { VAR10 (STORE1LANE, vst1_lane,
16891 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16892 { VAR9 (LOADSTRUCT,
16893 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16894 { VAR7 (LOADSTRUCTLANE, vld2_lane,
16895 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16896 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
16897 { VAR9 (STORESTRUCT, vst2,
16898 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16899 { VAR7 (STORESTRUCTLANE, vst2_lane,
16900 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16901 { VAR9 (LOADSTRUCT,
16902 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16903 { VAR7 (LOADSTRUCTLANE, vld3_lane,
16904 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16905 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
16906 { VAR9 (STORESTRUCT, vst3,
16907 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16908 { VAR7 (STORESTRUCTLANE, vst3_lane,
16909 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16910 { VAR9 (LOADSTRUCT, vld4,
16911 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16912 { VAR7 (LOADSTRUCTLANE, vld4_lane,
16913 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16914 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
16915 { VAR9 (STORESTRUCT, vst4,
16916 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16917 { VAR7 (STORESTRUCTLANE, vst4_lane,
16918 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
/* Bitwise logic operations.  */
16919 { VAR10 (LOGICBINOP, vand,
16920 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16921 { VAR10 (LOGICBINOP, vorr,
16922 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16923 { VAR10 (BINOP, veor,
16924 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16925 { VAR10 (LOGICBINOP, vbic,
16926 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16927 { VAR10 (LOGICBINOP, vorn,
16928 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
16944 arm_init_neon_builtins (void)
16946 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
16948 tree neon_intQI_type_node;
16949 tree neon_intHI_type_node;
16950 tree neon_polyQI_type_node;
16951 tree neon_polyHI_type_node;
16952 tree neon_intSI_type_node;
16953 tree neon_intDI_type_node;
16954 tree neon_float_type_node;
16956 tree intQI_pointer_node;
16957 tree intHI_pointer_node;
16958 tree intSI_pointer_node;
16959 tree intDI_pointer_node;
16960 tree float_pointer_node;
16962 tree const_intQI_node;
16963 tree const_intHI_node;
16964 tree const_intSI_node;
16965 tree const_intDI_node;
16966 tree const_float_node;
16968 tree const_intQI_pointer_node;
16969 tree const_intHI_pointer_node;
16970 tree const_intSI_pointer_node;
16971 tree const_intDI_pointer_node;
16972 tree const_float_pointer_node;
16974 tree V8QI_type_node;
16975 tree V4HI_type_node;
16976 tree V2SI_type_node;
16977 tree V2SF_type_node;
16978 tree V16QI_type_node;
16979 tree V8HI_type_node;
16980 tree V4SI_type_node;
16981 tree V4SF_type_node;
16982 tree V2DI_type_node;
16984 tree intUQI_type_node;
16985 tree intUHI_type_node;
16986 tree intUSI_type_node;
16987 tree intUDI_type_node;
16989 tree intEI_type_node;
16990 tree intOI_type_node;
16991 tree intCI_type_node;
16992 tree intXI_type_node;
16994 tree V8QI_pointer_node;
16995 tree V4HI_pointer_node;
16996 tree V2SI_pointer_node;
16997 tree V2SF_pointer_node;
16998 tree V16QI_pointer_node;
16999 tree V8HI_pointer_node;
17000 tree V4SI_pointer_node;
17001 tree V4SF_pointer_node;
17002 tree V2DI_pointer_node;
17004 tree void_ftype_pv8qi_v8qi_v8qi;
17005 tree void_ftype_pv4hi_v4hi_v4hi;
17006 tree void_ftype_pv2si_v2si_v2si;
17007 tree void_ftype_pv2sf_v2sf_v2sf;
17008 tree void_ftype_pdi_di_di;
17009 tree void_ftype_pv16qi_v16qi_v16qi;
17010 tree void_ftype_pv8hi_v8hi_v8hi;
17011 tree void_ftype_pv4si_v4si_v4si;
17012 tree void_ftype_pv4sf_v4sf_v4sf;
17013 tree void_ftype_pv2di_v2di_v2di;
17015 tree reinterp_ftype_dreg[5][5];
17016 tree reinterp_ftype_qreg[5][5];
17017 tree dreg_types[5], qreg_types[5];
17019 /* Create distinguished type nodes for NEON vector element types,
17020 and pointers to values of such types, so we can detect them later. */
17021 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17022 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17023 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17024 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17025 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17026 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17027 neon_float_type_node = make_node (REAL_TYPE);
17028 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17029 layout_type (neon_float_type_node);
17031 /* Define typedefs which exactly correspond to the modes we are basing vector
17032 types on. If you change these names you'll need to change
17033 the table used by arm_mangle_type too. */
17034 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17035 "__builtin_neon_qi");
17036 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17037 "__builtin_neon_hi");
17038 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17039 "__builtin_neon_si");
17040 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17041 "__builtin_neon_sf");
17042 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17043 "__builtin_neon_di");
17044 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17045 "__builtin_neon_poly8");
17046 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17047 "__builtin_neon_poly16");
17049 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17050 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17051 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17052 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17053 float_pointer_node = build_pointer_type (neon_float_type_node);
17055 /* Next create constant-qualified versions of the above types. */
17056 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17058 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17060 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17062 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17064 const_float_node = build_qualified_type (neon_float_type_node,
17067 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17068 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17069 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17070 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17071 const_float_pointer_node = build_pointer_type (const_float_node);
17073 /* Now create vector types based on our NEON element types. */
17074 /* 64-bit vectors. */
17076 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17078 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17080 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17082 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17083 /* 128-bit vectors. */
17085 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17087 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17089 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17091 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17093 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17095 /* Unsigned integer types for various mode sizes. */
17096 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17097 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17098 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17099 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17101 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17102 "__builtin_neon_uqi");
17103 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17104 "__builtin_neon_uhi");
17105 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17106 "__builtin_neon_usi");
17107 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17108 "__builtin_neon_udi");
17110 /* Opaque integer types for structures of vectors. */
17111 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17112 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17113 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17114 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17116 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17117 "__builtin_neon_ti");
17118 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17119 "__builtin_neon_ei");
17120 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17121 "__builtin_neon_oi");
17122 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17123 "__builtin_neon_ci");
17124 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17125 "__builtin_neon_xi");
17127 /* Pointers to vector types. */
17128 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17129 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17130 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17131 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17132 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17133 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17134 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17135 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17136 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17138 /* Operations which return results as pairs. */
17139 void_ftype_pv8qi_v8qi_v8qi =
17140 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17141 V8QI_type_node, NULL);
17142 void_ftype_pv4hi_v4hi_v4hi =
17143 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17144 V4HI_type_node, NULL);
17145 void_ftype_pv2si_v2si_v2si =
17146 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17147 V2SI_type_node, NULL);
17148 void_ftype_pv2sf_v2sf_v2sf =
17149 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17150 V2SF_type_node, NULL);
17151 void_ftype_pdi_di_di =
17152 build_function_type_list (void_type_node, intDI_pointer_node,
17153 neon_intDI_type_node, neon_intDI_type_node, NULL);
17154 void_ftype_pv16qi_v16qi_v16qi =
17155 build_function_type_list (void_type_node, V16QI_pointer_node,
17156 V16QI_type_node, V16QI_type_node, NULL);
17157 void_ftype_pv8hi_v8hi_v8hi =
17158 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17159 V8HI_type_node, NULL);
17160 void_ftype_pv4si_v4si_v4si =
17161 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17162 V4SI_type_node, NULL);
17163 void_ftype_pv4sf_v4sf_v4sf =
17164 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17165 V4SF_type_node, NULL);
17166 void_ftype_pv2di_v2di_v2di =
17167 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17168 V2DI_type_node, NULL);
17170 dreg_types[0] = V8QI_type_node;
17171 dreg_types[1] = V4HI_type_node;
17172 dreg_types[2] = V2SI_type_node;
17173 dreg_types[3] = V2SF_type_node;
17174 dreg_types[4] = neon_intDI_type_node;
17176 qreg_types[0] = V16QI_type_node;
17177 qreg_types[1] = V8HI_type_node;
17178 qreg_types[2] = V4SI_type_node;
17179 qreg_types[3] = V4SF_type_node;
17180 qreg_types[4] = V2DI_type_node;
17182 for (i = 0; i < 5; i++)
17185 for (j = 0; j < 5; j++)
17187 reinterp_ftype_dreg[i][j]
17188 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17189 reinterp_ftype_qreg[i][j]
17190 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17194 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17196 neon_builtin_datum *d = &neon_builtin_data[i];
17197 unsigned int j, codeidx = 0;
17199 d->base_fcode = fcode;
17201 for (j = 0; j < T_MAX; j++)
17203 const char* const modenames[] = {
17204 "v8qi", "v4hi", "v2si", "v2sf", "di",
17205 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17209 enum insn_code icode;
17210 int is_load = 0, is_store = 0;
17212 if ((d->bits & (1 << j)) == 0)
17215 icode = d->codes[codeidx++];
17220 case NEON_LOAD1LANE:
17221 case NEON_LOADSTRUCT:
17222 case NEON_LOADSTRUCTLANE:
17224 /* Fall through. */
17226 case NEON_STORE1LANE:
17227 case NEON_STORESTRUCT:
17228 case NEON_STORESTRUCTLANE:
17231 /* Fall through. */
17234 case NEON_LOGICBINOP:
17235 case NEON_SHIFTINSERT:
17242 case NEON_SHIFTIMM:
17243 case NEON_SHIFTACC:
17249 case NEON_LANEMULL:
17250 case NEON_LANEMULH:
17252 case NEON_SCALARMUL:
17253 case NEON_SCALARMULL:
17254 case NEON_SCALARMULH:
17255 case NEON_SCALARMAC:
17261 tree return_type = void_type_node, args = void_list_node;
17263 /* Build a function type directly from the insn_data for this
17264 builtin. The build_function_type() function takes care of
17265 removing duplicates for us. */
17266 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17270 if (is_load && k == 1)
17272 /* Neon load patterns always have the memory operand
17273 (a SImode pointer) in the operand 1 position. We
17274 want a const pointer to the element type in that
17276 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17282 eltype = const_intQI_pointer_node;
17287 eltype = const_intHI_pointer_node;
17292 eltype = const_intSI_pointer_node;
17297 eltype = const_float_pointer_node;
17302 eltype = const_intDI_pointer_node;
17305 default: gcc_unreachable ();
17308 else if (is_store && k == 0)
17310 /* Similarly, Neon store patterns use operand 0 as
17311 the memory location to store to (a SImode pointer).
17312 Use a pointer to the element type of the store in
17314 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17320 eltype = intQI_pointer_node;
17325 eltype = intHI_pointer_node;
17330 eltype = intSI_pointer_node;
17335 eltype = float_pointer_node;
17340 eltype = intDI_pointer_node;
17343 default: gcc_unreachable ();
17348 switch (insn_data[icode].operand[k].mode)
17350 case VOIDmode: eltype = void_type_node; break;
17352 case QImode: eltype = neon_intQI_type_node; break;
17353 case HImode: eltype = neon_intHI_type_node; break;
17354 case SImode: eltype = neon_intSI_type_node; break;
17355 case SFmode: eltype = neon_float_type_node; break;
17356 case DImode: eltype = neon_intDI_type_node; break;
17357 case TImode: eltype = intTI_type_node; break;
17358 case EImode: eltype = intEI_type_node; break;
17359 case OImode: eltype = intOI_type_node; break;
17360 case CImode: eltype = intCI_type_node; break;
17361 case XImode: eltype = intXI_type_node; break;
17362 /* 64-bit vectors. */
17363 case V8QImode: eltype = V8QI_type_node; break;
17364 case V4HImode: eltype = V4HI_type_node; break;
17365 case V2SImode: eltype = V2SI_type_node; break;
17366 case V2SFmode: eltype = V2SF_type_node; break;
17367 /* 128-bit vectors. */
17368 case V16QImode: eltype = V16QI_type_node; break;
17369 case V8HImode: eltype = V8HI_type_node; break;
17370 case V4SImode: eltype = V4SI_type_node; break;
17371 case V4SFmode: eltype = V4SF_type_node; break;
17372 case V2DImode: eltype = V2DI_type_node; break;
17373 default: gcc_unreachable ();
17377 if (k == 0 && !is_store)
17378 return_type = eltype;
17380 args = tree_cons (NULL_TREE, eltype, args);
17383 ftype = build_function_type (return_type, args);
17387 case NEON_RESULTPAIR:
17389 switch (insn_data[icode].operand[1].mode)
17391 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17392 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17393 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17394 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17395 case DImode: ftype = void_ftype_pdi_di_di; break;
17396 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17397 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17398 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17399 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17400 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17401 default: gcc_unreachable ();
17406 case NEON_REINTERP:
17408 /* We iterate over 5 doubleword types, then 5 quadword
17411 switch (insn_data[icode].operand[0].mode)
17413 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17414 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17415 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17416 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17417 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17418 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17419 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17420 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17421 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17422 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17423 default: gcc_unreachable ();
17429 gcc_unreachable ();
17432 gcc_assert (ftype != NULL);
17434 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17436 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
/* Create the 16-bit "__fp16" scalar floating-point type and register
   it with the front end under that name.  */
17443 arm_init_fp16_builtins (void)
17445 tree fp16_type = make_node (REAL_TYPE);
/* Half precision: 16 bits; layout_type derives size and alignment
   from the precision.  */
17446 TYPE_PRECISION (fp16_type) = 16;
17447 layout_type (fp16_type);
/* Make the type visible to source code as "__fp16".  */
17448 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
/* Top-level initialisation of the ARM builtin functions.  TLS builtins
   are always registered; iWMMXt builtins only when that extension is
   really enabled; the __fp16 type only when a half-float format has
   been selected on the command line.  */
17452 arm_init_builtins (void)
17454 arm_init_tls_builtins ();
17456 if (TARGET_REALLY_IWMMXT)
17457 arm_init_iwmmxt_builtins ();
/* NOTE(review): the guard for this call is elided in this listing;
   presumably it is conditional on TARGET_NEON -- confirm upstream.  */
17460 arm_init_neon_builtins ();
17462 if (arm_fp16_format)
17463 arm_init_fp16_builtins ();
17466 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
/* Return a diagnostic string when T is not usable as a parameter
   type: __fp16 (a 16-bit scalar float) may not be passed directly.  */
17468 static const char *
17469 arm_invalid_parameter_type (const_tree t)
17471 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17472 return N_("function parameters cannot have __fp16 type");
17476 /* Implement TARGET_INVALID_RETURN_TYPE. */
/* Reject __fp16 as a function return type, mirroring
   arm_invalid_parameter_type above.  */
17478 static const char *
17479 arm_invalid_return_type (const_tree t)
17481 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17482 return N_("functions cannot return __fp16 type");
17486 /* Implement TARGET_PROMOTED_TYPE. */
/* A __fp16 value promotes to float, per the ARM half-precision C
   semantics; other types are left to the default promotion rules.  */
17489 arm_promoted_type (const_tree t)
17491 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17492 return float_type_node;
17496 /* Implement TARGET_CONVERT_TO_TYPE.
17497 Specifically, this hook implements the peculiarity of the ARM
17498 half-precision floating-point C semantics that requires conversions between
17499 __fp16 to or from double to do an intermediate conversion to float. */
17502 arm_convert_to_type (tree type, tree expr)
17504 tree fromtype = TREE_TYPE (expr);
/* Only float-to-float conversions are special-cased here.  */
17505 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
/* A conversion between __fp16 (precision 16) and any type wider than
   float (precision > 32) must round through float first.  */
17507 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17508 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17509 return convert (type, convert (float_type_node, expr));
17513 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17514 This simply adds HFmode as a supported mode; even though we don't
17515 implement arithmetic on this type directly, it's supported by
17516 optabs conversions, much the way the double-word arithmetic is
17517 special-cased in the default hook. */
17520 arm_scalar_mode_supported_p (enum machine_mode mode)
/* HFmode is only meaningful when some __fp16 format is in effect.  */
17522 if (mode == HFmode)
17523 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17525 return default_scalar_mode_supported_p (mode);
17528 /* Errors in the source file can cause expand_expr to return const0_rtx
17529 where we expect a vector. To avoid crashing, use one of the vector
17530 clear instructions. */
17533 safe_vector_operand (rtx x, enum machine_mode mode)
/* Anything other than the error-recovery const0_rtx is fine as-is.  */
17535 if (x != const0_rtx)
/* Otherwise materialise a cleared register of the requested mode.  */
17537 x = gen_reg_rtx (mode);
17539 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17540 : gen_rtx_SUBREG (DImode, x, 0)));
17544 /* Subroutine of arm_expand_builtin to take care of binop insns. */
/* ICODE is the insn to emit, EXP the CALL_EXPR being expanded, and
   TARGET an optional destination (a fresh register is used when it
   does not satisfy the insn's mode/predicate).  */
17547 arm_expand_binop_builtin (enum insn_code icode,
17548 tree exp, rtx target)
17551 tree arg0 = CALL_EXPR_ARG (exp, 0);
17552 tree arg1 = CALL_EXPR_ARG (exp, 1);
17553 rtx op0 = expand_normal (arg0);
17554 rtx op1 = expand_normal (arg1);
/* Operand modes come straight from the insn description.  */
17555 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17556 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17557 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery const0_rtx vector operands.  */
17559 if (VECTOR_MODE_P (mode0))
17560 op0 = safe_vector_operand (op0, mode0);
17561 if (VECTOR_MODE_P (mode1))
17562 op1 = safe_vector_operand (op1, mode1);
17565 || GET_MODE (target) != tmode
17566 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17567 target = gen_reg_rtx (tmode);
17569 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
/* Force operands into registers when the predicates reject them.  */
17571 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17572 op0 = copy_to_mode_reg (mode0, op0);
17573 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17574 op1 = copy_to_mode_reg (mode1, op1);
17576 pat = GEN_FCN (icode) (target, op0, op1);
17583 /* Subroutine of arm_expand_builtin to take care of unop insns. */
/* As arm_expand_binop_builtin, but for one-operand insns.  When
   DO_LOAD is nonzero the single argument is a pointer, and is turned
   into a MEM of the insn's expected operand mode.  */
17586 arm_expand_unop_builtin (enum insn_code icode,
17587 tree exp, rtx target, int do_load)
17590 tree arg0 = CALL_EXPR_ARG (exp, 0);
17591 rtx op0 = expand_normal (arg0);
17592 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17593 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17596 || GET_MODE (target) != tmode
17597 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17598 target = gen_reg_rtx (tmode);
/* do_load path: dereference the pointer argument.  */
17600 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17603 if (VECTOR_MODE_P (mode0))
17604 op0 = safe_vector_operand (op0, mode0);
17606 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17607 op0 = copy_to_mode_reg (mode0, op0);
17610 pat = GEN_FCN (icode) (target, op0);
/* bsearch comparator used by locate_neon_builtin_icode.  A is the key
   (only base_fcode is meaningful), B a neon_builtin_data entry; the
   key matches an entry whose [base_fcode, base_fcode + num_vars)
   range contains it.  */
17618 neon_builtin_compare (const void *a, const void *b)
17620 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
17621 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
17622 unsigned int soughtcode = key->base_fcode;
17624 if (soughtcode >= memb->base_fcode
17625 && soughtcode < memb->base_fcode + memb->num_vars)
17627 else if (soughtcode < memb->base_fcode)
/* Map builtin function code FCODE back to the insn code implementing
   it, via binary search over neon_builtin_data.  If ITYPE is non-null
   the builtin's neon_itype is also returned through it.  */
17633 static enum insn_code
17634 locate_neon_builtin_icode (int fcode, neon_itype *itype)
17636 neon_builtin_datum key, *found;
17639 key.base_fcode = fcode;
17640 found = (neon_builtin_datum *)
17641 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
17642 sizeof (neon_builtin_data[0]), neon_builtin_compare);
17643 gcc_assert (found);
/* IDX selects the per-mode variant within the matched entry.  */
17644 idx = fcode - (int) found->base_fcode;
17645 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
17648 *itype = found->itype;
17650 return found->codes[idx];
17654 NEON_ARG_COPY_TO_REG,
17659 #define NEON_MAX_BUILTIN_ARGS 5
17661 /* Expand a Neon builtin. */
/* Generic expander: the trailing varargs are builtin_arg codes
   (terminated by NEON_ARG_STOP) describing how each CALL_EXPR
   argument maps onto an insn operand.  HAVE_RETVAL says whether
   operand 0 of the insn is the result.  */
17663 arm_expand_neon_args (rtx target, int icode, int have_retval,
17668 tree arg[NEON_MAX_BUILTIN_ARGS];
17669 rtx op[NEON_MAX_BUILTIN_ARGS];
17670 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17671 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
17676 || GET_MODE (target) != tmode
17677 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
17678 target = gen_reg_rtx (tmode);
17680 va_start (ap, exp);
17684 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
17686 if (thisarg == NEON_ARG_STOP)
/* Expand the argument and record the operand mode the insn expects
   (offset by one when a return value operand is present).  */
17690 arg[argc] = CALL_EXPR_ARG (exp, argc);
17691 op[argc] = expand_normal (arg[argc]);
17692 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
17696 case NEON_ARG_COPY_TO_REG:
17697 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
17698 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17699 (op[argc], mode[argc]))
17700 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
17703 case NEON_ARG_CONSTANT:
17704 /* FIXME: This error message is somewhat unhelpful. */
17705 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17706 (op[argc], mode[argc]))
17707 error ("argument must be a constant");
17710 case NEON_ARG_STOP:
17711 gcc_unreachable ();
/* Emit the insn: one GEN_FCN arity per argument count, with or
   without the TARGET result operand.  */
17724 pat = GEN_FCN (icode) (target, op[0]);
17728 pat = GEN_FCN (icode) (target, op[0], op[1]);
17732 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
17736 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
17740 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
17744 gcc_unreachable ();
17750 pat = GEN_FCN (icode) (op[0]);
17754 pat = GEN_FCN (icode) (op[0], op[1]);
17758 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
17762 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
17766 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
17770 gcc_unreachable ();
17781 /* Expand a Neon builtin. These are "special" because they don't have symbolic
17782 constants defined per-instruction or per instruction-variant. Instead, the
17783 required info is looked up in the table neon_builtin_data. */
17785 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
17788 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
/* Dispatch on the builtin's shape (itype): each case tells
   arm_expand_neon_args how many operands there are and whether each
   must be copied to a register or must be a constant.  */
17795 return arm_expand_neon_args (target, icode, 1, exp,
17796 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
17800 case NEON_SCALARMUL:
17801 case NEON_SCALARMULL:
17802 case NEON_SCALARMULH:
17803 case NEON_SHIFTINSERT:
17804 case NEON_LOGICBINOP:
17805 return arm_expand_neon_args (target, icode, 1, exp,
17806 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17810 return arm_expand_neon_args (target, icode, 1, exp,
17811 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17812 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17816 case NEON_SHIFTIMM:
17817 return arm_expand_neon_args (target, icode, 1, exp,
17818 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
17822 return arm_expand_neon_args (target, icode, 1, exp,
17823 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17827 case NEON_REINTERP:
17828 return arm_expand_neon_args (target, icode, 1, exp,
17829 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17833 return arm_expand_neon_args (target, icode, 1, exp,
17834 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
/* RESULTPAIR insns write their result through a pointer operand, so
   there is no return value (have_retval == 0).  */
17836 case NEON_RESULTPAIR:
17837 return arm_expand_neon_args (target, icode, 0, exp,
17838 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17842 case NEON_LANEMULL:
17843 case NEON_LANEMULH:
17844 return arm_expand_neon_args (target, icode, 1, exp,
17845 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17846 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17849 return arm_expand_neon_args (target, icode, 1, exp,
17850 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17851 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
17853 case NEON_SHIFTACC:
17854 return arm_expand_neon_args (target, icode, 1, exp,
17855 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17856 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17858 case NEON_SCALARMAC:
17859 return arm_expand_neon_args (target, icode, 1, exp,
17860 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17861 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17865 return arm_expand_neon_args (target, icode, 1, exp,
17866 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
/* Loads take the source pointer in a register; stores additionally
   have no return value.  */
17870 case NEON_LOADSTRUCT:
17871 return arm_expand_neon_args (target, icode, 1, exp,
17872 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17874 case NEON_LOAD1LANE:
17875 case NEON_LOADSTRUCTLANE:
17876 return arm_expand_neon_args (target, icode, 1, exp,
17877 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17881 case NEON_STORESTRUCT:
17882 return arm_expand_neon_args (target, icode, 0, exp,
17883 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17885 case NEON_STORE1LANE:
17886 case NEON_STORESTRUCTLANE:
17887 return arm_expand_neon_args (target, icode, 0, exp,
17888 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17892 gcc_unreachable ();
17895 /* Emit code to reinterpret one Neon type as another, without altering bits. */
17897 neon_reinterpret (rtx dest, rtx src)
/* A lowpart subreg in DEST's mode is a pure bit-cast of SRC.  */
17899 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
17902 /* Emit code to place a Neon pair result in memory locations (with equal
17905 neon_emit_pair_result_insn (enum machine_mode mode,
17906 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
/* INTFN generates the two partial results into TMP1/TMP2, which are
   then stored at DESTADDR and DESTADDR + GET_MODE_SIZE (mode).  */
17909 rtx mem = gen_rtx_MEM (mode, destaddr);
17910 rtx tmp1 = gen_reg_rtx (mode);
17911 rtx tmp2 = gen_reg_rtx (mode);
17913 emit_insn (intfn (tmp1, op1, tmp2, op2));
17915 emit_move_insn (mem, tmp1);
/* Second half of the pair goes one mode-size further along.  */
17916 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
17917 emit_move_insn (mem, tmp2);
17920 /* Set up operands for a register copy from src to dest, taking care not to
17921 clobber registers in the process.
17922 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
17923 be called with a large N, so that should be OK. */
17926 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
/* COPIED is a bitmask of dest slots already scheduled; DONE has all
   COUNT bits set, so the loop runs until every copy is ordered.  */
17928 unsigned int copied = 0, opctr = 0;
17929 unsigned int done = (1 << count) - 1;
17932 while (copied != done)
17934 for (i = 0; i < count; i++)
/* A copy is safe when its destination overlaps no source that has
   not yet been copied.  */
17938 for (j = 0; good && j < count; j++)
17939 if (i != j && (copied & (1 << j)) == 0
17940 && reg_overlap_mentioned_p (src[j], dest[i]))
17945 operands[opctr++] = dest[i];
17946 operands[opctr++] = src[i];
/* Exactly two operand slots (dest, src) per requested copy.  */
17952 gcc_assert (opctr == count * 2);
17955 /* Expand an expression EXP that calls a built-in function,
17956 with result going to TARGET if that's convenient
17957 (and in mode MODE if that's convenient).
17958 SUBTARGET may be used as the target for computing one of EXP's operands.
17959 IGNORE is nonzero if the value is to be ignored. */
17962 arm_expand_builtin (tree exp,
17964 rtx subtarget ATTRIBUTE_UNUSED,
17965 enum machine_mode mode ATTRIBUTE_UNUSED,
17966 int ignore ATTRIBUTE_UNUSED)
17968 const struct builtin_description * d;
17969 enum insn_code icode;
17970 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17978 int fcode = DECL_FUNCTION_CODE (fndecl);
17980 enum machine_mode tmode;
17981 enum machine_mode mode0;
17982 enum machine_mode mode1;
17983 enum machine_mode mode2;
/* Neon builtins occupy the range above ARM_BUILTIN_NEON_BASE and are
   handled by their own table-driven expander.  */
17985 if (fcode >= ARM_BUILTIN_NEON_BASE)
17986 return arm_expand_neon_builtin (fcode, exp, target);
/* iWMMXt element-extract builtins (TEXTRM*): operand 1 is the source
   vector, operand 2 an immediate lane selector.  */
17990 case ARM_BUILTIN_TEXTRMSB:
17991 case ARM_BUILTIN_TEXTRMUB:
17992 case ARM_BUILTIN_TEXTRMSH:
17993 case ARM_BUILTIN_TEXTRMUH:
17994 case ARM_BUILTIN_TEXTRMSW:
17995 case ARM_BUILTIN_TEXTRMUW:
17996 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
17997 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
17998 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
17999 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18000 : CODE_FOR_iwmmxt_textrmw);
18002 arg0 = CALL_EXPR_ARG (exp, 0);
18003 arg1 = CALL_EXPR_ARG (exp, 1);
18004 op0 = expand_normal (arg0);
18005 op1 = expand_normal (arg1);
18006 tmode = insn_data[icode].operand[0].mode;
18007 mode0 = insn_data[icode].operand[1].mode;
18008 mode1 = insn_data[icode].operand[2].mode;
18010 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18011 op0 = copy_to_mode_reg (mode0, op0);
18012 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18014 /* @@@ better error message */
18015 error ("selector must be an immediate");
/* Error recovery: return a dummy register of the right mode.  */
18016 return gen_reg_rtx (tmode);
18019 || GET_MODE (target) != tmode
18020 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18021 target = gen_reg_rtx (tmode);
18022 pat = GEN_FCN (icode) (target, op0, op1);
/* iWMMXt element-insert builtins (TINSR*): vector, scalar, then an
   immediate lane selector.  */
18028 case ARM_BUILTIN_TINSRB:
18029 case ARM_BUILTIN_TINSRH:
18030 case ARM_BUILTIN_TINSRW:
18031 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18032 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18033 : CODE_FOR_iwmmxt_tinsrw);
18034 arg0 = CALL_EXPR_ARG (exp, 0);
18035 arg1 = CALL_EXPR_ARG (exp, 1);
18036 arg2 = CALL_EXPR_ARG (exp, 2);
18037 op0 = expand_normal (arg0);
18038 op1 = expand_normal (arg1);
18039 op2 = expand_normal (arg2);
18040 tmode = insn_data[icode].operand[0].mode;
18041 mode0 = insn_data[icode].operand[1].mode;
18042 mode1 = insn_data[icode].operand[2].mode;
18043 mode2 = insn_data[icode].operand[3].mode;
18045 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18046 op0 = copy_to_mode_reg (mode0, op0);
18047 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18048 op1 = copy_to_mode_reg (mode1, op1);
18049 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18051 /* @@@ better error message */
18052 error ("selector must be an immediate");
18056 || GET_MODE (target) != tmode
18057 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18058 target = gen_reg_rtx (tmode);
18059 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* Moves to/from the iWMMXt control registers (tmcr / tmrc).  */
18065 case ARM_BUILTIN_SETWCX:
18066 arg0 = CALL_EXPR_ARG (exp, 0);
18067 arg1 = CALL_EXPR_ARG (exp, 1);
18068 op0 = force_reg (SImode, expand_normal (arg0));
18069 op1 = expand_normal (arg1);
18070 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18073 case ARM_BUILTIN_GETWCX:
18074 arg0 = CALL_EXPR_ARG (exp, 0);
18075 op0 = expand_normal (arg0);
18076 target = gen_reg_rtx (SImode);
18077 emit_insn (gen_iwmmxt_tmrc (target, op0));
/* wshufh: vector plus an immediate shuffle mask.  */
18080 case ARM_BUILTIN_WSHUFH:
18081 icode = CODE_FOR_iwmmxt_wshufh;
18082 arg0 = CALL_EXPR_ARG (exp, 0);
18083 arg1 = CALL_EXPR_ARG (exp, 1);
18084 op0 = expand_normal (arg0);
18085 op1 = expand_normal (arg1);
18086 tmode = insn_data[icode].operand[0].mode;
18087 mode1 = insn_data[icode].operand[1].mode;
18088 mode2 = insn_data[icode].operand[2].mode;
18090 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18091 op0 = copy_to_mode_reg (mode1, op0);
18092 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18094 /* @@@ better error message */
18095 error ("mask must be an immediate");
18099 || GET_MODE (target) != tmode
18100 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18101 target = gen_reg_rtx (tmode);
18102 pat = GEN_FCN (icode) (target, op0, op1);
/* Sum-of-absolute-differences builtins map directly to binops.  */
18108 case ARM_BUILTIN_WSADB:
18109 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18110 case ARM_BUILTIN_WSADH:
18111 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18112 case ARM_BUILTIN_WSADBZ:
18113 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18114 case ARM_BUILTIN_WSADHZ:
18115 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18117 /* Several three-argument builtins. */
18118 case ARM_BUILTIN_WMACS:
18119 case ARM_BUILTIN_WMACU:
18120 case ARM_BUILTIN_WALIGN:
18121 case ARM_BUILTIN_TMIA:
18122 case ARM_BUILTIN_TMIAPH:
18123 case ARM_BUILTIN_TMIATT:
18124 case ARM_BUILTIN_TMIATB:
18125 case ARM_BUILTIN_TMIABT:
18126 case ARM_BUILTIN_TMIABB:
18127 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18128 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18129 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18130 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18131 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18132 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18133 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18134 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18135 : CODE_FOR_iwmmxt_walign);
18136 arg0 = CALL_EXPR_ARG (exp, 0);
18137 arg1 = CALL_EXPR_ARG (exp, 1);
18138 arg2 = CALL_EXPR_ARG (exp, 2);
18139 op0 = expand_normal (arg0);
18140 op1 = expand_normal (arg1);
18141 op2 = expand_normal (arg2);
18142 tmode = insn_data[icode].operand[0].mode;
18143 mode0 = insn_data[icode].operand[1].mode;
18144 mode1 = insn_data[icode].operand[2].mode;
18145 mode2 = insn_data[icode].operand[3].mode;
18147 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18148 op0 = copy_to_mode_reg (mode0, op0);
18149 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18150 op1 = copy_to_mode_reg (mode1, op1);
18151 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18152 op2 = copy_to_mode_reg (mode2, op2);
18154 || GET_MODE (target) != tmode
18155 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18156 target = gen_reg_rtx (tmode);
18157 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* wzero: clear a whole 64-bit iWMMXt register.  */
18163 case ARM_BUILTIN_WZERO:
18164 target = gen_reg_rtx (DImode);
18165 emit_insn (gen_iwmmxt_clrdi (target));
18168 case ARM_BUILTIN_THREAD_POINTER:
18169 return arm_load_tp (target);
/* Fall back to the generated tables of two- and one-operand
   builtins.  */
18175 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18176 if (d->code == (const enum arm_builtins) fcode)
18177 return arm_expand_binop_builtin (d->icode, exp, target);
18179 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18180 if (d->code == (const enum arm_builtins) fcode)
18181 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18183 /* @@@ Should really do something sensible here. */
18187 /* Return the number (counting from 0) of
18188 the least significant set bit in MASK. */
18191 number_of_first_bit_set (unsigned mask)
/* Scan upward from bit 0 until a set bit is found.  */
18196 (mask & (1 << bit)) == 0;
18203 /* Emit code to push or pop registers to or from the stack. F is the
18204 assembly file. MASK is the registers to push or pop. PUSH is
18205 nonzero if we should push, and zero if we should pop. For debugging
18206 output, if pushing, adjust CFA_OFFSET by the amount of space added
18207 to the stack. REAL_REGS should have the same number of bits set as
18208 MASK, and will be used instead (in the same order) to describe which
18209 registers were saved - this is used to mark the save slots when we
18210 push high registers after moving them to low registers. */
18212 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18213 unsigned long real_regs)
/* Only r0-r7 can appear directly in a Thumb push/pop list.  */
18216 int lo_mask = mask & 0xFF;
18217 int pushed_words = 0;
18221 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18223 /* Special case. Do not generate a POP PC statement here, do it in
18225 thumb_exit (f, -1);
/* Emit a ".save" EABI unwind directive listing the saved registers.  */
18229 if (ARM_EABI_UNWIND_TABLES && push)
18231 fprintf (f, "\t.save\t{");
18232 for (regno = 0; regno < 15; regno++)
18234 if (real_regs & (1 << regno))
/* Comma-separate all but the first listed register.  */
18236 if (real_regs & ((1 << regno) -1))
18238 asm_fprintf (f, "%r", regno);
18241 fprintf (f, "}\n");
18244 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18246 /* Look at the low registers first. */
18247 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18251 asm_fprintf (f, "%r", regno);
18253 if ((lo_mask & ~1) != 0)
18260 if (push && (mask & (1 << LR_REGNUM)))
18262 /* Catch pushing the LR. */
18266 asm_fprintf (f, "%r", LR_REGNUM);
18270 else if (!push && (mask & (1 << PC_REGNUM)))
18272 /* Catch popping the PC. */
18273 if (TARGET_INTERWORK || TARGET_BACKTRACE
18274 || crtl->calls_eh_return)
18276 /* The PC is never popped directly, instead
18277 it is popped into r3 and then BX is used. */
18278 fprintf (f, "}\n");
18280 thumb_exit (f, -1);
18289 asm_fprintf (f, "%r", PC_REGNUM);
18293 fprintf (f, "}\n");
/* Emit CFI describing the stack adjustment and the save slots.  */
18295 if (push && pushed_words && dwarf2out_do_frame ())
18297 char *l = dwarf2out_cfi_label (false);
18298 int pushed_mask = real_regs;
18300 *cfa_offset += pushed_words * 4;
18301 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18304 pushed_mask = real_regs;
18305 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18307 if (pushed_mask & 1)
18308 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18313 /* Generate code to return from a thumb function.
18314 If 'reg_containing_return_addr' is -1, then the return address is
18315 actually on the stack, at the stack pointer. */
18317 thumb_exit (FILE *f, int reg_containing_return_addr)
18319 unsigned regs_available_for_popping;
18320 unsigned regs_to_pop;
18322 unsigned available;
18326 int restore_a4 = FALSE;
18328 /* Compute the registers we need to pop. */
18332 if (reg_containing_return_addr == -1)
18334 regs_to_pop |= 1 << LR_REGNUM;
18338 if (TARGET_BACKTRACE)
18340 /* Restore the (ARM) frame pointer and stack pointer. */
18341 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18345 /* If there is nothing to pop then just emit the BX instruction and
18347 if (pops_needed == 0)
18349 if (crtl->calls_eh_return)
18350 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18352 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18355 /* Otherwise if we are not supporting interworking and we have not created
18356 a backtrace structure and the function was not entered in ARM mode then
18357 just pop the return address straight into the PC. */
18358 else if (!TARGET_INTERWORK
18359 && !TARGET_BACKTRACE
18360 && !is_called_in_ARM_mode (current_function_decl)
18361 && !crtl->calls_eh_return)
18363 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18367 /* Find out how many of the (return) argument registers we can corrupt. */
18368 regs_available_for_popping = 0;
18370 /* If returning via __builtin_eh_return, the bottom three registers
18371 all contain information needed for the return. */
18372 if (crtl->calls_eh_return)
18376 /* If we can deduce the registers used from the function's
18377 return value. This is more reliable than examining
18378 df_regs_ever_live_p () because that will be set if the register is
18379 ever used in the function, not just if the register is used
18380 to hold a return value. */
18382 if (crtl->return_rtx != 0)
18383 mode = GET_MODE (crtl->return_rtx);
18385 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18387 size = GET_MODE_SIZE (mode);
18391 /* In a void function we can use any argument register.
18392 In a function that returns a structure on the stack
18393 we can use the second and third argument registers. */
18394 if (mode == VOIDmode)
18395 regs_available_for_popping =
18396 (1 << ARG_REGISTER (1))
18397 | (1 << ARG_REGISTER (2))
18398 | (1 << ARG_REGISTER (3));
18400 regs_available_for_popping =
18401 (1 << ARG_REGISTER (2))
18402 | (1 << ARG_REGISTER (3));
18404 else if (size <= 4)
18405 regs_available_for_popping =
18406 (1 << ARG_REGISTER (2))
18407 | (1 << ARG_REGISTER (3));
18408 else if (size <= 8)
18409 regs_available_for_popping =
18410 (1 << ARG_REGISTER (3));
18413 /* Match registers to be popped with registers into which we pop them. */
18414 for (available = regs_available_for_popping,
18415 required = regs_to_pop;
18416 required != 0 && available != 0;
18417 available &= ~(available & - available),
18418 required &= ~(required & - required))
18421 /* If we have any popping registers left over, remove them. */
18423 regs_available_for_popping &= ~available;
18425 /* Otherwise if we need another popping register we can use
18426 the fourth argument register. */
18427 else if (pops_needed)
18429 /* If we have not found any free argument registers and
18430 reg a4 contains the return address, we must move it. */
18431 if (regs_available_for_popping == 0
18432 && reg_containing_return_addr == LAST_ARG_REGNUM)
18434 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18435 reg_containing_return_addr = LR_REGNUM;
18437 else if (size > 12)
18439 /* Register a4 is being used to hold part of the return value,
18440 but we have dire need of a free, low register. */
18443 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
18446 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18448 /* The fourth argument register is available. */
18449 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18455 /* Pop as many registers as we can. */
18456 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18457 regs_available_for_popping);
18459 /* Process the registers we popped. */
18460 if (reg_containing_return_addr == -1)
18462 /* The return address was popped into the lowest numbered register. */
18463 regs_to_pop &= ~(1 << LR_REGNUM);
18465 reg_containing_return_addr =
18466 number_of_first_bit_set (regs_available_for_popping);
18468 /* Remove this register for the mask of available registers, so that
18469 the return address will not be corrupted by further pops. */
18470 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18473 /* If we popped other registers then handle them here. */
18474 if (regs_available_for_popping)
18478 /* Work out which register currently contains the frame pointer. */
18479 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18481 /* Move it into the correct place. */
18482 asm_fprintf (f, "\tmov\t%r, %r\n",
18483 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18485 /* (Temporarily) remove it from the mask of popped registers. */
18486 regs_available_for_popping &= ~(1 << frame_pointer);
18487 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18489 if (regs_available_for_popping)
18493 /* We popped the stack pointer as well,
18494 find the register that contains it. */
18495 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18497 /* Move it into the stack register. */
18498 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18500 /* At this point we have popped all necessary registers, so
18501 do not worry about restoring regs_available_for_popping
18502 to its correct value:
18504 assert (pops_needed == 0)
18505 assert (regs_available_for_popping == (1 << frame_pointer))
18506 assert (regs_to_pop == (1 << STACK_POINTER)) */
18510 /* Since we have just move the popped value into the frame
18511 pointer, the popping register is available for reuse, and
18512 we know that we still have the stack pointer left to pop. */
18513 regs_available_for_popping |= (1 << frame_pointer);
18517 /* If we still have registers left on the stack, but we no longer have
18518 any registers into which we can pop them, then we must move the return
18519 address into the link register and make available the register that
18521 if (regs_available_for_popping == 0 && pops_needed > 0)
18523 regs_available_for_popping |= 1 << reg_containing_return_addr;
18525 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18526 reg_containing_return_addr);
18528 reg_containing_return_addr = LR_REGNUM;
18531 /* If we have registers left on the stack then pop some more.
18532 We know that at most we will want to pop FP and SP. */
18533 if (pops_needed > 0)
18538 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18539 regs_available_for_popping);
18541 /* We have popped either FP or SP.
18542 Move whichever one it is into the correct register. */
18543 popped_into = number_of_first_bit_set (regs_available_for_popping);
18544 move_to = number_of_first_bit_set (regs_to_pop);
18546 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18548 regs_to_pop &= ~(1 << move_to);
18553 /* If we still have not popped everything then we must have only
18554 had one register available to us and we are now popping the SP. */
18555 if (pops_needed > 0)
18559 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18560 regs_available_for_popping);
18562 popped_into = number_of_first_bit_set (regs_available_for_popping);
18564 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
18566 assert (regs_to_pop == (1 << STACK_POINTER))
18567 assert (pops_needed == 1)
18571 /* If necessary restore the a4 register. */
18574 if (reg_containing_return_addr != LR_REGNUM)
18576 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18577 reg_containing_return_addr = LR_REGNUM;
18580 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
18583 if (crtl->calls_eh_return)
18584 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18586 /* Return to caller. */
18587 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
/* Final-prescan hook for Thumb-1: when -fprint-asm-name is in effect,
   emit the insn's computed address as an assembler comment.
   NOTE(review): this extract is missing lines (gaps in the embedded
   numbering, e.g. 18591/18593), so the return type and braces are not
   visible here.  */
18592 thumb1_final_prescan_insn (rtx insn)
18594 if (flag_print_asm_name)
18595 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
18596 INSN_ADDRESSES (INSN_UID (insn)));
/* Test whether VAL, truncated to 32 bits, equals an 8-bit constant
   shifted left by 0..24 bits (the 0xff mask is slid across the word).
   NOTE(review): the embedded numbering has gaps, so the return type,
   the declaration of `i', and the return statements are missing from
   this extract.  */
18600 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
18602 unsigned HOST_WIDE_INT mask = 0xff;
/* Only the low 32 bits of VAL are significant on this target.  */
18605 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
18606 if (val == 0) /* XXX */
/* Try every shift position at which an 8-bit field could hold VAL.  */
18609 for (i = 0; i < 25; i++)
18610 if ((val & (mask << i)) == val)
18616 /* Returns nonzero if the current function contains,
18617 or might contain a far jump. */
/* NOTE(review): gaps in the embedded numbering show this extract is
   missing lines (function header, declaration of `insn', several
   returns/braces); the comments below describe only the visible code.  */
18619 thumb_far_jump_used_p (void)
18623 /* This test is only important for leaf functions. */
18624 /* assert (!leaf_function_p ()); */
18626 /* If we have already decided that far jumps may be used,
18627 do not bother checking again, and always return true even if
18628 it turns out that they are not being used. Once we have made
18629 the decision that far jumps are present (and that hence the link
18630 register will be pushed onto the stack) we cannot go back on it. */
/* Sticky flag: once set it is never cleared for this function.  */
18631 if (cfun->machine->far_jump_used)
18634 /* If this function is not being called from the prologue/epilogue
18635 generation code then it must be being called from the
18636 INITIAL_ELIMINATION_OFFSET macro. */
18637 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
18639 /* In this case we know that we are being asked about the elimination
18640 of the arg pointer register. If that register is not being used,
18641 then there are no arguments on the stack, and we do not have to
18642 worry that a far jump might force the prologue to push the link
18643 register, changing the stack offsets. In this case we can just
18644 return false, since the presence of far jumps in the function will
18645 not affect stack offsets.
18647 If the arg pointer is live (or if it was live, but has now been
18648 eliminated and so set to dead) then we do have to test to see if
18649 the function might contain a far jump. This test can lead to some
18650 false negatives, since before reload is completed, then length of
18651 branch instructions is not known, so gcc defaults to returning their
18652 longest length, which in turn sets the far jump attribute to true.
18654 A false negative will not result in bad code being generated, but it
18655 will result in a needless push and pop of the link register. We
18656 hope that this does not occur too often.
18658 If we need doubleword stack alignment this could affect the other
18659 elimination offsets so we can't risk getting it wrong. */
18660 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
18661 cfun->machine->arg_pointer_live = 1;
18662 else if (!cfun->machine->arg_pointer_live)
18666 /* Check to see if the function contains a branch
18667 insn with the far jump attribute set. */
/* Scan every insn; table jumps (ADDR_VEC / ADDR_DIFF_VEC patterns)
   are never far jumps and must be skipped before querying the
   far_jump attribute.  */
18668 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18670 if (GET_CODE (insn) == JUMP_INSN
18671 /* Ignore tablejump patterns. */
18672 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18673 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
18674 && get_attr_far_jump (insn) == FAR_JUMP_YES
18677 /* Record the fact that we have decided that
18678 the function does use far jumps. */
18679 cfun->machine->far_jump_used = 1;
18687 /* Return nonzero if FUNC must be entered in ARM mode. */
/* NOTE(review): this extract is missing lines (e.g. the return type
   and braces); visible logic: public functions get ARM entry when
   callee interworking is enabled, otherwise the "interfacearm"
   attribute decides.  */
18689 is_called_in_ARM_mode (tree func)
18691 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
18693 /* Ignore the problem about functions whose address is taken. */
18694 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
18698 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
18704 /* The bits which aren't usefully expanded as rtl. */
/* Emit the textual part of the Thumb-1 epilogue: undo any high-register
   saves made by the prologue, pop the saved low registers, and return.
   NOTE(review): many lines are missing from this extract (gaps in the
   embedded numbering: return type, braces, `else' arms, declarations of
   `size', `regno', `next_hi_reg', pop-mask arguments); comments describe
   only the visible code.  */
18706 thumb_unexpanded_epilogue (void)
18708 arm_stack_offsets *offsets;
18710 unsigned long live_regs_mask = 0;
18711 int high_regs_pushed = 0;
18712 int had_to_push_lr;
18715 if (cfun->machine->return_used_this_function != 0)
18718 if (IS_NAKED (arm_current_func_type ()))
18721 offsets = arm_get_frame_offsets ();
18722 live_regs_mask = offsets->saved_regs_mask;
/* Bits 8-11 of the mask are the high registers r8-r11.  */
18723 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
18725 /* If we can deduce the registers used from the function's return value.
18726 This is more reliable that examining df_regs_ever_live_p () because that
18727 will be set if the register is ever used in the function, not just if
18728 the register is used to hold a return value. */
18729 size = arm_size_return_regs ();
18731 /* The prolog may have pushed some high registers to use as
18732 work registers. e.g. the testsuite file:
18733 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
18734 compiles to produce:
18735 push {r4, r5, r6, r7, lr}
18739 as part of the prolog. We have to undo that pushing here. */
18741 if (high_regs_pushed)
/* Low registers (r0-r7) that were saved and can receive popped
   high-register values.  */
18743 unsigned long mask = live_regs_mask & 0xff;
18746 /* The available low registers depend on the size of the value we are
18754 /* Oh dear! We have no low registers into which we can pop
18757 ("no low registers available for popping high registers");
18759 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
18760 if (live_regs_mask & (1 << next_hi_reg))
18763 while (high_regs_pushed)
18765 /* Find lo register(s) into which the high register(s) can
18767 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
18769 if (mask & (1 << regno))
18770 high_regs_pushed--;
18771 if (high_regs_pushed == 0)
18775 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
18777 /* Pop the values into the low register(s). */
18778 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
18780 /* Move the value(s) into the high registers. */
18781 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
18783 if (mask & (1 << regno))
18785 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
18788 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
18789 if (live_regs_mask & (1 << next_hi_reg))
/* All high registers restored; drop them from the mask.  */
18794 live_regs_mask &= ~0x0f00;
18797 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
18798 live_regs_mask &= 0xff;
18800 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
18802 /* Pop the return address into the PC. */
18803 if (had_to_push_lr)
18804 live_regs_mask |= 1 << PC_REGNUM;
18806 /* Either no argument registers were pushed or a backtrace
18807 structure was created which includes an adjusted stack
18808 pointer, so just pop everything. */
18809 if (live_regs_mask)
18810 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
18813 /* We have either just popped the return address into the
18814 PC or it is was kept in LR for the entire function. */
18815 if (!had_to_push_lr)
18816 thumb_exit (asm_out_file, LR_REGNUM);
18820 /* Pop everything but the return address. */
18821 if (live_regs_mask)
18822 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
18825 if (had_to_push_lr)
18829 /* We have no free low regs, so save one. */
18830 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
18834 /* Get the return address into a temporary register. */
18835 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
18836 1 << LAST_ARG_REGNUM);
18840 /* Move the return address to lr. */
18841 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
18843 /* Restore the low register. */
18844 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
18849 regno = LAST_ARG_REGNUM;
18854 /* Remove the argument registers that were pushed onto the stack. */
18855 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
18856 SP_REGNUM, SP_REGNUM,
18857 crtl->args.pretend_args_size);
18859 thumb_exit (asm_out_file, regno);
18865 /* Functions to save and restore machine-specific function data. */
/* Allocate (GC-managed, zero-initialized) the per-function machine
   state.  NOTE(review): the closing of the #if block, the return
   statement and braces are missing from this extract.  */
18866 static struct machine_function *
18867 arm_init_machine_status (void)
18869 struct machine_function *machine;
18870 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
/* If ARM_FT_UNKNOWN is zero the ggc_alloc_cleared call above already
   set func_type correctly, so the store is only needed otherwise.  */
18872 #if ARM_FT_UNKNOWN != 0
18873 machine->func_type = ARM_FT_UNKNOWN;
18878 /* Return an RTX indicating where the return address to the
18879 calling function can be found. */
/* NOTE(review): lines are missing from this extract (return type,
   braces, and presumably a COUNT != 0 early-out before this pseudo
   for LR is returned).  */
18881 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
18886 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
18889 /* Do anything needed before RTL is emitted for each function. */
/* NOTE(review): the return type and braces are not visible in this
   extract (gaps in the embedded numbering).  */
18891 arm_init_expanders (void)
18893 /* Arrange to initialize and mark the machine per-function status. */
18894 init_machine_status = arm_init_machine_status;
18896 /* This is to stop the combine pass optimizing away the alignment
18897 adjustment of va_arg. */
18898 /* ??? It is claimed that this should not be necessary. */
18900 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
18904 /* Like arm_compute_initial_elimination offset. Simpler because there
18905 isn't an ABI specified frame pointer for Thumb. Instead, we set it
18906 to point at the base of the local variables after static stack
18907 space for a function has been allocated. */
/* Offsets are differences between members of the arm_stack_offsets
   struct computed by arm_get_frame_offsets ().
   NOTE(review): this extract is missing lines — the function header's
   return type, the outer `switch (from)' / inner `switch (to)'
   statements, braces, and `return 0;' style arms are not visible; the
   case labels below clearly belong to two nested switches (one block
   for FROM == ARG_POINTER_REGNUM, one for FROM == FRAME_POINTER_REGNUM).  */
18910 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
18912 arm_stack_offsets *offsets;
18914 offsets = arm_get_frame_offsets ();
18918 case ARG_POINTER_REGNUM:
18921 case STACK_POINTER_REGNUM:
18922 return offsets->outgoing_args - offsets->saved_args;
18924 case FRAME_POINTER_REGNUM:
18925 return offsets->soft_frame - offsets->saved_args;
18927 case ARM_HARD_FRAME_POINTER_REGNUM:
18928 return offsets->saved_regs - offsets->saved_args;
18930 case THUMB_HARD_FRAME_POINTER_REGNUM:
18931 return offsets->locals_base - offsets->saved_args;
18934 gcc_unreachable ();
18938 case FRAME_POINTER_REGNUM:
18941 case STACK_POINTER_REGNUM:
18942 return offsets->outgoing_args - offsets->soft_frame;
18944 case ARM_HARD_FRAME_POINTER_REGNUM:
18945 return offsets->saved_regs - offsets->soft_frame;
18947 case THUMB_HARD_FRAME_POINTER_REGNUM:
18948 return offsets->locals_base - offsets->soft_frame;
18951 gcc_unreachable ();
18956 gcc_unreachable ();
18960 /* Generate the rest of a function's prologue. */
/* Emit RTL for the Thumb-1 prologue: PIC register load, interworking
   slot, stack decrement (immediate or via a scratch register for large
   frames), frame pointer setup, and a scheduling barrier when needed.
   NOTE(review): this extract is missing lines (return type, braces,
   declarations of `insn', `regno', `reg', `dwarf', early returns);
   comments describe only the visible code.  */
18962 thumb1_expand_prologue (void)
18966 HOST_WIDE_INT amount;
18967 arm_stack_offsets *offsets;
18968 unsigned long func_type;
18970 unsigned long live_regs_mask;
18972 func_type = arm_current_func_type ();
18974 /* Naked functions don't have prologues. */
18975 if (IS_NAKED (func_type))
18978 if (IS_INTERRUPT (func_type))
18980 error ("interrupt Service Routines cannot be coded in Thumb mode");
18984 offsets = arm_get_frame_offsets ();
18985 live_regs_mask = offsets->saved_regs_mask;
18986 /* Load the pic register before setting the frame pointer,
18987 so we can use r7 as a temporary work register. */
18988 if (flag_pic && arm_pic_register != INVALID_REGNUM)
18989 arm_load_pic_register (live_regs_mask);
18991 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18992 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
18993 stack_pointer_rtx);
/* Bytes of stack to allocate below the saved registers.  */
18995 amount = offsets->outgoing_args - offsets->saved_regs;
19000 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19001 GEN_INT (- amount)));
19002 RTX_FRAME_RELATED_P (insn) = 1;
19008 /* The stack decrement is too big for an immediate value in a single
19009 insn. In theory we could issue multiple subtracts, but after
19010 three of them it becomes more space efficient to place the full
19011 value in the constant pool and load into a register. (Also the
19012 ARM debugger really likes to see only one stack decrement per
19013 function). So instead we look for a scratch register into which
19014 we can load the decrement, and then we subtract this from the
19015 stack pointer. Unfortunately on the thumb the only available
19016 scratch registers are the argument registers, and we cannot use
19017 these as they may hold arguments to the function. Instead we
19018 attempt to locate a call preserved register which is used by this
19019 function. If we can find one, then we know that it will have
19020 been pushed at the start of the prologue and so we can corrupt
19022 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19023 if (live_regs_mask & (1 << regno))
19026 gcc_assert(regno <= LAST_LO_REGNUM);
19028 reg = gen_rtx_REG (SImode, regno);
19030 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19032 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19033 stack_pointer_rtx, reg));
19034 RTX_FRAME_RELATED_P (insn) = 1;
/* Attach a REG_FRAME_RELATED_EXPR note so the unwinder sees a
   constant SP adjustment rather than the register add.  */
19035 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19036 plus_constant (stack_pointer_rtx,
19038 RTX_FRAME_RELATED_P (dwarf) = 1;
19039 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19043 if (frame_pointer_needed)
19044 thumb_set_frame_pointer (offsets);
19046 /* If we are profiling, make sure no instructions are scheduled before
19047 the call to mcount. Similarly if the user has requested no
19048 scheduling in the prolog. Similarly if we want non-call exceptions
19049 using the EABI unwinder, to prevent faulting instructions from being
19050 swapped with a stack adjustment. */
19051 if (crtl->profile || !TARGET_SCHED_PROLOG
19052 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19053 emit_insn (gen_blockage ())
19055 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19056 if (live_regs_mask & 0xff)
19057 cfun->machine->lr_save_eliminated = 0;
/* Emit RTL for the Thumb-1 epilogue: restore SP from the frame pointer
   if one was used, re-increment the stack, and emit uses/clobbers so
   dataflow keeps the restores alive.
   NOTE(review): lines are missing from this extract (return type,
   braces, declaration of `regno', the `else' introducing the large
   `amount' path); comments describe only the visible code.  */
19062 thumb1_expand_epilogue (void)
19064 HOST_WIDE_INT amount;
19065 arm_stack_offsets *offsets;
19068 /* Naked functions don't have prologues. */
19069 if (IS_NAKED (arm_current_func_type ()))
19072 offsets = arm_get_frame_offsets ();
19073 amount = offsets->outgoing_args - offsets->saved_regs;
19075 if (frame_pointer_needed)
19077 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19078 amount = offsets->locals_base - offsets->saved_regs;
19081 gcc_assert (amount >= 0);
19085 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19086 GEN_INT (amount)));
19089 /* r3 is always free in the epilogue. */
19090 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19092 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19093 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19097 /* Emit a USE (stack_pointer_rtx), so that
19098 the stack adjustment will not be deleted. */
19099 emit_insn (gen_prologue_use (stack_pointer_rtx));
19101 if (crtl->profile || !TARGET_SCHED_PROLOG)
19102 emit_insn (gen_blockage ());
19104 /* Emit a clobber for each insn that will be restored in the epilogue,
19105 so that flow2 will get register lifetimes correct. */
19106 for (regno = 0; regno < 13; regno++)
19107 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19108 emit_clobber (gen_rtx_REG (SImode, regno));
19110 if (! df_regs_ever_live_p (LR_REGNUM))
19111 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
/* Emit the textual Thumb-1 prologue: the ARM->Thumb entry shim for
   functions called in ARM mode, pushes of pretended argument
   registers, the optional backtrace structure, and the register
   pushes (low registers directly, high registers via low-register
   shuffles).
   NOTE(review): this extract is missing many lines (return type,
   braces, declarations such as `name', `regno', `num_pushes',
   `offset', `pc_delta', else-arms); comments describe only the
   visible code.  */
19115 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19117 arm_stack_offsets *offsets;
19118 unsigned long live_regs_mask = 0;
19119 unsigned long l_mask;
19120 unsigned high_regs_pushed = 0;
19121 int cfa_offset = 0;
19124 if (IS_NAKED (arm_current_func_type ()))
19127 if (is_called_in_ARM_mode (current_function_decl))
19131 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19132 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19134 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19136 /* Generate code sequence to switch us into Thumb mode. */
19137 /* The .code 32 directive has already been emitted by
19138 ASM_DECLARE_FUNCTION_NAME. */
19139 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19140 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19142 /* Generate a label, so that the debugger will notice the
19143 change in instruction sets. This label is also used by
19144 the assembler to bypass the ARM code when this function
19145 is called from a Thumb encoded function elsewhere in the
19146 same file. Hence the definition of STUB_NAME here must
19147 agree with the definition in gas/config/tc-arm.c. */
19149 #define STUB_NAME ".real_start_of"
19151 fprintf (f, "\t.code\t16\n");
19153 if (arm_dllexport_name_p (name))
19154 name = arm_strip_name_encoding (name);
19156 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19157 fprintf (f, "\t.thumb_func\n");
19158 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19161 if (crtl->args.pretend_args_size)
19163 /* Output unwind directive for the stack adjustment. */
19164 if (ARM_EABI_UNWIND_TABLES)
19165 fprintf (f, "\t.pad #%d\n",
19166 crtl->args.pretend_args_size);
/* Anonymous args: push the last N argument registers; otherwise just
   drop SP by the pretend size.  */
19168 if (cfun->machine->uses_anonymous_args)
19172 fprintf (f, "\tpush\t{");
19174 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19176 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19177 regno <= LAST_ARG_REGNUM;
19179 asm_fprintf (f, "%r%s", regno,
19180 regno == LAST_ARG_REGNUM ? "" : ", ");
19182 fprintf (f, "}\n");
19185 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19186 SP_REGNUM, SP_REGNUM,
19187 crtl->args.pretend_args_size);
19189 /* We don't need to record the stores for unwinding (would it
19190 help the debugger any if we did?), but record the change in
19191 the stack pointer. */
19192 if (dwarf2out_do_frame ())
19194 char *l = dwarf2out_cfi_label (false);
19196 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19197 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19201 /* Get the registers we are going to push. */
19202 offsets = arm_get_frame_offsets ();
19203 live_regs_mask = offsets->saved_regs_mask;
19204 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19205 l_mask = live_regs_mask & 0x40ff;
19206 /* Then count how many other high registers will need to be pushed. */
19207 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19209 if (TARGET_BACKTRACE)
19212 unsigned work_register;
19214 /* We have been asked to create a stack backtrace structure.
19215 The code looks like this:
19219 0 sub SP, #16 Reserve space for 4 registers.
19220 2 push {R7} Push low registers.
19221 4 add R7, SP, #20 Get the stack pointer before the push.
19222 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19223 8 mov R7, PC Get hold of the start of this code plus 12.
19224 10 str R7, [SP, #16] Store it.
19225 12 mov R7, FP Get hold of the current frame pointer.
19226 14 str R7, [SP, #4] Store it.
19227 16 mov R7, LR Get hold of the current return address.
19228 18 str R7, [SP, #12] Store it.
19229 20 add R7, SP, #16 Point at the start of the backtrace structure.
19230 22 mov FP, R7 Put this value into the frame pointer. */
19232 work_register = thumb_find_work_register (live_regs_mask);
19234 if (ARM_EABI_UNWIND_TABLES)
19235 asm_fprintf (f, "\t.pad #16\n");
19238 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19239 SP_REGNUM, SP_REGNUM);
19241 if (dwarf2out_do_frame ())
19243 char *l = dwarf2out_cfi_label (false);
19245 cfa_offset = cfa_offset + 16;
19246 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19251 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19252 offset = bit_count (l_mask) * UNITS_PER_WORD;
19257 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19258 offset + 16 + crtl->args.pretend_args_size);
19260 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19263 /* Make sure that the instruction fetching the PC is in the right place
19264 to calculate "start of backtrace creation code + 12". */
19267 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19268 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19270 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19271 ARM_HARD_FRAME_POINTER_REGNUM);
19272 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19277 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19278 ARM_HARD_FRAME_POINTER_REGNUM);
19279 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19281 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19282 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19286 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19287 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19289 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19291 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19292 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19294 /* Optimization: If we are not pushing any low registers but we are going
19295 to push some high registers then delay our first push. This will just
19296 be a push of LR and we can combine it with the push of the first high
19298 else if ((l_mask & 0xff) != 0
19299 || (high_regs_pushed == 0 && l_mask))
19300 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19302 if (high_regs_pushed)
19304 unsigned pushable_regs;
19305 unsigned next_hi_reg;
/* Find the highest live high register; it is saved first.  */
19307 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19308 if (live_regs_mask & (1 << next_hi_reg))
19311 pushable_regs = l_mask & 0xff;
19313 if (pushable_regs == 0)
19314 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19316 while (high_regs_pushed > 0)
19318 unsigned long real_regs_mask = 0;
19320 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19322 if (pushable_regs & (1 << regno))
/* Stage the high register's value in a low register so a Thumb
   push instruction can save it.  */
19324 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19326 high_regs_pushed --;
19327 real_regs_mask |= (1 << next_hi_reg);
19329 if (high_regs_pushed)
19331 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19333 if (live_regs_mask & (1 << next_hi_reg))
19338 pushable_regs &= ~((1 << regno) - 1);
19344 /* If we had to find a work register and we have not yet
19345 saved the LR then add it to the list of regs to push. */
19346 if (l_mask == (1 << LR_REGNUM))
19348 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19350 real_regs_mask | (1 << LR_REGNUM));
19354 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19359 /* Handle the case of a double word load into a low register from
19360 a computed memory address. The computed address may involve a
19361 register which is overwritten by the load. */
/* Emits two single-word ldr insns, ordered so that a base register that
   coincides with the low destination register is consumed before being
   overwritten.  Operand 0 is the destination REG, operand 1 the MEM;
   operand 2 is set here to <address>+4 for the high word.
   NOTE(review): this extract is missing lines (return type, braces,
   case labels such as REG/CONST/PLUS/LABEL_REF, `return ""' statements);
   comments describe only the visible code.  */
19363 thumb_load_double_from_address (rtx *operands)
19371 gcc_assert (GET_CODE (operands[0]) == REG);
19372 gcc_assert (GET_CODE (operands[1]) == MEM);
19374 /* Get the memory address. */
19375 addr = XEXP (operands[1], 0);
19377 /* Work out how the memory address is computed. */
19378 switch (GET_CODE (addr))
19381 operands[2] = adjust_address (operands[1], SImode, 4);
/* If the base register is also the low destination, load the high
   word first so the base survives for the second load.  */
19383 if (REGNO (operands[0]) == REGNO (addr))
19385 output_asm_insn ("ldr\t%H0, %2", operands);
19386 output_asm_insn ("ldr\t%0, %1", operands);
19390 output_asm_insn ("ldr\t%0, %1", operands);
19391 output_asm_insn ("ldr\t%H0, %2", operands);
19396 /* Compute <address> + 4 for the high order load. */
19397 operands[2] = adjust_address (operands[1], SImode, 4);
19399 output_asm_insn ("ldr\t%0, %1", operands);
19400 output_asm_insn ("ldr\t%H0, %2", operands);
19404 arg1 = XEXP (addr, 0);
19405 arg2 = XEXP (addr, 1);
19407 if (CONSTANT_P (arg1))
19408 base = arg2, offset = arg1;
19410 base = arg1, offset = arg2;
19412 gcc_assert (GET_CODE (base) == REG);
19414 /* Catch the case of <address> = <reg> + <reg> */
19415 if (GET_CODE (offset) == REG)
19417 int reg_offset = REGNO (offset);
19418 int reg_base = REGNO (base);
19419 int reg_dest = REGNO (operands[0]);
19421 /* Add the base and offset registers together into the
19422 higher destination register. */
19423 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19424 reg_dest + 1, reg_base, reg_offset);
19426 /* Load the lower destination register from the address in
19427 the higher destination register. */
19428 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19429 reg_dest, reg_dest + 1);
19431 /* Load the higher destination register from its own address
19433 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19434 reg_dest + 1, reg_dest + 1);
19438 /* Compute <address> + 4 for the high order load. */
19439 operands[2] = adjust_address (operands[1], SImode, 4);
19441 /* If the computed address is held in the low order register
19442 then load the high order register first, otherwise always
19443 load the low order register first. */
19444 if (REGNO (operands[0]) == REGNO (base))
19446 output_asm_insn ("ldr\t%H0, %2", operands);
19447 output_asm_insn ("ldr\t%0, %1", operands);
19451 output_asm_insn ("ldr\t%0, %1", operands);
19452 output_asm_insn ("ldr\t%H0, %2", operands);
19458 /* With no registers to worry about we can just load the value
19460 operands[2] = adjust_address (operands[1], SImode, 4);
19462 output_asm_insn ("ldr\t%H0, %2", operands);
19463 output_asm_insn ("ldr\t%0, %1", operands);
19467 gcc_unreachable ();
/* Output an N-word (N = 2 or 3) block move as ldmia/stmia pairs,
   first sorting the transfer registers (operands 4..6) into ascending
   order as the ldm/stm encodings require.
   NOTE(review): this extract is missing lines (return type, braces,
   the `switch (n)' statement, the swap temporaries, `return ""');
   the paired assignments below are the visible halves of
   three-line register swaps.  */
19474 thumb_output_move_mem_multiple (int n, rtx *operands)
19481 if (REGNO (operands[4]) > REGNO (operands[5]))
19484 operands[4] = operands[5];
19487 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19488 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19492 if (REGNO (operands[4]) > REGNO (operands[5]))
19495 operands[4] = operands[5];
19498 if (REGNO (operands[5]) > REGNO (operands[6]))
19501 operands[5] = operands[6];
19504 if (REGNO (operands[4]) > REGNO (operands[5]))
19507 operands[4] = operands[5];
19511 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
19512 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
19516 gcc_unreachable ();
19522 /* Output a call-via instruction for thumb state. */
/* Emits "bl <label>" where <label> names a shared trampoline that
   branches via REG; one label per register, shared per compilation
   unit in the plain text section or kept per-function when function
   sections make reachability uncertain.
   NOTE(review): lines are missing from this extract (return type,
   braces, declaration of `labelp', an `else' before the per-function
   branch).  */
19524 thumb_call_via_reg (rtx reg)
19526 int regno = REGNO (reg);
19529 gcc_assert (regno < LR_REGNUM);
19531 /* If we are in the normal text section we can use a single instance
19532 per compilation unit. If we are doing function sections, then we need
19533 an entry per section, since we can't rely on reachability. */
19534 if (in_section == text_section)
19536 thumb_call_reg_needed = 1;
19538 if (thumb_call_via_label[regno] == NULL)
19539 thumb_call_via_label[regno] = gen_label_rtx ();
19540 labelp = thumb_call_via_label + regno;
19544 if (cfun->machine->call_via[regno] == NULL)
19545 cfun->machine->call_via[regno] = gen_label_rtx ();
19546 labelp = cfun->machine->call_via + regno;
19549 output_asm_insn ("bl\t%a0", labelp);
19553 /* Routines for generating rtl. */
/* Expand a memory-to-memory copy of LEN (operand 2, a constant) bytes:
   12- and 8-byte chunks via the movmem12b/movmem8b patterns, then a
   word, a halfword and a byte as needed.
   NOTE(review): lines are missing from this extract (return type,
   braces, the while-loops that step `len' and `offset', the length
   tests such as `len >= 12'); comments describe only the visible code.  */
19555 thumb_expand_movmemqi (rtx *operands)
19557 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
19558 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
19559 HOST_WIDE_INT len = INTVAL (operands[2]);
19560 HOST_WIDE_INT offset = 0;
/* Bulk copies: these patterns also post-increment OUT and IN.  */
19564 emit_insn (gen_movmem12b (out, in, out, in));
19570 emit_insn (gen_movmem8b (out, in, out, in));
/* Remaining word copy.  */
19576 rtx reg = gen_reg_rtx (SImode);
19577 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
19578 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
/* Remaining halfword copy at OFFSET.  */
19585 rtx reg = gen_reg_rtx (HImode);
19586 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
19587 plus_constant (in, offset))));
19588 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
/* Trailing byte copy at OFFSET.  */
19596 rtx reg = gen_reg_rtx (QImode);
19597 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
19598 plus_constant (in, offset))));
19599 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
/* Handle storing a half-word to memory during reload by delegating to
   the thumb_movhi_clobber pattern (which may clobber a scratch,
   operand 2).  NOTE(review): return type and braces are missing from
   this extract.  */
19605 thumb_reload_out_hi (rtx *operands)
19607 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
19610 /* Handle reading a half-word from memory during reload. */
/* Never expected to be reached on this target; abort via
   gcc_unreachable.  NOTE(review): return type and braces are missing
   from this extract.  */
19612 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
19614 gcc_unreachable ();
19617 /* Return the length of a function name prefix
19618 that starts with the character 'c'. */
/* NOTE(review): this extract is missing lines — presumably a switch
   over C driven by the ARM_NAME_ENCODING_LENGTHS macro, plus the
   default `return 0'.  */
19620 arm_get_strip_length (int c)
19624 ARM_NAME_ENCODING_LENGTHS
19629 /* Return a pointer to a function's name with any
19630 and all prefix encodings stripped from it. */
/* NOTE(review): lines missing from this extract — the return type,
   declaration of `skip', the loop body advancing `name', and the
   final return.  */
19632 arm_strip_name_encoding (const char *name)
19636 while ((skip = arm_get_strip_length (* name)))
19642 /* If there is a '*' anywhere in the name's prefix, then
19643 emit the stripped name verbatim, otherwise prepend an
19644 underscore if leading underscores are being used. */
/* NOTE(review): lines missing from this extract — return type, braces,
   declarations of `skip'/`verbatim', the loop body advancing `name',
   and the `if (verbatim)' / `else' around the two output calls.  */
19646 arm_asm_output_labelref (FILE *stream, const char *name)
19651 while ((skip = arm_get_strip_length (* name)))
19653 verbatim |= (*name == '*');
19658 fputs (name, stream);
19660 asm_fprintf (stream, "%U%s", name);
19664 arm_file_start (void)
19668 if (TARGET_UNIFIED_ASM)
19669 asm_fprintf (asm_out_file, "\t.syntax unified\n");
19673 const char *fpu_name;
19674 if (arm_select[0].string)
19675 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
19676 else if (arm_select[1].string)
19677 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
19679 asm_fprintf (asm_out_file, "\t.cpu %s\n",
19680 all_cores[arm_default_cpu].name);
19682 if (TARGET_SOFT_FLOAT)
19685 fpu_name = "softvfp";
19687 fpu_name = "softfpa";
19691 int set_float_abi_attributes = 0;
19692 switch (arm_fpu_arch)
19697 case FPUTYPE_FPA_EMU2:
19700 case FPUTYPE_FPA_EMU3:
19703 case FPUTYPE_MAVERICK:
19704 fpu_name = "maverick";
19708 set_float_abi_attributes = 1;
19710 case FPUTYPE_VFP3D16:
19711 fpu_name = "vfpv3-d16";
19712 set_float_abi_attributes = 1;
19715 fpu_name = "vfpv3";
19716 set_float_abi_attributes = 1;
19720 set_float_abi_attributes = 1;
19722 case FPUTYPE_NEON_FP16:
19723 fpu_name = "neon-fp16";
19724 set_float_abi_attributes = 1;
19729 if (set_float_abi_attributes)
19731 if (TARGET_HARD_FLOAT)
19732 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
19733 if (TARGET_HARD_FLOAT_ABI)
19734 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
19737 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
19739 /* Some of these attributes only apply when the corresponding features
19740 are used. However we don't have any easy way of figuring this out.
19741 Conservatively record the setting that would have been used. */
19743 /* Tag_ABI_FP_rounding. */
19744 if (flag_rounding_math)
19745 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
19746 if (!flag_unsafe_math_optimizations)
19748 /* Tag_ABI_FP_denomal. */
19749 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
19750 /* Tag_ABI_FP_exceptions. */
19751 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
19753 /* Tag_ABI_FP_user_exceptions. */
19754 if (flag_signaling_nans)
19755 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
19756 /* Tag_ABI_FP_number_model. */
19757 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
19758 flag_finite_math_only ? 1 : 3);
19760 /* Tag_ABI_align8_needed. */
19761 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
19762 /* Tag_ABI_align8_preserved. */
19763 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
19764 /* Tag_ABI_enum_size. */
19765 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
19766 flag_short_enums ? 1 : 2);
19768 /* Tag_ABI_optimization_goals. */
19771 else if (optimize >= 2)
19777 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
19779 /* Tag_ABI_FP_16bit_format. */
19780 if (arm_fp16_format)
19781 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
19782 (int)arm_fp16_format);
19784 if (arm_lang_output_object_attributes_hook)
19785 arm_lang_output_object_attributes_hook();
19787 default_file_start();
19791 arm_file_end (void)
19795 if (NEED_INDICATE_EXEC_STACK)
19796 /* Add .note.GNU-stack. */
19797 file_end_indicate_exec_stack ();
19799 if (! thumb_call_reg_needed)
19802 switch_to_section (text_section);
19803 asm_fprintf (asm_out_file, "\t.code 16\n");
19804 ASM_OUTPUT_ALIGN (asm_out_file, 1);
19806 for (regno = 0; regno < LR_REGNUM; regno++)
19808 rtx label = thumb_call_via_label[regno];
19812 targetm.asm_out.internal_label (asm_out_file, "L",
19813 CODE_LABEL_NUMBER (label));
19814 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19820 /* Symbols in the text segment can be accessed without indirecting via the
19821 constant pool; it may take an extra binary operation, but this is still
19822 faster than indirecting via memory. Don't do this when not optimizing,
19823 since we won't be calculating al of the offsets necessary to do this
19827 arm_encode_section_info (tree decl, rtx rtl, int first)
19829 if (optimize > 0 && TREE_CONSTANT (decl))
19830 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
19832 default_encode_section_info (decl, rtl, first);
19834 #endif /* !ARM_PE */
19837 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
19839 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
19840 && !strcmp (prefix, "L"))
19842 arm_ccfsm_state = 0;
19843 arm_target_insn = NULL;
19845 default_internal_label (stream, prefix, labelno);
19848 /* Output code to add DELTA to the first argument, and then jump
19849 to FUNCTION. Used for C++ multiple inheritance. */
19851 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
19852 HOST_WIDE_INT delta,
19853 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
19856 static int thunk_label = 0;
19859 int mi_delta = delta;
19860 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
19862 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
19865 mi_delta = - mi_delta;
19869 int labelno = thunk_label++;
19870 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
19871 /* Thunks are entered in arm mode when avaiable. */
19872 if (TARGET_THUMB1_ONLY)
19874 /* push r3 so we can use it as a temporary. */
19875 /* TODO: Omit this save if r3 is not used. */
19876 fputs ("\tpush {r3}\n", file);
19877 fputs ("\tldr\tr3, ", file);
19881 fputs ("\tldr\tr12, ", file);
19883 assemble_name (file, label);
19884 fputc ('\n', file);
19887 /* If we are generating PIC, the ldr instruction below loads
19888 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
19889 the address of the add + 8, so we have:
19891 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
19894 Note that we have "+ 1" because some versions of GNU ld
19895 don't set the low bit of the result for R_ARM_REL32
19896 relocations against thumb function symbols.
19897 On ARMv6M this is +4, not +8. */
19898 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
19899 assemble_name (file, labelpc);
19900 fputs (":\n", file);
19901 if (TARGET_THUMB1_ONLY)
19903 /* This is 2 insns after the start of the thunk, so we know it
19904 is 4-byte aligned. */
19905 fputs ("\tadd\tr3, pc, r3\n", file);
19906 fputs ("\tmov r12, r3\n", file);
19909 fputs ("\tadd\tr12, pc, r12\n", file);
19911 else if (TARGET_THUMB1_ONLY)
19912 fputs ("\tmov r12, r3\n", file);
19914 if (TARGET_THUMB1_ONLY)
19916 if (mi_delta > 255)
19918 fputs ("\tldr\tr3, ", file);
19919 assemble_name (file, label);
19920 fputs ("+4\n", file);
19921 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
19922 mi_op, this_regno, this_regno);
19924 else if (mi_delta != 0)
19926 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
19927 mi_op, this_regno, this_regno,
19933 /* TODO: Use movw/movt for large constants when available. */
19934 while (mi_delta != 0)
19936 if ((mi_delta & (3 << shift)) == 0)
19940 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
19941 mi_op, this_regno, this_regno,
19942 mi_delta & (0xff << shift));
19943 mi_delta &= ~(0xff << shift);
19950 if (TARGET_THUMB1_ONLY)
19951 fputs ("\tpop\t{r3}\n", file);
19953 fprintf (file, "\tbx\tr12\n");
19954 ASM_OUTPUT_ALIGN (file, 2);
19955 assemble_name (file, label);
19956 fputs (":\n", file);
19959 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
19960 rtx tem = XEXP (DECL_RTL (function), 0);
19961 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
19962 tem = gen_rtx_MINUS (GET_MODE (tem),
19964 gen_rtx_SYMBOL_REF (Pmode,
19965 ggc_strdup (labelpc)));
19966 assemble_integer (tem, 4, BITS_PER_WORD, 1);
19969 /* Output ".word .LTHUNKn". */
19970 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
19972 if (TARGET_THUMB1_ONLY && mi_delta > 255)
19973 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
19977 fputs ("\tb\t", file);
19978 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
19979 if (NEED_PLT_RELOC)
19980 fputs ("(PLT)", file);
19981 fputc ('\n', file);
19986 arm_emit_vector_const (FILE *file, rtx x)
19989 const char * pattern;
19991 gcc_assert (GET_CODE (x) == CONST_VECTOR);
19993 switch (GET_MODE (x))
19995 case V2SImode: pattern = "%08x"; break;
19996 case V4HImode: pattern = "%04x"; break;
19997 case V8QImode: pattern = "%02x"; break;
19998 default: gcc_unreachable ();
20001 fprintf (file, "0x");
20002 for (i = CONST_VECTOR_NUNITS (x); i--;)
20006 element = CONST_VECTOR_ELT (x, i);
20007 fprintf (file, pattern, INTVAL (element));
20013 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20014 HFmode constant pool entries are actually loaded with ldr. */
20016 arm_emit_fp16_const (rtx c)
20021 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20022 bits = real_to_target (NULL, &r, HFmode);
20023 if (WORDS_BIG_ENDIAN)
20024 assemble_zeros (2);
20025 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20026 if (!WORDS_BIG_ENDIAN)
20027 assemble_zeros (2);
20031 arm_output_load_gr (rtx *operands)
20038 if (GET_CODE (operands [1]) != MEM
20039 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20040 || GET_CODE (reg = XEXP (sum, 0)) != REG
20041 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20042 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20043 return "wldrw%?\t%0, %1";
20045 /* Fix up an out-of-range load of a GR register. */
20046 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20047 wcgr = operands[0];
20049 output_asm_insn ("ldr%?\t%0, %1", operands);
20051 operands[0] = wcgr;
20053 output_asm_insn ("tmcr%?\t%0, %1", operands);
20054 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20059 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20061 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20062 named arg and all anonymous args onto the stack.
20063 XXX I know the prologue shouldn't be pushing registers, but it is faster
20067 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20068 enum machine_mode mode,
20071 int second_time ATTRIBUTE_UNUSED)
20075 cfun->machine->uses_anonymous_args = 1;
20076 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20078 nregs = pcum->aapcs_ncrn;
20079 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20083 nregs = pcum->nregs;
20085 if (nregs < NUM_ARG_REGS)
20086 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
20089 /* Return nonzero if the CONSUMER instruction (a store) does not need
20090 PRODUCER's value to calculate the address. */
20093 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20095 rtx value = PATTERN (producer);
20096 rtx addr = PATTERN (consumer);
20098 if (GET_CODE (value) == COND_EXEC)
20099 value = COND_EXEC_CODE (value);
20100 if (GET_CODE (value) == PARALLEL)
20101 value = XVECEXP (value, 0, 0);
20102 value = XEXP (value, 0);
20103 if (GET_CODE (addr) == COND_EXEC)
20104 addr = COND_EXEC_CODE (addr);
20105 if (GET_CODE (addr) == PARALLEL)
20106 addr = XVECEXP (addr, 0, 0);
20107 addr = XEXP (addr, 0);
20109 return !reg_overlap_mentioned_p (value, addr);
20112 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20113 have an early register shift value or amount dependency on the
20114 result of PRODUCER. */
20117 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20119 rtx value = PATTERN (producer);
20120 rtx op = PATTERN (consumer);
20123 if (GET_CODE (value) == COND_EXEC)
20124 value = COND_EXEC_CODE (value);
20125 if (GET_CODE (value) == PARALLEL)
20126 value = XVECEXP (value, 0, 0);
20127 value = XEXP (value, 0);
20128 if (GET_CODE (op) == COND_EXEC)
20129 op = COND_EXEC_CODE (op);
20130 if (GET_CODE (op) == PARALLEL)
20131 op = XVECEXP (op, 0, 0);
20134 early_op = XEXP (op, 0);
20135 /* This is either an actual independent shift, or a shift applied to
20136 the first operand of another operation. We want the whole shift
20138 if (GET_CODE (early_op) == REG)
20141 return !reg_overlap_mentioned_p (value, early_op);
20144 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20145 have an early register shift value dependency on the result of
20149 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20151 rtx value = PATTERN (producer);
20152 rtx op = PATTERN (consumer);
20155 if (GET_CODE (value) == COND_EXEC)
20156 value = COND_EXEC_CODE (value);
20157 if (GET_CODE (value) == PARALLEL)
20158 value = XVECEXP (value, 0, 0);
20159 value = XEXP (value, 0);
20160 if (GET_CODE (op) == COND_EXEC)
20161 op = COND_EXEC_CODE (op);
20162 if (GET_CODE (op) == PARALLEL)
20163 op = XVECEXP (op, 0, 0);
20166 early_op = XEXP (op, 0);
20168 /* This is either an actual independent shift, or a shift applied to
20169 the first operand of another operation. We want the value being
20170 shifted, in either case. */
20171 if (GET_CODE (early_op) != REG)
20172 early_op = XEXP (early_op, 0);
20174 return !reg_overlap_mentioned_p (value, early_op);
20177 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20178 have an early register mult dependency on the result of
20182 arm_no_early_mul_dep (rtx producer, rtx consumer)
20184 rtx value = PATTERN (producer);
20185 rtx op = PATTERN (consumer);
20187 if (GET_CODE (value) == COND_EXEC)
20188 value = COND_EXEC_CODE (value);
20189 if (GET_CODE (value) == PARALLEL)
20190 value = XVECEXP (value, 0, 0);
20191 value = XEXP (value, 0);
20192 if (GET_CODE (op) == COND_EXEC)
20193 op = COND_EXEC_CODE (op);
20194 if (GET_CODE (op) == PARALLEL)
20195 op = XVECEXP (op, 0, 0);
20198 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20200 if (GET_CODE (XEXP (op, 0)) == MULT)
20201 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20203 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20209 /* We can't rely on the caller doing the proper promotion when
20210 using APCS or ATPCS. */
20213 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20215 return !TARGET_AAPCS_BASED;
20218 static enum machine_mode
20219 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20220 enum machine_mode mode,
20221 int *punsignedp ATTRIBUTE_UNUSED,
20222 const_tree fntype ATTRIBUTE_UNUSED,
20223 int for_return ATTRIBUTE_UNUSED)
20225 if (GET_MODE_CLASS (mode) == MODE_INT
20226 && GET_MODE_SIZE (mode) < 4)
20232 /* AAPCS based ABIs use short enums by default. */
20235 arm_default_short_enums (void)
20237 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20241 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20244 arm_align_anon_bitfield (void)
20246 return TARGET_AAPCS_BASED;
20250 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20253 arm_cxx_guard_type (void)
20255 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20258 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20259 has an accumulator dependency on the result of the producer (a
20260 multiplication instruction) and no other dependency on that result. */
20262 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20264 rtx mul = PATTERN (producer);
20265 rtx mac = PATTERN (consumer);
20267 rtx mac_op0, mac_op1, mac_acc;
20269 if (GET_CODE (mul) == COND_EXEC)
20270 mul = COND_EXEC_CODE (mul);
20271 if (GET_CODE (mac) == COND_EXEC)
20272 mac = COND_EXEC_CODE (mac);
20274 /* Check that mul is of the form (set (...) (mult ...))
20275 and mla is of the form (set (...) (plus (mult ...) (...))). */
20276 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20277 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20278 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20281 mul_result = XEXP (mul, 0);
20282 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20283 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20284 mac_acc = XEXP (XEXP (mac, 1), 1);
20286 return (reg_overlap_mentioned_p (mul_result, mac_acc)
20287 && !reg_overlap_mentioned_p (mul_result, mac_op0)
20288 && !reg_overlap_mentioned_p (mul_result, mac_op1));
20292 /* The EABI says test the least significant bit of a guard variable. */
20295 arm_cxx_guard_mask_bit (void)
20297 return TARGET_AAPCS_BASED;
20301 /* The EABI specifies that all array cookies are 8 bytes long. */
20304 arm_get_cookie_size (tree type)
20308 if (!TARGET_AAPCS_BASED)
20309 return default_cxx_get_cookie_size (type);
20311 size = build_int_cst (sizetype, 8);
20316 /* The EABI says that array cookies should also contain the element size. */
20319 arm_cookie_has_size (void)
20321 return TARGET_AAPCS_BASED;
20325 /* The EABI says constructors and destructors should return a pointer to
20326 the object constructed/destroyed. */
20329 arm_cxx_cdtor_returns_this (void)
20331 return TARGET_AAPCS_BASED;
20334 /* The EABI says that an inline function may never be the key
20338 arm_cxx_key_method_may_be_inline (void)
20340 return !TARGET_AAPCS_BASED;
20344 arm_cxx_determine_class_data_visibility (tree decl)
20346 if (!TARGET_AAPCS_BASED
20347 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20350 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20351 is exported. However, on systems without dynamic vague linkage,
20352 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20353 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20354 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20356 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20357 DECL_VISIBILITY_SPECIFIED (decl) = 1;
20361 arm_cxx_class_data_always_comdat (void)
20363 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20364 vague linkage if the class has no key function. */
20365 return !TARGET_AAPCS_BASED;
20369 /* The EABI says __aeabi_atexit should be used to register static
20373 arm_cxx_use_aeabi_atexit (void)
20375 return TARGET_AAPCS_BASED;
20380 arm_set_return_address (rtx source, rtx scratch)
20382 arm_stack_offsets *offsets;
20383 HOST_WIDE_INT delta;
20385 unsigned long saved_regs;
20387 offsets = arm_get_frame_offsets ();
20388 saved_regs = offsets->saved_regs_mask;
20390 if ((saved_regs & (1 << LR_REGNUM)) == 0)
20391 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20394 if (frame_pointer_needed)
20395 addr = plus_constant(hard_frame_pointer_rtx, -4);
20398 /* LR will be the first saved register. */
20399 delta = offsets->outgoing_args - (offsets->frame + 4);
20404 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20405 GEN_INT (delta & ~4095)));
20410 addr = stack_pointer_rtx;
20412 addr = plus_constant (addr, delta);
20414 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20420 thumb_set_return_address (rtx source, rtx scratch)
20422 arm_stack_offsets *offsets;
20423 HOST_WIDE_INT delta;
20424 HOST_WIDE_INT limit;
20427 unsigned long mask;
20431 offsets = arm_get_frame_offsets ();
20432 mask = offsets->saved_regs_mask;
20433 if (mask & (1 << LR_REGNUM))
20436 /* Find the saved regs. */
20437 if (frame_pointer_needed)
20439 delta = offsets->soft_frame - offsets->saved_args;
20440 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20446 delta = offsets->outgoing_args - offsets->saved_args;
20449 /* Allow for the stack frame. */
20450 if (TARGET_THUMB1 && TARGET_BACKTRACE)
20452 /* The link register is always the first saved register. */
20455 /* Construct the address. */
20456 addr = gen_rtx_REG (SImode, reg);
20459 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20460 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20464 addr = plus_constant (addr, delta);
20466 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20469 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20472 /* Implements target hook vector_mode_supported_p. */
20474 arm_vector_mode_supported_p (enum machine_mode mode)
20476 /* Neon also supports V2SImode, etc. listed in the clause below. */
20477 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20478 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20481 if ((TARGET_NEON || TARGET_IWMMXT)
20482 && ((mode == V2SImode)
20483 || (mode == V4HImode)
20484 || (mode == V8QImode)))
20490 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20491 ARM insns and therefore guarantee that the shift count is modulo 256.
20492 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20493 guarantee no particular behavior for out-of-range counts. */
20495 static unsigned HOST_WIDE_INT
20496 arm_shift_truncation_mask (enum machine_mode mode)
20498 return mode == SImode ? 255 : 0;
20502 /* Map internal gcc register numbers to DWARF2 register numbers. */
20505 arm_dbx_register_number (unsigned int regno)
20510 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20511 compatibility. The EABI defines them as registers 96-103. */
20512 if (IS_FPA_REGNUM (regno))
20513 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20515 /* FIXME: VFPv3 register numbering. */
20516 if (IS_VFP_REGNUM (regno))
20517 return 64 + regno - FIRST_VFP_REGNUM;
20519 if (IS_IWMMXT_GR_REGNUM (regno))
20520 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20522 if (IS_IWMMXT_REGNUM (regno))
20523 return 112 + regno - FIRST_IWMMXT_REGNUM;
20525 gcc_unreachable ();
20529 #ifdef TARGET_UNWIND_INFO
20530 /* Emit unwind directives for a store-multiple instruction or stack pointer
20531 push during alignment.
20532 These should only ever be generated by the function prologue code, so
20533 expect them to have a particular form. */
20536 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
20539 HOST_WIDE_INT offset;
20540 HOST_WIDE_INT nregs;
20546 e = XVECEXP (p, 0, 0);
20547 if (GET_CODE (e) != SET)
20550 /* First insn will adjust the stack pointer. */
20551 if (GET_CODE (e) != SET
20552 || GET_CODE (XEXP (e, 0)) != REG
20553 || REGNO (XEXP (e, 0)) != SP_REGNUM
20554 || GET_CODE (XEXP (e, 1)) != PLUS)
20557 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
20558 nregs = XVECLEN (p, 0) - 1;
20560 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
20563 /* The function prologue may also push pc, but not annotate it as it is
20564 never restored. We turn this into a stack pointer adjustment. */
20565 if (nregs * 4 == offset - 4)
20567 fprintf (asm_out_file, "\t.pad #4\n");
20571 fprintf (asm_out_file, "\t.save {");
20573 else if (IS_VFP_REGNUM (reg))
20576 fprintf (asm_out_file, "\t.vsave {");
20578 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
20580 /* FPA registers are done differently. */
20581 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
20585 /* Unknown register type. */
20588 /* If the stack increment doesn't match the size of the saved registers,
20589 something has gone horribly wrong. */
20590 if (offset != nregs * reg_size)
20595 /* The remaining insns will describe the stores. */
20596 for (i = 1; i <= nregs; i++)
20598 /* Expect (set (mem <addr>) (reg)).
20599 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
20600 e = XVECEXP (p, 0, i);
20601 if (GET_CODE (e) != SET
20602 || GET_CODE (XEXP (e, 0)) != MEM
20603 || GET_CODE (XEXP (e, 1)) != REG)
20606 reg = REGNO (XEXP (e, 1));
20611 fprintf (asm_out_file, ", ");
20612 /* We can't use %r for vfp because we need to use the
20613 double precision register names. */
20614 if (IS_VFP_REGNUM (reg))
20615 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
20617 asm_fprintf (asm_out_file, "%r", reg);
20619 #ifdef ENABLE_CHECKING
20620 /* Check that the addresses are consecutive. */
20621 e = XEXP (XEXP (e, 0), 0);
20622 if (GET_CODE (e) == PLUS)
20624 offset += reg_size;
20625 if (GET_CODE (XEXP (e, 0)) != REG
20626 || REGNO (XEXP (e, 0)) != SP_REGNUM
20627 || GET_CODE (XEXP (e, 1)) != CONST_INT
20628 || offset != INTVAL (XEXP (e, 1)))
20632 || GET_CODE (e) != REG
20633 || REGNO (e) != SP_REGNUM)
20637 fprintf (asm_out_file, "}\n");
20640 /* Emit unwind directives for a SET. */
20643 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
20651 switch (GET_CODE (e0))
20654 /* Pushing a single register. */
20655 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
20656 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
20657 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
20660 asm_fprintf (asm_out_file, "\t.save ");
20661 if (IS_VFP_REGNUM (REGNO (e1)))
20662 asm_fprintf(asm_out_file, "{d%d}\n",
20663 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
20665 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
20669 if (REGNO (e0) == SP_REGNUM)
20671 /* A stack increment. */
20672 if (GET_CODE (e1) != PLUS
20673 || GET_CODE (XEXP (e1, 0)) != REG
20674 || REGNO (XEXP (e1, 0)) != SP_REGNUM
20675 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
20678 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
20679 -INTVAL (XEXP (e1, 1)));
20681 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
20683 HOST_WIDE_INT offset;
20685 if (GET_CODE (e1) == PLUS)
20687 if (GET_CODE (XEXP (e1, 0)) != REG
20688 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
20690 reg = REGNO (XEXP (e1, 0));
20691 offset = INTVAL (XEXP (e1, 1));
20692 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
20693 HARD_FRAME_POINTER_REGNUM, reg,
20694 INTVAL (XEXP (e1, 1)));
20696 else if (GET_CODE (e1) == REG)
20699 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
20700 HARD_FRAME_POINTER_REGNUM, reg);
20705 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
20707 /* Move from sp to reg. */
20708 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
20710 else if (GET_CODE (e1) == PLUS
20711 && GET_CODE (XEXP (e1, 0)) == REG
20712 && REGNO (XEXP (e1, 0)) == SP_REGNUM
20713 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
20715 /* Set reg to offset from sp. */
20716 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
20717 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
20719 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
20721 /* Stack pointer save before alignment. */
20723 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
20736 /* Emit unwind directives for the given insn. */
20739 arm_unwind_emit (FILE * asm_out_file, rtx insn)
20743 if (!ARM_EABI_UNWIND_TABLES)
20746 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
20747 && (TREE_NOTHROW (current_function_decl)
20748 || crtl->all_throwers_are_sibcalls))
20751 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
20754 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
20756 pat = XEXP (pat, 0);
20758 pat = PATTERN (insn);
20760 switch (GET_CODE (pat))
20763 arm_unwind_emit_set (asm_out_file, pat);
20767 /* Store multiple. */
20768 arm_unwind_emit_sequence (asm_out_file, pat);
20777 /* Output a reference from a function exception table to the type_info
20778 object X. The EABI specifies that the symbol should be relocated by
20779 an R_ARM_TARGET2 relocation. */
20782 arm_output_ttype (rtx x)
20784 fputs ("\t.word\t", asm_out_file);
20785 output_addr_const (asm_out_file, x);
20786 /* Use special relocations for symbol references. */
20787 if (GET_CODE (x) != CONST_INT)
20788 fputs ("(TARGET2)", asm_out_file);
20789 fputc ('\n', asm_out_file);
20793 #endif /* TARGET_UNWIND_INFO */
20796 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
20797 stack alignment. */
20800 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
20802 rtx unspec = SET_SRC (pattern);
20803 gcc_assert (GET_CODE (unspec) == UNSPEC);
20807 case UNSPEC_STACK_ALIGN:
20808 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
20809 put anything on the stack, so hopefully it won't matter.
20810 CFA = SP will be correct after alignment. */
20811 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
20812 SET_DEST (pattern));
20815 gcc_unreachable ();
20820 /* Output unwind directives for the start/end of a function. */
20823 arm_output_fn_unwind (FILE * f, bool prologue)
20825 if (!ARM_EABI_UNWIND_TABLES)
20829 fputs ("\t.fnstart\n", f);
20832 /* If this function will never be unwound, then mark it as such.
20833 The came condition is used in arm_unwind_emit to suppress
20834 the frame annotations. */
20835 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
20836 && (TREE_NOTHROW (current_function_decl)
20837 || crtl->all_throwers_are_sibcalls))
20838 fputs("\t.cantunwind\n", f);
20840 fputs ("\t.fnend\n", f);
20845 arm_emit_tls_decoration (FILE *fp, rtx x)
20847 enum tls_reloc reloc;
20850 val = XVECEXP (x, 0, 0);
20851 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
20853 output_addr_const (fp, val);
20858 fputs ("(tlsgd)", fp);
20861 fputs ("(tlsldm)", fp);
20864 fputs ("(tlsldo)", fp);
20867 fputs ("(gottpoff)", fp);
20870 fputs ("(tpoff)", fp);
20873 gcc_unreachable ();
20881 fputs (" + (. - ", fp);
20882 output_addr_const (fp, XVECEXP (x, 0, 2));
20884 output_addr_const (fp, XVECEXP (x, 0, 3));
20894 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
20897 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
20899 gcc_assert (size == 4);
20900 fputs ("\t.word\t", file);
20901 output_addr_const (file, x);
20902 fputs ("(tlsldo)", file);
20906 arm_output_addr_const_extra (FILE *fp, rtx x)
20908 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
20909 return arm_emit_tls_decoration (fp, x);
20910 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
20913 int labelno = INTVAL (XVECEXP (x, 0, 0));
20915 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
20916 assemble_name_raw (fp, label);
20920 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
20922 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
20926 output_addr_const (fp, XVECEXP (x, 0, 0));
20930 else if (GET_CODE (x) == CONST_VECTOR)
20931 return arm_emit_vector_const (fp, x);
20936 /* Output assembly for a shift instruction.
20937 SET_FLAGS determines how the instruction modifies the condition codes.
20938 0 - Do not set condition codes.
20939 1 - Set condition codes.
20940 2 - Use smallest instruction. */
20942 arm_output_shift(rtx * operands, int set_flags)
20945 static const char flag_chars[3] = {'?', '.', '!'};
20950 c = flag_chars[set_flags];
20951 if (TARGET_UNIFIED_ASM)
20953 shift = shift_op(operands[3], &val);
20957 operands[2] = GEN_INT(val);
20958 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
20961 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
20964 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
20965 output_asm_insn (pattern, operands);
20969 /* Output a Thumb-1 casesi dispatch sequence. */
20971 thumb1_output_casesi (rtx *operands)
20973 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
20974 addr_diff_vec_flags flags;
20976 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
20978 flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
20980 switch (GET_MODE(diff_vec))
20983 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
20984 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
20986 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
20987 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
20989 return "bl\t%___gnu_thumb1_case_si";
20991 gcc_unreachable ();
20995 /* Output a Thumb-2 casesi instruction. */
20997 thumb2_output_casesi (rtx *operands)
20999 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21001 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21003 output_asm_insn ("cmp\t%0, %1", operands);
21004 output_asm_insn ("bhi\t%l3", operands);
21005 switch (GET_MODE(diff_vec))
21008 return "tbb\t[%|pc, %0]";
21010 return "tbh\t[%|pc, %0, lsl #1]";
21014 output_asm_insn ("adr\t%4, %l2", operands);
21015 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21016 output_asm_insn ("add\t%4, %4, %5", operands);
21021 output_asm_insn ("adr\t%4, %l2", operands);
21022 return "ldr\t%|pc, [%4, %0, lsl #2]";
21025 gcc_unreachable ();
21029 /* Most ARM cores are single issue, but some newer ones can dual issue.
21030 The scheduler descriptions rely on this being correct. */
21032 arm_issue_rate (void)
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
  enum machine_mode mode;		/* Vector mode to match.  */
  const char *element_type_name;	/* Name of the NEON element type.  */
  const char *aapcs_name;		/* AAPCS-mandated mangled name.  */
} arm_mangle_map_entry;
static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  /* End-of-table sentinel; arm_mangle_type stops here.  */
  { VOIDmode, NULL, NULL }
/* Implement TARGET_MANGLE_TYPE.  Return the AAPCS-mandated mangled
   name for TYPE, or NULL to fall back to the default mangling.  */
arm_mangle_type (const_tree type)
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
      /* Warn at most once per compilation about the ABI change.  */
      static bool warned;
      if (!warned && warn_psabi)
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
      return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)

  if (TREE_CODE (type) != VECTOR_TYPE)

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
      tree elt_type = TREE_TYPE (type);

      /* Match both the vector mode and the element type's declared
	 name, so only true NEON builtin element types qualify.  */
      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

  /* Use the default mangling for unrecognized (possibly user-defined)  */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
  /* Low registers r0-r7 first...  */
  3, 2, 1, 0, 4, 5, 6, 7,
  /* ...then lr (14) and ip (12) ahead of r8-r11, with sp (13) and
     pc (15) last.  */
  14, 12, 8, 9, 10, 11, 13, 15
/* Adjust register allocation order when compiling for Thumb.  */
arm_order_regs_for_local_alloc (void)
  /* Start from the target's default allocation order.  */
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  /* Overwrite the leading entries with the Thumb core-register order.
     NOTE(review): presumably guarded by a TARGET_THUMB check that is
     not visible in this copy -- confirm against the full source.  */
  memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	  sizeof (thumb_core_reg_alloc_order));
/* Set default optimization options.  LEVEL is the -O level; SIZE is
   nonzero for -Os (currently unused here).  */
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.
     NOTE(review): the "level > 0" guard implied by this comment is
     not visible in this copy -- confirm against the full source.  */
  flag_section_anchors = 2;
21166 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
21169 arm_frame_pointer_required (void)
21171 return (cfun->has_nonlocal_label
21172 || SUBTARGET_FRAME_POINTER_REQUIRED
21173 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
21176 #include "gt-arm.h"