1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "conditions.h"
38 #include "insn-attr.h"
50 #include "integrate.h"
53 #include "target-def.h"
55 #include "langhooks.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
88 static arm_cc get_arm_condition_code (rtx);
89 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
90 static rtx is_jump_table (rtx);
91 static const char *output_multi_immediate (rtx *, const char *, const char *,
93 static const char *shift_op (rtx, HOST_WIDE_INT *);
94 static struct machine_function *arm_init_machine_status (void);
95 static void thumb_exit (FILE *, int);
96 static rtx is_jump_table (rtx);
97 static HOST_WIDE_INT get_jump_table_size (rtx);
98 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
99 static Mnode *add_minipool_forward_ref (Mfix *);
100 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
101 static Mnode *add_minipool_backward_ref (Mfix *);
102 static void assign_minipool_offsets (Mfix *);
103 static void arm_print_value (FILE *, rtx);
104 static void dump_minipool (rtx);
105 static int arm_barrier_cost (rtx);
106 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
107 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
108 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
110 static void arm_reorg (void);
111 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
112 static unsigned long arm_compute_save_reg0_reg12_mask (void);
113 static unsigned long arm_compute_save_reg_mask (void);
114 static unsigned long arm_isr_value (tree);
115 static unsigned long arm_compute_func_type (void);
116 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
118 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
119 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
120 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
122 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
123 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
125 static int arm_comp_type_attributes (const_tree, const_tree);
126 static void arm_set_default_type_attributes (tree);
127 static int arm_adjust_cost (rtx, rtx, rtx, int);
128 static int count_insns_for_constant (HOST_WIDE_INT, int);
129 static int arm_get_strip_length (int);
130 static bool arm_function_ok_for_sibcall (tree, tree);
131 static enum machine_mode arm_promote_function_mode (const_tree,
132 enum machine_mode, int *,
134 static bool arm_return_in_memory (const_tree, const_tree);
135 static rtx arm_function_value (const_tree, const_tree, bool);
136 static rtx arm_libcall_value (enum machine_mode, rtx);
138 static void arm_internal_label (FILE *, const char *, unsigned long);
139 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
141 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
142 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
143 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
144 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
145 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
146 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
147 static bool arm_rtx_costs (rtx, int, int, int *, bool);
148 static int arm_address_cost (rtx, bool);
149 static bool arm_memory_load_p (rtx);
150 static bool arm_cirrus_insn_p (rtx);
151 static void cirrus_reorg (rtx);
152 static void arm_init_builtins (void);
153 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
154 static void arm_init_iwmmxt_builtins (void);
155 static rtx safe_vector_operand (rtx, enum machine_mode);
156 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
157 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
158 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
159 static void emit_constant_insn (rtx cond, rtx pattern);
160 static rtx emit_set_insn (rtx, rtx);
161 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
163 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
165 static int aapcs_select_return_coproc (const_tree, const_tree);
167 #ifdef OBJECT_FORMAT_ELF
168 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
169 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
172 static void arm_encode_section_info (tree, rtx, int);
175 static void arm_file_end (void);
176 static void arm_file_start (void);
178 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
180 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
181 enum machine_mode, const_tree, bool);
182 static bool arm_promote_prototypes (const_tree);
183 static bool arm_default_short_enums (void);
184 static bool arm_align_anon_bitfield (void);
185 static bool arm_return_in_msb (const_tree);
186 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
187 static bool arm_return_in_memory (const_tree, const_tree);
188 #ifdef TARGET_UNWIND_INFO
189 static void arm_unwind_emit (FILE *, rtx);
190 static bool arm_output_ttype (rtx);
192 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
194 static tree arm_cxx_guard_type (void);
195 static bool arm_cxx_guard_mask_bit (void);
196 static tree arm_get_cookie_size (tree);
197 static bool arm_cookie_has_size (void);
198 static bool arm_cxx_cdtor_returns_this (void);
199 static bool arm_cxx_key_method_may_be_inline (void);
200 static void arm_cxx_determine_class_data_visibility (tree);
201 static bool arm_cxx_class_data_always_comdat (void);
202 static bool arm_cxx_use_aeabi_atexit (void);
203 static void arm_init_libfuncs (void);
204 static tree arm_build_builtin_va_list (void);
205 static void arm_expand_builtin_va_start (tree, rtx);
206 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
207 static bool arm_handle_option (size_t, const char *, int);
208 static void arm_target_help (void);
209 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
210 static bool arm_cannot_copy_insn_p (rtx);
211 static bool arm_tls_symbol_p (rtx x);
212 static int arm_issue_rate (void);
213 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
214 static bool arm_allocate_stack_slots_for_args (void);
215 static const char *arm_invalid_parameter_type (const_tree t);
216 static const char *arm_invalid_return_type (const_tree t);
217 static tree arm_promoted_type (const_tree t);
218 static tree arm_convert_to_type (tree type, tree expr);
219 static bool arm_scalar_mode_supported_p (enum machine_mode);
220 static bool arm_frame_pointer_required (void);
221 static bool arm_can_eliminate (const int, const int);
222 static void arm_asm_trampoline_template (FILE *);
223 static void arm_trampoline_init (rtx, tree, rtx);
224 static rtx arm_trampoline_adjust_address (rtx);
227 /* Table of machine attributes. */
228 static const struct attribute_spec arm_attribute_table[] =
230 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
231 /* Function calls made to this symbol must be done indirectly, because
232 it may lie outside of the 26 bit addressing range of a normal function
234 { "long_call", 0, 0, false, true, true, NULL },
235 /* Whereas these functions are always known to reside within the 26 bit
237 { "short_call", 0, 0, false, true, true, NULL },
238 /* Specify the procedure call conventions for a function. */
239 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
240 /* Interrupt Service Routines have special prologue and epilogue requirements. */
241 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
242 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
243 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
245 /* ARM/PE has three new attributes:
247 dllexport - for exporting a function/variable that will live in a dll
248 dllimport - for importing a function/variable from a dll
250 Microsoft allows multiple declspecs in one __declspec, separating
251 them with spaces. We do NOT support this. Instead, use __declspec
254 { "dllimport", 0, 0, true, false, false, NULL },
255 { "dllexport", 0, 0, true, false, false, NULL },
256 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
257 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
258 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
259 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
260 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
262 { NULL, 0, 0, false, false, false, NULL }
265 /* Initialize the GCC target structure. */
266 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
267 #undef TARGET_MERGE_DECL_ATTRIBUTES
268 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
271 #undef TARGET_LEGITIMIZE_ADDRESS
272 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
274 #undef TARGET_ATTRIBUTE_TABLE
275 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
277 #undef TARGET_ASM_FILE_START
278 #define TARGET_ASM_FILE_START arm_file_start
279 #undef TARGET_ASM_FILE_END
280 #define TARGET_ASM_FILE_END arm_file_end
282 #undef TARGET_ASM_ALIGNED_SI_OP
283 #define TARGET_ASM_ALIGNED_SI_OP NULL
284 #undef TARGET_ASM_INTEGER
285 #define TARGET_ASM_INTEGER arm_assemble_integer
287 #undef TARGET_ASM_FUNCTION_PROLOGUE
288 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
290 #undef TARGET_ASM_FUNCTION_EPILOGUE
291 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
293 #undef TARGET_DEFAULT_TARGET_FLAGS
294 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
295 #undef TARGET_HANDLE_OPTION
296 #define TARGET_HANDLE_OPTION arm_handle_option
298 #define TARGET_HELP arm_target_help
300 #undef TARGET_COMP_TYPE_ATTRIBUTES
301 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
303 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
304 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
306 #undef TARGET_SCHED_ADJUST_COST
307 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
309 #undef TARGET_ENCODE_SECTION_INFO
311 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
313 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
316 #undef TARGET_STRIP_NAME_ENCODING
317 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
319 #undef TARGET_ASM_INTERNAL_LABEL
320 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
322 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
323 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
325 #undef TARGET_FUNCTION_VALUE
326 #define TARGET_FUNCTION_VALUE arm_function_value
328 #undef TARGET_LIBCALL_VALUE
329 #define TARGET_LIBCALL_VALUE arm_libcall_value
331 #undef TARGET_ASM_OUTPUT_MI_THUNK
332 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
333 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
334 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
336 #undef TARGET_RTX_COSTS
337 #define TARGET_RTX_COSTS arm_rtx_costs
338 #undef TARGET_ADDRESS_COST
339 #define TARGET_ADDRESS_COST arm_address_cost
341 #undef TARGET_SHIFT_TRUNCATION_MASK
342 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
343 #undef TARGET_VECTOR_MODE_SUPPORTED_P
344 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
346 #undef TARGET_MACHINE_DEPENDENT_REORG
347 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
349 #undef TARGET_INIT_BUILTINS
350 #define TARGET_INIT_BUILTINS arm_init_builtins
351 #undef TARGET_EXPAND_BUILTIN
352 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
354 #undef TARGET_INIT_LIBFUNCS
355 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
357 #undef TARGET_PROMOTE_FUNCTION_MODE
358 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
359 #undef TARGET_PROMOTE_PROTOTYPES
360 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
361 #undef TARGET_PASS_BY_REFERENCE
362 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
363 #undef TARGET_ARG_PARTIAL_BYTES
364 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
366 #undef TARGET_SETUP_INCOMING_VARARGS
367 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
369 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
370 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
372 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
373 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
374 #undef TARGET_TRAMPOLINE_INIT
375 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
376 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
377 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
379 #undef TARGET_DEFAULT_SHORT_ENUMS
380 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
382 #undef TARGET_ALIGN_ANON_BITFIELD
383 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
385 #undef TARGET_NARROW_VOLATILE_BITFIELD
386 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
388 #undef TARGET_CXX_GUARD_TYPE
389 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
391 #undef TARGET_CXX_GUARD_MASK_BIT
392 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
394 #undef TARGET_CXX_GET_COOKIE_SIZE
395 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
397 #undef TARGET_CXX_COOKIE_HAS_SIZE
398 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
400 #undef TARGET_CXX_CDTOR_RETURNS_THIS
401 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
403 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
404 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
406 #undef TARGET_CXX_USE_AEABI_ATEXIT
407 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
409 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
410 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
411 arm_cxx_determine_class_data_visibility
413 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
414 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
416 #undef TARGET_RETURN_IN_MSB
417 #define TARGET_RETURN_IN_MSB arm_return_in_msb
419 #undef TARGET_RETURN_IN_MEMORY
420 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
422 #undef TARGET_MUST_PASS_IN_STACK
423 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
425 #ifdef TARGET_UNWIND_INFO
426 #undef TARGET_UNWIND_EMIT
427 #define TARGET_UNWIND_EMIT arm_unwind_emit
429 /* EABI unwinding tables use a different format for the typeinfo tables. */
430 #undef TARGET_ASM_TTYPE
431 #define TARGET_ASM_TTYPE arm_output_ttype
433 #undef TARGET_ARM_EABI_UNWINDER
434 #define TARGET_ARM_EABI_UNWINDER true
435 #endif /* TARGET_UNWIND_INFO */
437 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
438 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
440 #undef TARGET_CANNOT_COPY_INSN_P
441 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
444 #undef TARGET_HAVE_TLS
445 #define TARGET_HAVE_TLS true
448 #undef TARGET_CANNOT_FORCE_CONST_MEM
449 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
451 #undef TARGET_MAX_ANCHOR_OFFSET
452 #define TARGET_MAX_ANCHOR_OFFSET 4095
454 /* The minimum is set such that the total size of the block
455 for a particular anchor is -4088 + 1 + 4095 bytes, which is
456 divisible by eight, ensuring natural spacing of anchors. */
457 #undef TARGET_MIN_ANCHOR_OFFSET
458 #define TARGET_MIN_ANCHOR_OFFSET -4088
460 #undef TARGET_SCHED_ISSUE_RATE
461 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
463 #undef TARGET_MANGLE_TYPE
464 #define TARGET_MANGLE_TYPE arm_mangle_type
466 #undef TARGET_BUILD_BUILTIN_VA_LIST
467 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
468 #undef TARGET_EXPAND_BUILTIN_VA_START
469 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
470 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
471 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
474 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
475 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
478 #undef TARGET_LEGITIMATE_ADDRESS_P
479 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
481 #undef TARGET_INVALID_PARAMETER_TYPE
482 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
484 #undef TARGET_INVALID_RETURN_TYPE
485 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
487 #undef TARGET_PROMOTED_TYPE
488 #define TARGET_PROMOTED_TYPE arm_promoted_type
490 #undef TARGET_CONVERT_TO_TYPE
491 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
493 #undef TARGET_SCALAR_MODE_SUPPORTED_P
494 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
496 #undef TARGET_FRAME_POINTER_REQUIRED
497 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
499 #undef TARGET_CAN_ELIMINATE
500 #define TARGET_CAN_ELIMINATE arm_can_eliminate
502 struct gcc_target targetm = TARGET_INITIALIZER;
504 /* Obstack for minipool constant handling. */
505 static struct obstack minipool_obstack;
506 static char * minipool_startobj;
508 /* The maximum number of insns skipped which
509 will be conditionalised if possible. */
510 static int max_insns_skipped = 5;
512 extern FILE * asm_out_file;
514 /* True if we are currently building a constant table. */
515 int making_const_table;
517 /* The processor for which instructions should be scheduled. */
518 enum processor_type arm_tune = arm_none;
520 /* The default processor used if not overridden by commandline. */
521 static enum processor_type arm_default_cpu = arm_none;
523 /* Which floating point model to use. */
524 enum arm_fp_model arm_fp_model;
526 /* Which floating point hardware is available. */
527 enum fputype arm_fpu_arch;
529 /* Which floating point hardware to schedule for. */
530 enum fputype arm_fpu_tune;
532 /* Whether to use floating point hardware. */
533 enum float_abi_type arm_float_abi;
535 /* Which __fp16 format to use. */
536 enum arm_fp16_format_type arm_fp16_format;
538 /* Which ABI to use. */
539 enum arm_abi_type arm_abi;
541 /* Which thread pointer model to use. */
542 enum arm_tp_type target_thread_pointer = TP_AUTO;
544 /* Used to parse -mstructure_size_boundary command line option. */
545 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
547 /* Used for Thumb call_via trampolines. */
548 rtx thumb_call_via_label[14];
549 static int thumb_call_reg_needed;
551 /* Bit values used to identify processor capabilities. */
552 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
553 #define FL_ARCH3M (1 << 1) /* Extended multiply */
554 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
555 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
556 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
557 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
558 #define FL_THUMB (1 << 6) /* Thumb aware */
559 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
560 #define FL_STRONG (1 << 8) /* StrongARM */
561 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
562 #define FL_XSCALE (1 << 10) /* XScale */
563 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
564 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
565 media instructions. */
566 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
567 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
568 Note: ARM6 & 7 derivatives only. */
569 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
570 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
571 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
573 #define FL_DIV (1 << 18) /* Hardware divide. */
574 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
575 #define FL_NEON (1 << 20) /* Neon instructions. */
577 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
579 #define FL_FOR_ARCH2 FL_NOTM
580 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
581 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
582 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
583 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
584 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
585 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
586 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
587 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
588 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
589 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
590 #define FL_FOR_ARCH6J FL_FOR_ARCH6
591 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
592 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
593 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
594 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
595 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
596 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
597 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
598 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
599 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
601 /* The bits in this mask specify which
602 instructions we are allowed to generate. */
603 static unsigned long insn_flags = 0;
605 /* The bits in this mask specify which instruction scheduling options should
607 static unsigned long tune_flags = 0;
609 /* The following are used in the arm.md file as equivalents to bits
610 in the above two flag variables. */
612 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
615 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
618 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
621 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
624 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
627 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
630 /* Nonzero if this chip supports the ARM 6K extensions. */
633 /* Nonzero if instructions not present in the 'M' profile can be used. */
634 int arm_arch_notm = 0;
636 /* Nonzero if this chip can benefit from load scheduling. */
637 int arm_ld_sched = 0;
639 /* Nonzero if this chip is a StrongARM. */
640 int arm_tune_strongarm = 0;
642 /* Nonzero if this chip is a Cirrus variant. */
643 int arm_arch_cirrus = 0;
645 /* Nonzero if this chip supports Intel Wireless MMX technology. */
646 int arm_arch_iwmmxt = 0;
648 /* Nonzero if this chip is an XScale. */
649 int arm_arch_xscale = 0;
651 /* Nonzero if tuning for XScale */
652 int arm_tune_xscale = 0;
654 /* Nonzero if we want to tune for stores that access the write-buffer.
655 This typically means an ARM6 or ARM7 with MMU or MPU. */
656 int arm_tune_wbuf = 0;
658 /* Nonzero if tuning for Cortex-A9. */
659 int arm_tune_cortex_a9 = 0;
661 /* Nonzero if generating Thumb instructions. */
664 /* Nonzero if we should define __THUMB_INTERWORK__ in the
666 XXX This is a bit of a hack, it's intended to help work around
667 problems in GLD which doesn't understand that armv5t code is
668 interworking clean. */
669 int arm_cpp_interwork = 0;
671 /* Nonzero if chip supports Thumb 2. */
674 /* Nonzero if chip supports integer division instruction. */
677 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
678 must report the mode of the memory reference from PRINT_OPERAND to
679 PRINT_OPERAND_ADDRESS. */
680 enum machine_mode output_memory_reference_mode;
682 /* The register number to be used for the PIC offset register. */
683 unsigned arm_pic_register = INVALID_REGNUM;
685 /* Set to 1 after arm_reorg has started. Reset to start at the start of
686 the next function. */
687 static int after_arm_reorg = 0;
689 /* The maximum number of insns to be used when loading a constant. */
690 static int arm_constant_limit = 3;
692 static enum arm_pcs arm_pcs_default;
694 /* For an explanation of these variables, see final_prescan_insn below. */
696 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
697 enum arm_cond_code arm_current_cc;
699 int arm_target_label;
700 /* The number of conditionally executed insns, including the current insn. */
701 int arm_condexec_count = 0;
702 /* A bitmask specifying the patterns for the IT block.
703 Zero means do not output an IT block before this insn. */
704 int arm_condexec_mask = 0;
705 /* The number of bits used in arm_condexec_mask. */
706 int arm_condexec_masklen = 0;
708 /* The condition codes of the ARM, and the inverse function. */
709 static const char * const arm_condition_codes[] =
711 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
712 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
715 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
716 #define streq(string1, string2) (strcmp (string1, string2) == 0)
718 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
719 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
720 | (1 << PIC_OFFSET_TABLE_REGNUM)))
722 /* Initialization code. */
726 const char *const name;
727 enum processor_type core;
729 const unsigned long flags;
730 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
733 /* Not all of these give usefully different compilation alternatives,
734 but there is no simple way of generalizing them. */
735 static const struct processors all_cores[] =
738 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
739 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
740 #include "arm-cores.def"
742 {NULL, arm_none, NULL, 0, NULL}
745 static const struct processors all_architectures[] =
747 /* ARM Architectures */
748 /* We don't specify rtx_costs here as it will be figured out
751 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
752 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
753 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
754 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
755 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
756 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
757 implementations that support it, so we will leave it out for now. */
758 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
759 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
760 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
761 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
762 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
763 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
764 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
765 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
766 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
767 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
768 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
769 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
770 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
771 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
772 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
773 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
774 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
775 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
776 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
777 {NULL, arm_none, NULL, 0 , NULL}
780 struct arm_cpu_select
784 const struct processors * processors;
787 /* This is a magic structure. The 'string' field is magically filled in
788 with a pointer to the value specified by the user on the command line
789 assuming that the user has specified such a value. */
791 static struct arm_cpu_select arm_select[] =
793 /* string name processors */
794 { NULL, "-mcpu=", all_cores },
795 { NULL, "-march=", all_architectures },
796 { NULL, "-mtune=", all_cores }
799 /* Defines representing the indexes into the above table. */
800 #define ARM_OPT_SET_CPU 0
801 #define ARM_OPT_SET_ARCH 1
802 #define ARM_OPT_SET_TUNE 2
804 /* The name of the preprocessor macro to define for this architecture. */
806 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
815 /* Available values for -mfpu=. */
817 static const struct fpu_desc all_fpus[] =
819 {"fpa", FPUTYPE_FPA},
820 {"fpe2", FPUTYPE_FPA_EMU2},
821 {"fpe3", FPUTYPE_FPA_EMU2},
822 {"maverick", FPUTYPE_MAVERICK},
823 {"vfp", FPUTYPE_VFP},
824 {"vfp3", FPUTYPE_VFP3},
825 {"vfpv3", FPUTYPE_VFP3},
826 {"vfpv3-d16", FPUTYPE_VFP3D16},
827 {"neon", FPUTYPE_NEON},
828 {"neon-fp16", FPUTYPE_NEON_FP16}
832 /* Floating point models used by the different hardware.
833 See fputype in arm.h. */
835 static const enum arm_fp_model fp_model_for_fpu[] =
837 /* No FP hardware. */
838 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
839 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
840 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
841 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
842 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
843 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
844 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
845 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
846 ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */
847 ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */
854 enum float_abi_type abi_type;
858 /* Available values for -mfloat-abi=. */
860 static const struct float_abi all_float_abis[] =
862 {"soft", ARM_FLOAT_ABI_SOFT},
863 {"softfp", ARM_FLOAT_ABI_SOFTFP},
864 {"hard", ARM_FLOAT_ABI_HARD}
871 enum arm_fp16_format_type fp16_format_type;
875 /* Available values for -mfp16-format=. */
877 static const struct fp16_format all_fp16_formats[] =
879 {"none", ARM_FP16_FORMAT_NONE},
880 {"ieee", ARM_FP16_FORMAT_IEEE},
881 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
888 enum arm_abi_type abi_type;
892 /* Available values for -mabi=. */
894 static const struct abi_name arm_all_abis[] =
896 {"apcs-gnu", ARM_ABI_APCS},
897 {"atpcs", ARM_ABI_ATPCS},
898 {"aapcs", ARM_ABI_AAPCS},
899 {"iwmmxt", ARM_ABI_IWMMXT},
900 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
903 /* Supported TLS relocations. */
913 /* Emit an insn that's a simple single-set. Both the operands must be known
916 emit_set_insn (rtx x, rtx y)
918 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
921 /* Return the number of bits set in VALUE. */
923 bit_count (unsigned long value)
925 unsigned long count = 0;
930 value &= value - 1; /* Clear the least-significant set bit. */
936 /* Set up library functions unique to ARM. */
939 arm_init_libfuncs (void)
941 /* There are no special library functions unless we are using the
946 /* The functions below are described in Section 4 of the "Run-Time
947 ABI for the ARM architecture", Version 1.0. */
949 /* Double-precision floating-point arithmetic. Table 2. */
950 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
951 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
952 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
953 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
954 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
956 /* Double-precision comparisons. Table 3. */
957 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
958 set_optab_libfunc (ne_optab, DFmode, NULL);
959 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
960 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
961 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
962 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
963 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
965 /* Single-precision floating-point arithmetic. Table 4. */
966 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
967 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
968 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
969 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
970 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
972 /* Single-precision comparisons. Table 5. */
973 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
974 set_optab_libfunc (ne_optab, SFmode, NULL);
975 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
976 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
977 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
978 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
979 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
981 /* Floating-point to integer conversions. Table 6. */
982 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
983 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
984 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
985 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
986 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
987 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
988 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
989 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
991 /* Conversions between floating types. Table 7. */
992 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
993 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
995 /* Integer to floating-point conversions. Table 8. */
996 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
997 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
998 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
999 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1000 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1001 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1002 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1003 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1005 /* Long long. Table 9. */
1006 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1007 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1008 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1009 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1010 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1011 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1012 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1013 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1015 /* Integer (32/32->32) division. \S 4.3.1. */
1016 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1017 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1019 /* The divmod functions are designed so that they can be used for
1020 plain division, even though they return both the quotient and the
1021 remainder. The quotient is returned in the usual location (i.e.,
1022 r0 for SImode, {r0, r1} for DImode), just as would be expected
1023 for an ordinary division routine. Because the AAPCS calling
1024 conventions specify that all of { r0, r1, r2, r3 } are
1025 callee-saved registers, there is no need to tell the compiler
1026 explicitly that those registers are clobbered by these
1028 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1029 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1031 /* For SImode division the ABI provides div-without-mod routines,
1032 which are faster. */
1033 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1034 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1036 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1037 divmod libcalls instead. */
1038 set_optab_libfunc (smod_optab, DImode, NULL);
1039 set_optab_libfunc (umod_optab, DImode, NULL);
1040 set_optab_libfunc (smod_optab, SImode, NULL);
1041 set_optab_libfunc (umod_optab, SImode, NULL);
1043 /* Half-precision float operations. The compiler handles all operations
1044 with NULL libfuncs by converting the SFmode. */
1045 switch (arm_fp16_format)
1047 case ARM_FP16_FORMAT_IEEE:
1048 case ARM_FP16_FORMAT_ALTERNATIVE:
1051 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1052 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1054 : "__gnu_f2h_alternative"));
1055 set_conv_libfunc (sext_optab, SFmode, HFmode,
1056 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1058 : "__gnu_h2f_alternative"));
1061 set_optab_libfunc (add_optab, HFmode, NULL);
1062 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1063 set_optab_libfunc (smul_optab, HFmode, NULL);
1064 set_optab_libfunc (neg_optab, HFmode, NULL);
1065 set_optab_libfunc (sub_optab, HFmode, NULL);
1068 set_optab_libfunc (eq_optab, HFmode, NULL);
1069 set_optab_libfunc (ne_optab, HFmode, NULL);
1070 set_optab_libfunc (lt_optab, HFmode, NULL);
1071 set_optab_libfunc (le_optab, HFmode, NULL);
1072 set_optab_libfunc (ge_optab, HFmode, NULL);
1073 set_optab_libfunc (gt_optab, HFmode, NULL);
1074 set_optab_libfunc (unord_optab, HFmode, NULL);
1081 if (TARGET_AAPCS_BASED)
1082 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1085 /* On AAPCS systems, this is the "struct __va_list". */
1086 static GTY(()) tree va_list_type;
1088 /* Return the type to use as __builtin_va_list. */
1090 arm_build_builtin_va_list (void)
1095 if (!TARGET_AAPCS_BASED)
1096 return std_build_builtin_va_list ();
1098 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1106 The C Library ABI further reinforces this definition in \S
1109 We must follow this definition exactly. The structure tag
1110 name is visible in C++ mangled names, and thus forms a part
1111 of the ABI. The field name may be used by people who
1112 #include <stdarg.h>. */
1113 /* Create the type. */
1114 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1115 /* Give it the required name. */
1116 va_list_name = build_decl (BUILTINS_LOCATION,
1118 get_identifier ("__va_list"),
1120 DECL_ARTIFICIAL (va_list_name) = 1;
1121 TYPE_NAME (va_list_type) = va_list_name;
1122 /* Create the __ap field. */
1123 ap_field = build_decl (BUILTINS_LOCATION,
1125 get_identifier ("__ap"),
1127 DECL_ARTIFICIAL (ap_field) = 1;
1128 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1129 TYPE_FIELDS (va_list_type) = ap_field;
1130 /* Compute its layout. */
1131 layout_type (va_list_type);
1133 return va_list_type;
1136 /* Return an expression of type "void *" pointing to the next
1137 available argument in a variable-argument list. VALIST is the
1138 user-level va_list object, of type __builtin_va_list. */
1140 arm_extract_valist_ptr (tree valist)
1142 if (TREE_TYPE (valist) == error_mark_node)
1143 return error_mark_node;
1145 /* On an AAPCS target, the pointer is stored within "struct
1147 if (TARGET_AAPCS_BASED)
1149 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1150 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1151 valist, ap_field, NULL_TREE);
1157 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1159 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1161 valist = arm_extract_valist_ptr (valist);
1162 std_expand_builtin_va_start (valist, nextarg);
1165 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1167 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1170 valist = arm_extract_valist_ptr (valist);
1171 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1174 /* Implement TARGET_HANDLE_OPTION. */
1177 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1182 arm_select[1].string = arg;
1186 arm_select[0].string = arg;
1189 case OPT_mhard_float:
1190 target_float_abi_name = "hard";
1193 case OPT_msoft_float:
1194 target_float_abi_name = "soft";
1198 arm_select[2].string = arg;
1207 arm_target_help (void)
1210 static int columns = 0;
1213 /* If we have not done so already, obtain the desired maximum width of
1214 the output. Note - this is a duplication of the code at the start of
1215 gcc/opts.c:print_specific_help() - the two copies should probably be
1216 replaced by a single function. */
1221 GET_ENVIRONMENT (p, "COLUMNS");
1224 int value = atoi (p);
1231 /* Use a reasonable default. */
1235 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1237 /* The - 2 is because we know that the last entry in the array is NULL. */
1238 i = ARRAY_SIZE (all_cores) - 2;
1240 printf (" %s", all_cores[i].name);
1241 remaining = columns - (strlen (all_cores[i].name) + 4);
1242 gcc_assert (remaining >= 0);
1246 int len = strlen (all_cores[i].name);
1248 if (remaining > len + 2)
1250 printf (", %s", all_cores[i].name);
1251 remaining -= len + 2;
1257 printf ("\n %s", all_cores[i].name);
1258 remaining = columns - (len + 4);
1262 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1264 i = ARRAY_SIZE (all_architectures) - 2;
1267 printf (" %s", all_architectures[i].name);
1268 remaining = columns - (strlen (all_architectures[i].name) + 4);
1269 gcc_assert (remaining >= 0);
1273 int len = strlen (all_architectures[i].name);
1275 if (remaining > len + 2)
1277 printf (", %s", all_architectures[i].name);
1278 remaining -= len + 2;
1284 printf ("\n %s", all_architectures[i].name);
1285 remaining = columns - (len + 4);
1292 /* Fix up any incompatible options that the user has specified.
1293 This has now turned into a maze. */
1295 arm_override_options (void)
1298 enum processor_type target_arch_cpu = arm_none;
1299 enum processor_type selected_cpu = arm_none;
1301 /* Set up the flags based on the cpu/architecture selected by the user. */
1302 for (i = ARRAY_SIZE (arm_select); i--;)
1304 struct arm_cpu_select * ptr = arm_select + i;
1306 if (ptr->string != NULL && ptr->string[0] != '\0')
1308 const struct processors * sel;
1310 for (sel = ptr->processors; sel->name != NULL; sel++)
1311 if (streq (ptr->string, sel->name))
1313 /* Set the architecture define. */
1314 if (i != ARM_OPT_SET_TUNE)
1315 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1317 /* Determine the processor core for which we should
1318 tune code-generation. */
1319 if (/* -mcpu= is a sensible default. */
1320 i == ARM_OPT_SET_CPU
1321 /* -mtune= overrides -mcpu= and -march=. */
1322 || i == ARM_OPT_SET_TUNE)
1323 arm_tune = (enum processor_type) (sel - ptr->processors);
1325 /* Remember the CPU associated with this architecture.
1326 If no other option is used to set the CPU type,
1327 we'll use this to guess the most suitable tuning
1329 if (i == ARM_OPT_SET_ARCH)
1330 target_arch_cpu = sel->core;
1332 if (i == ARM_OPT_SET_CPU)
1333 selected_cpu = (enum processor_type) (sel - ptr->processors);
1335 if (i != ARM_OPT_SET_TUNE)
1337 /* If we have been given an architecture and a processor
1338 make sure that they are compatible. We only generate
1339 a warning though, and we prefer the CPU over the
1341 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1342 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1345 insn_flags = sel->flags;
1351 if (sel->name == NULL)
1352 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1356 /* Guess the tuning options from the architecture if necessary. */
1357 if (arm_tune == arm_none)
1358 arm_tune = target_arch_cpu;
1360 /* If the user did not specify a processor, choose one for them. */
1361 if (insn_flags == 0)
1363 const struct processors * sel;
1364 unsigned int sought;
1366 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1367 if (selected_cpu == arm_none)
1369 #ifdef SUBTARGET_CPU_DEFAULT
1370 /* Use the subtarget default CPU if none was specified by
1372 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1374 /* Default to ARM6. */
1375 if (selected_cpu == arm_none)
1376 selected_cpu = arm6;
1378 sel = &all_cores[selected_cpu];
1380 insn_flags = sel->flags;
1382 /* Now check to see if the user has specified some command line
1383 switch that require certain abilities from the cpu. */
1386 if (TARGET_INTERWORK || TARGET_THUMB)
1388 sought |= (FL_THUMB | FL_MODE32);
1390 /* There are no ARM processors that support both APCS-26 and
1391 interworking. Therefore we force FL_MODE26 to be removed
1392 from insn_flags here (if it was set), so that the search
1393 below will always be able to find a compatible processor. */
1394 insn_flags &= ~FL_MODE26;
1397 if (sought != 0 && ((sought & insn_flags) != sought))
1399 /* Try to locate a CPU type that supports all of the abilities
1400 of the default CPU, plus the extra abilities requested by
1402 for (sel = all_cores; sel->name != NULL; sel++)
1403 if ((sel->flags & sought) == (sought | insn_flags))
1406 if (sel->name == NULL)
1408 unsigned current_bit_count = 0;
1409 const struct processors * best_fit = NULL;
1411 /* Ideally we would like to issue an error message here
1412 saying that it was not possible to find a CPU compatible
1413 with the default CPU, but which also supports the command
1414 line options specified by the programmer, and so they
1415 ought to use the -mcpu=<name> command line option to
1416 override the default CPU type.
1418 If we cannot find a cpu that has both the
1419 characteristics of the default cpu and the given
1420 command line options we scan the array again looking
1421 for a best match. */
1422 for (sel = all_cores; sel->name != NULL; sel++)
1423 if ((sel->flags & sought) == sought)
1427 count = bit_count (sel->flags & insn_flags);
1429 if (count >= current_bit_count)
1432 current_bit_count = count;
1436 gcc_assert (best_fit);
1440 insn_flags = sel->flags;
1442 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1443 arm_default_cpu = (enum processor_type) (sel - all_cores);
1444 if (arm_tune == arm_none)
1445 arm_tune = arm_default_cpu;
1448 /* The processor for which we should tune should now have been
1450 gcc_assert (arm_tune != arm_none);
1452 tune_flags = all_cores[(int)arm_tune].flags;
1454 if (target_fp16_format_name)
1456 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1458 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1460 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1464 if (i == ARRAY_SIZE (all_fp16_formats))
1465 error ("invalid __fp16 format option: -mfp16-format=%s",
1466 target_fp16_format_name);
1469 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1471 if (target_abi_name)
1473 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1475 if (streq (arm_all_abis[i].name, target_abi_name))
1477 arm_abi = arm_all_abis[i].abi_type;
1481 if (i == ARRAY_SIZE (arm_all_abis))
1482 error ("invalid ABI option: -mabi=%s", target_abi_name);
1485 arm_abi = ARM_DEFAULT_ABI;
1487 /* Make sure that the processor choice does not conflict with any of the
1488 other command line choices. */
1489 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1490 error ("target CPU does not support ARM mode");
1492 /* BPABI targets use linker tricks to allow interworking on cores
1493 without thumb support. */
1494 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1496 warning (0, "target CPU does not support interworking" );
1497 target_flags &= ~MASK_INTERWORK;
1500 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1502 warning (0, "target CPU does not support THUMB instructions");
1503 target_flags &= ~MASK_THUMB;
1506 if (TARGET_APCS_FRAME && TARGET_THUMB)
1508 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1509 target_flags &= ~MASK_APCS_FRAME;
1512 /* Callee super interworking implies thumb interworking. Adding
1513 this to the flags here simplifies the logic elsewhere. */
1514 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1515 target_flags |= MASK_INTERWORK;
1517 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1518 from here where no function is being compiled currently. */
1519 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1520 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1522 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1523 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1525 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1526 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1528 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1530 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1531 target_flags |= MASK_APCS_FRAME;
1534 if (TARGET_POKE_FUNCTION_NAME)
1535 target_flags |= MASK_APCS_FRAME;
1537 if (TARGET_APCS_REENT && flag_pic)
1538 error ("-fpic and -mapcs-reent are incompatible");
1540 if (TARGET_APCS_REENT)
1541 warning (0, "APCS reentrant code not supported. Ignored");
1543 /* If this target is normally configured to use APCS frames, warn if they
1544 are turned off and debugging is turned on. */
1546 && write_symbols != NO_DEBUG
1547 && !TARGET_APCS_FRAME
1548 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1549 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1551 if (TARGET_APCS_FLOAT)
1552 warning (0, "passing floating point arguments in fp regs not yet supported");
1554 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1555 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1556 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1557 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1558 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1559 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1560 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1561 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1562 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1563 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1564 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1565 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1567 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1568 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1569 thumb_code = (TARGET_ARM == 0);
1570 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1571 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1572 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1573 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1574 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1576 /* If we are not using the default (ARM mode) section anchor offset
1577 ranges, then set the correct ranges now. */
1580 /* Thumb-1 LDR instructions cannot have negative offsets.
1581 Permissible positive offset ranges are 5-bit (for byte loads),
1582 6-bit (for halfword loads), or 7-bit (for word loads).
1583 Empirical results suggest a 7-bit anchor range gives the best
1584 overall code size. */
1585 targetm.min_anchor_offset = 0;
1586 targetm.max_anchor_offset = 127;
1588 else if (TARGET_THUMB2)
1590 /* The minimum is set such that the total size of the block
1591 for a particular anchor is 248 + 1 + 4095 bytes, which is
1592 divisible by eight, ensuring natural spacing of anchors. */
1593 targetm.min_anchor_offset = -248;
1594 targetm.max_anchor_offset = 4095;
1597 /* V5 code we generate is completely interworking capable, so we turn off
1598 TARGET_INTERWORK here to avoid many tests later on. */
1600 /* XXX However, we must pass the right pre-processor defines to CPP
1601 or GLD can get confused. This is a hack. */
1602 if (TARGET_INTERWORK)
1603 arm_cpp_interwork = 1;
1606 target_flags &= ~MASK_INTERWORK;
1608 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1609 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1611 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1612 error ("iwmmxt abi requires an iwmmxt capable cpu");
1614 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1615 if (target_fpu_name == NULL && target_fpe_name != NULL)
1617 if (streq (target_fpe_name, "2"))
1618 target_fpu_name = "fpe2";
1619 else if (streq (target_fpe_name, "3"))
1620 target_fpu_name = "fpe3";
1622 error ("invalid floating point emulation option: -mfpe=%s",
1625 if (target_fpu_name != NULL)
1627 /* The user specified a FPU. */
1628 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1630 if (streq (all_fpus[i].name, target_fpu_name))
1632 arm_fpu_arch = all_fpus[i].fpu;
1633 arm_fpu_tune = arm_fpu_arch;
1634 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1638 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1639 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1643 #ifdef FPUTYPE_DEFAULT
1644 /* Use the default if it is specified for this platform. */
1645 arm_fpu_arch = FPUTYPE_DEFAULT;
1646 arm_fpu_tune = FPUTYPE_DEFAULT;
1648 /* Pick one based on CPU type. */
1649 /* ??? Some targets assume FPA is the default.
1650 if ((insn_flags & FL_VFP) != 0)
1651 arm_fpu_arch = FPUTYPE_VFP;
1654 if (arm_arch_cirrus)
1655 arm_fpu_arch = FPUTYPE_MAVERICK;
1657 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1659 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1660 arm_fpu_tune = FPUTYPE_FPA;
1662 arm_fpu_tune = arm_fpu_arch;
1663 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1664 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1667 if (target_float_abi_name != NULL)
1669 /* The user specified a FP ABI. */
1670 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1672 if (streq (all_float_abis[i].name, target_float_abi_name))
1674 arm_float_abi = all_float_abis[i].abi_type;
1678 if (i == ARRAY_SIZE (all_float_abis))
1679 error ("invalid floating point abi: -mfloat-abi=%s",
1680 target_float_abi_name);
1683 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1685 if (TARGET_AAPCS_BASED
1686 && (arm_fp_model == ARM_FP_MODEL_FPA))
1687 error ("FPA is unsupported in the AAPCS");
1689 if (TARGET_AAPCS_BASED)
1691 if (TARGET_CALLER_INTERWORKING)
1692 error ("AAPCS does not support -mcaller-super-interworking");
1694 if (TARGET_CALLEE_INTERWORKING)
1695 error ("AAPCS does not support -mcallee-super-interworking");
1698 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1699 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1700 will ever exist. GCC makes no attempt to support this combination. */
1701 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1702 sorry ("iWMMXt and hardware floating point");
1704 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1705 if (TARGET_THUMB2 && TARGET_IWMMXT)
1706 sorry ("Thumb-2 iWMMXt");
1708 /* __fp16 support currently assumes the core has ldrh. */
1709 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1710 sorry ("__fp16 and no ldrh");
1712 /* If soft-float is specified then don't use FPU. */
1713 if (TARGET_SOFT_FLOAT)
1714 arm_fpu_arch = FPUTYPE_NONE;
1716 if (TARGET_AAPCS_BASED)
1718 if (arm_abi == ARM_ABI_IWMMXT)
1719 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1720 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1721 && TARGET_HARD_FLOAT
1723 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1725 arm_pcs_default = ARM_PCS_AAPCS;
1729 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1730 sorry ("-mfloat-abi=hard and VFP");
1732 if (arm_abi == ARM_ABI_APCS)
1733 arm_pcs_default = ARM_PCS_APCS;
1735 arm_pcs_default = ARM_PCS_ATPCS;
1738 /* For arm2/3 there is no need to do any scheduling if there is only
1739 a floating point emulator, or we are doing software floating-point. */
1740 if ((TARGET_SOFT_FLOAT
1741 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1742 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1743 && (tune_flags & FL_MODE32) == 0)
1744 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1746 if (target_thread_switch)
1748 if (strcmp (target_thread_switch, "soft") == 0)
1749 target_thread_pointer = TP_SOFT;
1750 else if (strcmp (target_thread_switch, "auto") == 0)
1751 target_thread_pointer = TP_AUTO;
1752 else if (strcmp (target_thread_switch, "cp15") == 0)
1753 target_thread_pointer = TP_CP15;
1755 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1758 /* Use the cp15 method if it is available. */
1759 if (target_thread_pointer == TP_AUTO)
1761 if (arm_arch6k && !TARGET_THUMB)
1762 target_thread_pointer = TP_CP15;
1764 target_thread_pointer = TP_SOFT;
1767 if (TARGET_HARD_TP && TARGET_THUMB1)
1768 error ("can not use -mtp=cp15 with 16-bit Thumb");
1770 /* Override the default structure alignment for AAPCS ABI. */
1771 if (TARGET_AAPCS_BASED)
1772 arm_structure_size_boundary = 8;
1774 if (structure_size_string != NULL)
1776 int size = strtol (structure_size_string, NULL, 0);
1778 if (size == 8 || size == 32
1779 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1780 arm_structure_size_boundary = size;
1782 warning (0, "structure size boundary can only be set to %s",
1783 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1786 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1788 error ("RTP PIC is incompatible with Thumb");
1792 /* If stack checking is disabled, we can use r10 as the PIC register,
1793 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1794 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1796 if (TARGET_VXWORKS_RTP)
1797 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1798 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1801 if (flag_pic && TARGET_VXWORKS_RTP)
1802 arm_pic_register = 9;
1804 if (arm_pic_register_string != NULL)
1806 int pic_register = decode_reg_name (arm_pic_register_string);
1809 warning (0, "-mpic-register= is useless without -fpic");
1811 /* Prevent the user from choosing an obviously stupid PIC register. */
1812 else if (pic_register < 0 || call_used_regs[pic_register]
1813 || pic_register == HARD_FRAME_POINTER_REGNUM
1814 || pic_register == STACK_POINTER_REGNUM
1815 || pic_register >= PC_REGNUM
1816 || (TARGET_VXWORKS_RTP
1817 && (unsigned int) pic_register != arm_pic_register))
1818 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1820 arm_pic_register = pic_register;
1823 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1824 if (fix_cm3_ldrd == 2)
1826 if (selected_cpu == cortexm3)
1832 /* ??? We might want scheduling for thumb2. */
1833 if (TARGET_THUMB && flag_schedule_insns)
1835 /* Don't warn since it's on by default in -O2. */
1836 flag_schedule_insns = 0;
1841 arm_constant_limit = 1;
1843 /* If optimizing for size, bump the number of instructions that we
1844 are prepared to conditionally execute (even on a StrongARM). */
1845 max_insns_skipped = 6;
1849 /* For processors with load scheduling, it never costs more than
1850 2 cycles to load a constant, and the load scheduler may well
1851 reduce that to 1. */
1853 arm_constant_limit = 1;
1855 /* On XScale the longer latency of a load makes it more difficult
1856 to achieve a good schedule, so it's faster to synthesize
1857 constants that can be done in two insns. */
1858 if (arm_tune_xscale)
1859 arm_constant_limit = 2;
1861 /* StrongARM has early execution of branches, so a sequence
1862 that is worth skipping is shorter. */
1863 if (arm_tune_strongarm)
1864 max_insns_skipped = 3;
1867 /* Register global variables with the garbage collector. */
1868 arm_add_gc_roots ();
1872 arm_add_gc_roots (void)
1874 gcc_obstack_init(&minipool_obstack);
1875 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1878 /* A table of known ARM exception types.
1879 For use with the interrupt function attribute. */
1883 const char *const arg;
1884 const unsigned long return_value;
1888 static const isr_attribute_arg isr_attribute_args [] =
1890 { "IRQ", ARM_FT_ISR },
1891 { "irq", ARM_FT_ISR },
1892 { "FIQ", ARM_FT_FIQ },
1893 { "fiq", ARM_FT_FIQ },
1894 { "ABORT", ARM_FT_ISR },
1895 { "abort", ARM_FT_ISR },
1896 { "ABORT", ARM_FT_ISR },
1897 { "abort", ARM_FT_ISR },
1898 { "UNDEF", ARM_FT_EXCEPTION },
1899 { "undef", ARM_FT_EXCEPTION },
1900 { "SWI", ARM_FT_EXCEPTION },
1901 { "swi", ARM_FT_EXCEPTION },
1902 { NULL, ARM_FT_NORMAL }
1905 /* Returns the (interrupt) function type of the current
1906 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1908 static unsigned long
1909 arm_isr_value (tree argument)
1911 const isr_attribute_arg * ptr;
1915 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1917 /* No argument - default to IRQ. */
1918 if (argument == NULL_TREE)
1921 /* Get the value of the argument. */
1922 if (TREE_VALUE (argument) == NULL_TREE
1923 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1924 return ARM_FT_UNKNOWN;
1926 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1928 /* Check it against the list of known arguments. */
1929 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1930 if (streq (arg, ptr->arg))
1931 return ptr->return_value;
1933 /* An unrecognized interrupt type. */
1934 return ARM_FT_UNKNOWN;
1937 /* Computes the type of the current function. */
1939 static unsigned long
1940 arm_compute_func_type (void)
1942 unsigned long type = ARM_FT_UNKNOWN;
1946 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1948 /* Decide if the current function is volatile. Such functions
1949 never return, and many memory cycles can be saved by not storing
1950 register values that will never be needed again. This optimization
1951 was added to speed up context switching in a kernel application. */
1953 && (TREE_NOTHROW (current_function_decl)
1954 || !(flag_unwind_tables
1955 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1956 && TREE_THIS_VOLATILE (current_function_decl))
1957 type |= ARM_FT_VOLATILE;
1959 if (cfun->static_chain_decl != NULL)
1960 type |= ARM_FT_NESTED;
1962 attr = DECL_ATTRIBUTES (current_function_decl);
1964 a = lookup_attribute ("naked", attr);
1966 type |= ARM_FT_NAKED;
1968 a = lookup_attribute ("isr", attr);
1970 a = lookup_attribute ("interrupt", attr);
1973 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1975 type |= arm_isr_value (TREE_VALUE (a));
1980 /* Returns the type of the current function. */
1983 arm_current_func_type (void)
1985 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1986 cfun->machine->func_type = arm_compute_func_type ();
1988 return cfun->machine->func_type;
1992 arm_allocate_stack_slots_for_args (void)
1994 /* Naked functions should not allocate stack slots for arguments. */
1995 return !IS_NAKED (arm_current_func_type ());
1999 /* Output assembler code for a block containing the constant parts
2000 of a trampoline, leaving space for the variable parts.
2002 On the ARM, (if r8 is the static chain regnum, and remembering that
2003 referencing pc adds an offset of 8) the trampoline looks like:
2006 .word static chain value
2007 .word function's address
2008 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2011 arm_asm_trampoline_template (FILE *f)
2015 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2016 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2018 else if (TARGET_THUMB2)
2020 /* The Thumb-2 trampoline is similar to the arm implementation.
2021 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2022 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2023 STATIC_CHAIN_REGNUM, PC_REGNUM);
2024 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2028 ASM_OUTPUT_ALIGN (f, 2);
2029 fprintf (f, "\t.code\t16\n");
2030 fprintf (f, ".Ltrampoline_start:\n");
2031 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2032 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2033 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2034 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2035 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2036 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2038 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2039 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2042 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2045 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2047 rtx fnaddr, mem, a_tramp;
2049 emit_block_move (m_tramp, assemble_trampoline_template (),
2050 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2052 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2053 emit_move_insn (mem, chain_value);
2055 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2056 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2057 emit_move_insn (mem, fnaddr);
2059 a_tramp = XEXP (m_tramp, 0);
2060 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2061 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2062 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2065 /* Thumb trampolines should be entered in thumb mode, so set
2066 the bottom bit of the address. */
2069 arm_trampoline_adjust_address (rtx addr)
2072 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2073 NULL, 0, OPTAB_LIB_WIDEN);
2077 /* Return 1 if it is possible to return using a single instruction.
2078 If SIBLING is non-null, this is a test for a return before a sibling
2079 call. SIBLING is the call insn, so we can examine its register usage. */
2082 use_return_insn (int iscond, rtx sibling)
2085 unsigned int func_type;
2086 unsigned long saved_int_regs;
2087 unsigned HOST_WIDE_INT stack_adjust;
2088 arm_stack_offsets *offsets;
2090 /* Never use a return instruction before reload has run. */
2091 if (!reload_completed)
2094 func_type = arm_current_func_type ();
2096 /* Naked, volatile and stack alignment functions need special
2098 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2101 /* So do interrupt functions that use the frame pointer and Thumb
2102 interrupt functions. */
2103 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2106 offsets = arm_get_frame_offsets ();
2107 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2109 /* As do variadic functions. */
2110 if (crtl->args.pretend_args_size
2111 || cfun->machine->uses_anonymous_args
2112 /* Or if the function calls __builtin_eh_return () */
2113 || crtl->calls_eh_return
2114 /* Or if the function calls alloca */
2115 || cfun->calls_alloca
2116 /* Or if there is a stack adjustment. However, if the stack pointer
2117 is saved on the stack, we can use a pre-incrementing stack load. */
2118 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2119 && stack_adjust == 4)))
2122 saved_int_regs = offsets->saved_regs_mask;
2124 /* Unfortunately, the insn
2126 ldmib sp, {..., sp, ...}
2128 triggers a bug on most SA-110 based devices, such that the stack
2129 pointer won't be correctly restored if the instruction takes a
2130 page fault. We work around this problem by popping r3 along with
2131 the other registers, since that is never slower than executing
2132 another instruction.
2134 We test for !arm_arch5 here, because code for any architecture
2135 less than this could potentially be run on one of the buggy
2137 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2139 /* Validate that r3 is a call-clobbered register (always true in
2140 the default abi) ... */
2141 if (!call_used_regs[3])
2144 /* ... that it isn't being used for a return value ... */
2145 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2148 /* ... or for a tail-call argument ... */
2151 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2153 if (find_regno_fusage (sibling, USE, 3))
2157 /* ... and that there are no call-saved registers in r0-r2
2158 (always true in the default ABI). */
2159 if (saved_int_regs & 0x7)
2163 /* Can't be done if interworking with Thumb, and any registers have been
2165 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2168 /* On StrongARM, conditional returns are expensive if they aren't
2169 taken and multiple registers have been stacked. */
2170 if (iscond && arm_tune_strongarm)
2172 /* Conditional return when just the LR is stored is a simple
2173 conditional-load instruction, that's not expensive. */
2174 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2178 && arm_pic_register != INVALID_REGNUM
2179 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2183 /* If there are saved registers but the LR isn't saved, then we need
2184 two instructions for the return. */
2185 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2188 /* Can't be done if any of the FPA regs are pushed,
2189 since this also requires an insn. */
2190 if (TARGET_HARD_FLOAT && TARGET_FPA)
2191 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2192 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2195 /* Likewise VFP regs. */
2196 if (TARGET_HARD_FLOAT && TARGET_VFP)
2197 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2198 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2201 if (TARGET_REALLY_IWMMXT)
2202 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2203 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2209 /* Return TRUE if int I is a valid immediate ARM constant. */
2212 const_ok_for_arm (HOST_WIDE_INT i)
2216 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2217 be all zero, or all one. */
2218 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2219 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2220 != ((~(unsigned HOST_WIDE_INT) 0)
2221 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2224 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2226 /* Fast return for 0 and small values. We must do this for zero, since
2227 the code below can't handle that one case. */
2228 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2231 /* Get the number of trailing zeros. */
2232 lowbit = ffs((int) i) - 1;
2234 /* Only even shifts are allowed in ARM mode so round down to the
2235 nearest even number. */
2239 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2244 /* Allow rotated constants in ARM mode. */
2246 && ((i & ~0xc000003f) == 0
2247 || (i & ~0xf000000f) == 0
2248 || (i & ~0xfc000003) == 0))
2255 /* Allow repeated pattern. */
2258 if (i == v || i == (v | (v << 8)))
2265 /* Return true if I is a valid constant for the operation CODE. */
2267 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2269 if (const_ok_for_arm (i))
2293 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2295 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2301 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2305 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2312 /* Emit a sequence of insns to handle a large constant.
2313 CODE is the code of the operation required, it can be any of SET, PLUS,
2314 IOR, AND, XOR, MINUS;
2315 MODE is the mode in which the operation is being performed;
2316 VAL is the integer to operate on;
2317 SOURCE is the other operand (a register, or a null-pointer for SET);
2318 SUBTARGETS means it is safe to create scratch registers if that will
2319 either produce a simpler sequence, or we will want to cse the values.
2320 Return value is the number of insns emitted. */
2322 /* ??? Tweak this for thumb2. */
2324 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2325 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2329 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2330 cond = COND_EXEC_TEST (PATTERN (insn));
2334 if (subtargets || code == SET
2335 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2336 && REGNO (target) != REGNO (source)))
2338 /* After arm_reorg has been called, we can't fix up expensive
2339 constants by pushing them into memory so we must synthesize
2340 them in-line, regardless of the cost. This is only likely to
2341 be more costly on chips that have load delay slots and we are
2342 compiling without running the scheduler (so no splitting
2343 occurred before the final instruction emission).
2345 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2347 if (!after_arm_reorg
2349 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2351 > arm_constant_limit + (code != SET)))
2355 /* Currently SET is the only monadic value for CODE, all
2356 the rest are diadic. */
2357 if (TARGET_USE_MOVT)
2358 arm_emit_movpair (target, GEN_INT (val));
2360 emit_set_insn (target, GEN_INT (val));
2366 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2368 if (TARGET_USE_MOVT)
2369 arm_emit_movpair (temp, GEN_INT (val));
2371 emit_set_insn (temp, GEN_INT (val));
2373 /* For MINUS, the value is subtracted from, since we never
2374 have subtraction of a constant. */
2376 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2378 emit_set_insn (target,
2379 gen_rtx_fmt_ee (code, mode, source, temp));
2385 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2389 /* Return the number of ARM instructions required to synthesize the given
2392 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2394 HOST_WIDE_INT temp1;
2402 if (remainder & (3 << (i - 2)))
2407 temp1 = remainder & ((0x0ff << end)
2408 | ((i < end) ? (0xff >> (32 - end)) : 0));
2409 remainder &= ~temp1;
2414 } while (remainder);
2418 /* Emit an instruction with the indicated PATTERN. If COND is
2419 non-NULL, conditionalize the execution of the instruction on COND
2423 emit_constant_insn (rtx cond, rtx pattern)
2426 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2427 emit_insn (pattern);
2430 /* As above, but extra parameter GENERATE which, if clear, suppresses
2432 /* ??? This needs more work for thumb2. */
2435 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2436 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2441 int can_negate_initial = 0;
2444 int num_bits_set = 0;
2445 int set_sign_bit_copies = 0;
2446 int clear_sign_bit_copies = 0;
2447 int clear_zero_bit_copies = 0;
2448 int set_zero_bit_copies = 0;
2450 unsigned HOST_WIDE_INT temp1, temp2;
2451 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2453 /* Find out which operations are safe for a given CODE. Also do a quick
2454 check for degenerate cases; these can occur when DImode operations
2466 can_negate_initial = 1;
2470 if (remainder == 0xffffffff)
2473 emit_constant_insn (cond,
2474 gen_rtx_SET (VOIDmode, target,
2475 GEN_INT (ARM_SIGN_EXTEND (val))));
2481 if (reload_completed && rtx_equal_p (target, source))
2485 emit_constant_insn (cond,
2486 gen_rtx_SET (VOIDmode, target, source));
2498 emit_constant_insn (cond,
2499 gen_rtx_SET (VOIDmode, target, const0_rtx));
2502 if (remainder == 0xffffffff)
2504 if (reload_completed && rtx_equal_p (target, source))
2507 emit_constant_insn (cond,
2508 gen_rtx_SET (VOIDmode, target, source));
2517 if (reload_completed && rtx_equal_p (target, source))
2520 emit_constant_insn (cond,
2521 gen_rtx_SET (VOIDmode, target, source));
2525 /* We don't know how to handle other cases yet. */
2526 gcc_assert (remainder == 0xffffffff);
2529 emit_constant_insn (cond,
2530 gen_rtx_SET (VOIDmode, target,
2531 gen_rtx_NOT (mode, source)));
2535 /* We treat MINUS as (val - source), since (source - val) is always
2536 passed as (source + (-val)). */
2540 emit_constant_insn (cond,
2541 gen_rtx_SET (VOIDmode, target,
2542 gen_rtx_NEG (mode, source)));
2545 if (const_ok_for_arm (val))
2548 emit_constant_insn (cond,
2549 gen_rtx_SET (VOIDmode, target,
2550 gen_rtx_MINUS (mode, GEN_INT (val),
2562 /* If we can do it in one insn get out quickly. */
2563 if (const_ok_for_arm (val)
2564 || (can_negate_initial && const_ok_for_arm (-val))
2565 || (can_invert && const_ok_for_arm (~val)))
2568 emit_constant_insn (cond,
2569 gen_rtx_SET (VOIDmode, target,
2571 ? gen_rtx_fmt_ee (code, mode, source,
2577 /* Calculate a few attributes that may be useful for specific
2579 /* Count number of leading zeros. */
2580 for (i = 31; i >= 0; i--)
2582 if ((remainder & (1 << i)) == 0)
2583 clear_sign_bit_copies++;
2588 /* Count number of leading 1's. */
2589 for (i = 31; i >= 0; i--)
2591 if ((remainder & (1 << i)) != 0)
2592 set_sign_bit_copies++;
2597 /* Count number of trailing zero's. */
2598 for (i = 0; i <= 31; i++)
2600 if ((remainder & (1 << i)) == 0)
2601 clear_zero_bit_copies++;
2606 /* Count number of trailing 1's. */
2607 for (i = 0; i <= 31; i++)
2609 if ((remainder & (1 << i)) != 0)
2610 set_zero_bit_copies++;
2618 /* See if we can use movw. */
2619 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2622 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2627 /* See if we can do this by sign_extending a constant that is known
2628 to be negative. This is a good, way of doing it, since the shift
2629 may well merge into a subsequent insn. */
2630 if (set_sign_bit_copies > 1)
2632 if (const_ok_for_arm
2633 (temp1 = ARM_SIGN_EXTEND (remainder
2634 << (set_sign_bit_copies - 1))))
2638 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2639 emit_constant_insn (cond,
2640 gen_rtx_SET (VOIDmode, new_src,
2642 emit_constant_insn (cond,
2643 gen_ashrsi3 (target, new_src,
2644 GEN_INT (set_sign_bit_copies - 1)));
2648 /* For an inverted constant, we will need to set the low bits,
2649 these will be shifted out of harm's way. */
2650 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2651 if (const_ok_for_arm (~temp1))
2655 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2656 emit_constant_insn (cond,
2657 gen_rtx_SET (VOIDmode, new_src,
2659 emit_constant_insn (cond,
2660 gen_ashrsi3 (target, new_src,
2661 GEN_INT (set_sign_bit_copies - 1)));
2667 /* See if we can calculate the value as the difference between two
2668 valid immediates. */
2669 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2671 int topshift = clear_sign_bit_copies & ~1;
2673 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2674 & (0xff000000 >> topshift));
2676 /* If temp1 is zero, then that means the 9 most significant
2677 bits of remainder were 1 and we've caused it to overflow.
2678 When topshift is 0 we don't need to do anything since we
2679 can borrow from 'bit 32'. */
2680 if (temp1 == 0 && topshift != 0)
2681 temp1 = 0x80000000 >> (topshift - 1);
2683 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2685 if (const_ok_for_arm (temp2))
2689 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2690 emit_constant_insn (cond,
2691 gen_rtx_SET (VOIDmode, new_src,
2693 emit_constant_insn (cond,
2694 gen_addsi3 (target, new_src,
2702 /* See if we can generate this by setting the bottom (or the top)
2703 16 bits, and then shifting these into the other half of the
2704 word. We only look for the simplest cases, to do more would cost
2705 too much. Be careful, however, not to generate this when the
2706 alternative would take fewer insns. */
2707 if (val & 0xffff0000)
2709 temp1 = remainder & 0xffff0000;
2710 temp2 = remainder & 0x0000ffff;
2712 /* Overlaps outside this range are best done using other methods. */
2713 for (i = 9; i < 24; i++)
2715 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2716 && !const_ok_for_arm (temp2))
2718 rtx new_src = (subtargets
2719 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2721 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2722 source, subtargets, generate);
2730 gen_rtx_ASHIFT (mode, source,
2737 /* Don't duplicate cases already considered. */
2738 for (i = 17; i < 24; i++)
2740 if (((temp1 | (temp1 >> i)) == remainder)
2741 && !const_ok_for_arm (temp1))
2743 rtx new_src = (subtargets
2744 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2746 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2747 source, subtargets, generate);
2752 gen_rtx_SET (VOIDmode, target,
2755 gen_rtx_LSHIFTRT (mode, source,
2766 /* If we have IOR or XOR, and the constant can be loaded in a
2767 single instruction, and we can find a temporary to put it in,
2768 then this can be done in two instructions instead of 3-4. */
2770 /* TARGET can't be NULL if SUBTARGETS is 0 */
2771 || (reload_completed && !reg_mentioned_p (target, source)))
2773 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2777 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2779 emit_constant_insn (cond,
2780 gen_rtx_SET (VOIDmode, sub,
2782 emit_constant_insn (cond,
2783 gen_rtx_SET (VOIDmode, target,
2784 gen_rtx_fmt_ee (code, mode,
2795 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2796 and the remainder 0s for e.g. 0xfff00000)
2797 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2799 This can be done in 2 instructions by using shifts with mov or mvn.
2804 mvn r0, r0, lsr #12 */
2805 if (set_sign_bit_copies > 8
2806 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2810 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2811 rtx shift = GEN_INT (set_sign_bit_copies);
2815 gen_rtx_SET (VOIDmode, sub,
2817 gen_rtx_ASHIFT (mode,
2822 gen_rtx_SET (VOIDmode, target,
2824 gen_rtx_LSHIFTRT (mode, sub,
2831 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2833 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2835 For eg. r0 = r0 | 0xfff
2840 if (set_zero_bit_copies > 8
2841 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2845 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2846 rtx shift = GEN_INT (set_zero_bit_copies);
2850 gen_rtx_SET (VOIDmode, sub,
2852 gen_rtx_LSHIFTRT (mode,
2857 gen_rtx_SET (VOIDmode, target,
2859 gen_rtx_ASHIFT (mode, sub,
2865 /* This will never be reached for Thumb2 because orn is a valid
2866 instruction. This is for Thumb1 and the ARM 32 bit cases.
2868 x = y | constant (such that ~constant is a valid constant)
2870 x = ~(~y & ~constant).
2872 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2876 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2877 emit_constant_insn (cond,
2878 gen_rtx_SET (VOIDmode, sub,
2879 gen_rtx_NOT (mode, source)));
2882 sub = gen_reg_rtx (mode);
2883 emit_constant_insn (cond,
2884 gen_rtx_SET (VOIDmode, sub,
2885 gen_rtx_AND (mode, source,
2887 emit_constant_insn (cond,
2888 gen_rtx_SET (VOIDmode, target,
2889 gen_rtx_NOT (mode, sub)));
2896 /* See if two shifts will do 2 or more insn's worth of work. */
2897 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2899 HOST_WIDE_INT shift_mask = ((0xffffffff
2900 << (32 - clear_sign_bit_copies))
2903 if ((remainder | shift_mask) != 0xffffffff)
2907 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2908 insns = arm_gen_constant (AND, mode, cond,
2909 remainder | shift_mask,
2910 new_src, source, subtargets, 1);
2915 rtx targ = subtargets ? NULL_RTX : target;
2916 insns = arm_gen_constant (AND, mode, cond,
2917 remainder | shift_mask,
2918 targ, source, subtargets, 0);
2924 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2925 rtx shift = GEN_INT (clear_sign_bit_copies);
2927 emit_insn (gen_ashlsi3 (new_src, source, shift));
2928 emit_insn (gen_lshrsi3 (target, new_src, shift));
2934 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2936 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2938 if ((remainder | shift_mask) != 0xffffffff)
2942 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2944 insns = arm_gen_constant (AND, mode, cond,
2945 remainder | shift_mask,
2946 new_src, source, subtargets, 1);
2951 rtx targ = subtargets ? NULL_RTX : target;
2953 insns = arm_gen_constant (AND, mode, cond,
2954 remainder | shift_mask,
2955 targ, source, subtargets, 0);
2961 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2962 rtx shift = GEN_INT (clear_zero_bit_copies);
2964 emit_insn (gen_lshrsi3 (new_src, source, shift));
2965 emit_insn (gen_ashlsi3 (target, new_src, shift));
2977 for (i = 0; i < 32; i++)
2978 if (remainder & (1 << i))
2982 || (code != IOR && can_invert && num_bits_set > 16))
2983 remainder = (~remainder) & 0xffffffff;
2984 else if (code == PLUS && num_bits_set > 16)
2985 remainder = (-remainder) & 0xffffffff;
2992 /* Now try and find a way of doing the job in either two or three
2994 We start by looking for the largest block of zeros that are aligned on
2995 a 2-bit boundary, we then fill up the temps, wrapping around to the
2996 top of the word when we drop off the bottom.
2997 In the worst case this code should produce no more than four insns.
2998 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2999 best place to start. */
3001 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3007 int best_consecutive_zeros = 0;
3009 for (i = 0; i < 32; i += 2)
3011 int consecutive_zeros = 0;
3013 if (!(remainder & (3 << i)))
3015 while ((i < 32) && !(remainder & (3 << i)))
3017 consecutive_zeros += 2;
3020 if (consecutive_zeros > best_consecutive_zeros)
3022 best_consecutive_zeros = consecutive_zeros;
3023 best_start = i - consecutive_zeros;
3029 /* So long as it won't require any more insns to do so, it's
3030 desirable to emit a small constant (in bits 0...9) in the last
3031 insn. This way there is more chance that it can be combined with
3032 a later addressing insn to form a pre-indexed load or store
3033 operation. Consider:
3035 *((volatile int *)0xe0000100) = 1;
3036 *((volatile int *)0xe0000110) = 2;
3038 We want this to wind up as:
3042 str rB, [rA, #0x100]
3044 str rB, [rA, #0x110]
3046 rather than having to synthesize both large constants from scratch.
3048 Therefore, we calculate how many insns would be required to emit
3049 the constant starting from `best_start', and also starting from
3050 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3051 yield a shorter sequence, we may as well use zero. */
3053 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
3054 && (count_insns_for_constant (remainder, 0) <=
3055 count_insns_for_constant (remainder, best_start)))
3059 /* Now start emitting the insns. */
3067 if (remainder & (3 << (i - 2)))
3072 temp1 = remainder & ((0x0ff << end)
3073 | ((i < end) ? (0xff >> (32 - end)) : 0));
3074 remainder &= ~temp1;
3078 rtx new_src, temp1_rtx;
3080 if (code == SET || code == MINUS)
3082 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3083 if (can_invert && code != MINUS)
3088 if (remainder && subtargets)
3089 new_src = gen_reg_rtx (mode);
3094 else if (can_negate)
3098 temp1 = trunc_int_for_mode (temp1, mode);
3099 temp1_rtx = GEN_INT (temp1);
3103 else if (code == MINUS)
3104 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3106 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3108 emit_constant_insn (cond,
3109 gen_rtx_SET (VOIDmode, new_src,
3119 else if (code == MINUS)
3128 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3141 /* Canonicalize a comparison so that we are more likely to recognize it.
3142 This can be done for a few constant compares, where we can make the
3143 immediate value easier to load. */
3146 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3149 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3150 unsigned HOST_WIDE_INT maxval;
3151 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3162 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3164 *op1 = GEN_INT (i + 1);
3165 return code == GT ? GE : LT;
3172 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3174 *op1 = GEN_INT (i - 1);
3175 return code == GE ? GT : LE;
3181 if (i != ~((unsigned HOST_WIDE_INT) 0)
3182 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3184 *op1 = GEN_INT (i + 1);
3185 return code == GTU ? GEU : LTU;
3192 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3194 *op1 = GEN_INT (i - 1);
3195 return code == GEU ? GTU : LEU;
3207 /* Define how to find the value returned by a function. */
3210 arm_function_value(const_tree type, const_tree func,
3211 bool outgoing ATTRIBUTE_UNUSED)
3213 enum machine_mode mode;
3214 int unsignedp ATTRIBUTE_UNUSED;
3215 rtx r ATTRIBUTE_UNUSED;
3217 mode = TYPE_MODE (type);
3219 if (TARGET_AAPCS_BASED)
3220 return aapcs_allocate_return_reg (mode, type, func);
3222 /* Promote integer types. */
3223 if (INTEGRAL_TYPE_P (type))
3224 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3226 /* Promotes small structs returned in a register to full-word size
3227 for big-endian AAPCS. */
3228 if (arm_return_in_msb (type))
3230 HOST_WIDE_INT size = int_size_in_bytes (type);
3231 if (size % UNITS_PER_WORD != 0)
3233 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3234 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3238 return LIBCALL_VALUE (mode);
3242 libcall_eq (const void *p1, const void *p2)
3244 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3248 libcall_hash (const void *p1)
3250 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3254 add_libcall (htab_t htab, rtx libcall)
3256 *htab_find_slot (htab, libcall, INSERT) = libcall;
3260 arm_libcall_uses_aapcs_base (rtx libcall)
3262 static bool init_done = false;
3263 static htab_t libcall_htab;
3269 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3271 add_libcall (libcall_htab,
3272 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3273 add_libcall (libcall_htab,
3274 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3275 add_libcall (libcall_htab,
3276 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3277 add_libcall (libcall_htab,
3278 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3280 add_libcall (libcall_htab,
3281 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3282 add_libcall (libcall_htab,
3283 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3284 add_libcall (libcall_htab,
3285 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3286 add_libcall (libcall_htab,
3287 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3289 add_libcall (libcall_htab,
3290 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3291 add_libcall (libcall_htab,
3292 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3293 add_libcall (libcall_htab,
3294 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3295 add_libcall (libcall_htab,
3296 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3297 add_libcall (libcall_htab,
3298 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3299 add_libcall (libcall_htab,
3300 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3303 return libcall && htab_find (libcall_htab, libcall) != NULL;
3307 arm_libcall_value (enum machine_mode mode, rtx libcall)
3309 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3310 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3312 /* The following libcalls return their result in integer registers,
3313 even though they return a floating point value. */
3314 if (arm_libcall_uses_aapcs_base (libcall))
3315 return gen_rtx_REG (mode, ARG_REGISTER(1));
3319 return LIBCALL_VALUE (mode);
3322 /* Determine the amount of memory needed to store the possible return
3323 registers of an untyped call. */
3325 arm_apply_result_size (void)
3331 if (TARGET_HARD_FLOAT_ABI)
3337 if (TARGET_MAVERICK)
3340 if (TARGET_IWMMXT_ABI)
3347 /* Decide whether TYPE should be returned in memory (true)
3348 or in a register (false). FNTYPE is the type of the function making
3351 arm_return_in_memory (const_tree type, const_tree fntype)
3355 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3357 if (TARGET_AAPCS_BASED)
3359 /* Simple, non-aggregate types (ie not including vectors and
3360 complex) are always returned in a register (or registers).
3361 We don't care about which register here, so we can short-cut
3362 some of the detail. */
3363 if (!AGGREGATE_TYPE_P (type)
3364 && TREE_CODE (type) != VECTOR_TYPE
3365 && TREE_CODE (type) != COMPLEX_TYPE)
3368 /* Any return value that is no larger than one word can be
3370 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3373 /* Check any available co-processors to see if they accept the
3374 type as a register candidate (VFP, for example, can return
3375 some aggregates in consecutive registers). These aren't
3376 available if the call is variadic. */
3377 if (aapcs_select_return_coproc (type, fntype) >= 0)
3380 /* Vector values should be returned using ARM registers, not
3381 memory (unless they're over 16 bytes, which will break since
3382 we only have four call-clobbered registers to play with). */
3383 if (TREE_CODE (type) == VECTOR_TYPE)
3384 return (size < 0 || size > (4 * UNITS_PER_WORD));
3386 /* The rest go in memory. */
3390 if (TREE_CODE (type) == VECTOR_TYPE)
3391 return (size < 0 || size > (4 * UNITS_PER_WORD));
3393 if (!AGGREGATE_TYPE_P (type) &&
3394 (TREE_CODE (type) != VECTOR_TYPE))
3395 /* All simple types are returned in registers. */
3398 if (arm_abi != ARM_ABI_APCS)
3400 /* ATPCS and later return aggregate types in memory only if they are
3401 larger than a word (or are variable size). */
3402 return (size < 0 || size > UNITS_PER_WORD);
3405 /* For the arm-wince targets we choose to be compatible with Microsoft's
3406 ARM and Thumb compilers, which always return aggregates in memory. */
3408 /* All structures/unions bigger than one word are returned in memory.
3409 Also catch the case where int_size_in_bytes returns -1. In this case
3410 the aggregate is either huge or of variable size, and in either case
3411 we will want to return it via memory and not in a register. */
3412 if (size < 0 || size > UNITS_PER_WORD)
3415 if (TREE_CODE (type) == RECORD_TYPE)
3419 /* For a struct the APCS says that we only return in a register
3420 if the type is 'integer like' and every addressable element
3421 has an offset of zero. For practical purposes this means
3422 that the structure can have at most one non bit-field element
3423 and that this element must be the first one in the structure. */
3425 /* Find the first field, ignoring non FIELD_DECL things which will
3426 have been created by C++. */
3427 for (field = TYPE_FIELDS (type);
3428 field && TREE_CODE (field) != FIELD_DECL;
3429 field = TREE_CHAIN (field))
3433 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3435 /* Check that the first field is valid for returning in a register. */
3437 /* ... Floats are not allowed */
3438 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3441 /* ... Aggregates that are not themselves valid for returning in
3442 a register are not allowed. */
3443 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3446 /* Now check the remaining fields, if any. Only bitfields are allowed,
3447 since they are not addressable. */
3448 for (field = TREE_CHAIN (field);
3450 field = TREE_CHAIN (field))
3452 if (TREE_CODE (field) != FIELD_DECL)
3455 if (!DECL_BIT_FIELD_TYPE (field))
3462 if (TREE_CODE (type) == UNION_TYPE)
3466 /* Unions can be returned in registers if every element is
3467 integral, or can be returned in an integer register. */
3468 for (field = TYPE_FIELDS (type);
3470 field = TREE_CHAIN (field))
3472 if (TREE_CODE (field) != FIELD_DECL)
3475 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3478 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3484 #endif /* not ARM_WINCE */
3486 /* Return all other types in memory. */
3490 /* Indicate whether or not words of a double are in big-endian order. */
3493 arm_float_words_big_endian (void)
3495 if (TARGET_MAVERICK)
3498 /* For FPA, float words are always big-endian. For VFP, floats words
3499 follow the memory system mode. */
3507 return (TARGET_BIG_END ? 1 : 0);
3512 const struct pcs_attribute_arg
3516 } pcs_attribute_args[] =
3518 {"aapcs", ARM_PCS_AAPCS},
3519 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3521 /* We could recognize these, but changes would be needed elsewhere
3522 * to implement them. */
3523 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3524 {"atpcs", ARM_PCS_ATPCS},
3525 {"apcs", ARM_PCS_APCS},
3527 {NULL, ARM_PCS_UNKNOWN}
3531 arm_pcs_from_attribute (tree attr)
3533 const struct pcs_attribute_arg *ptr;
3536 /* Get the value of the argument. */
3537 if (TREE_VALUE (attr) == NULL_TREE
3538 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3539 return ARM_PCS_UNKNOWN;
3541 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3543 /* Check it against the list of known arguments. */
3544 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3545 if (streq (arg, ptr->arg))
3548 /* An unrecognized interrupt type. */
3549 return ARM_PCS_UNKNOWN;
3552 /* Get the PCS variant to use for this call. TYPE is the function's type
3553 specification, DECL is the specific declartion. DECL may be null if
3554 the call could be indirect or if this is a library call. */
3556 arm_get_pcs_model (const_tree type, const_tree decl)
3558 bool user_convention = false;
3559 enum arm_pcs user_pcs = arm_pcs_default;
3564 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3567 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3568 user_convention = true;
3571 if (TARGET_AAPCS_BASED)
3573 /* Detect varargs functions. These always use the base rules
3574 (no argument is ever a candidate for a co-processor
3576 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3577 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3578 != void_type_node));
3580 if (user_convention)
3582 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3583 sorry ("Non-AAPCS derived PCS variant");
3584 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3585 error ("Variadic functions must use the base AAPCS variant");
3589 return ARM_PCS_AAPCS;
3590 else if (user_convention)
3592 else if (decl && flag_unit_at_a_time)
3594 /* Local functions never leak outside this compilation unit,
3595 so we are free to use whatever conventions are
3597 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3598 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3600 return ARM_PCS_AAPCS_LOCAL;
3603 else if (user_convention && user_pcs != arm_pcs_default)
3604 sorry ("PCS variant");
3606 /* For everything else we use the target's default. */
3607 return arm_pcs_default;
3612 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3613 const_tree fntype ATTRIBUTE_UNUSED,
3614 rtx libcall ATTRIBUTE_UNUSED,
3615 const_tree fndecl ATTRIBUTE_UNUSED)
3617 /* Record the unallocated VFP registers. */
3618 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3619 pcum->aapcs_vfp_reg_alloc = 0;
3622 /* Walk down the type tree of TYPE counting consecutive base elements.
3623 If *MODEP is VOIDmode, then set it to the first valid floating point
3624 type. If a non-floating point type is found, or if a floating point
3625 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3626 otherwise return the count in the sub-tree. */
3628 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3630 enum machine_mode mode;
3633 switch (TREE_CODE (type))
3636 mode = TYPE_MODE (type);
3637 if (mode != DFmode && mode != SFmode)
3640 if (*modep == VOIDmode)
3649 mode = TYPE_MODE (TREE_TYPE (type));
3650 if (mode != DFmode && mode != SFmode)
3653 if (*modep == VOIDmode)
3662 /* Use V2SImode and V4SImode as representatives of all 64-bit
3663 and 128-bit vector types, whether or not those modes are
3664 supported with the present options. */
3665 size = int_size_in_bytes (type);
3678 if (*modep == VOIDmode)
3681 /* Vector modes are considered to be opaque: two vectors are
3682 equivalent for the purposes of being homogeneous aggregates
3683 if they are the same size. */
3692 tree index = TYPE_DOMAIN (type);
3694 /* Can't handle incomplete types. */
3695 if (!COMPLETE_TYPE_P(type))
3698 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3701 || !TYPE_MAX_VALUE (index)
3702 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3703 || !TYPE_MIN_VALUE (index)
3704 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3708 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3709 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3711 /* There must be no padding. */
3712 if (!host_integerp (TYPE_SIZE (type), 1)
3713 || (tree_low_cst (TYPE_SIZE (type), 1)
3714 != count * GET_MODE_BITSIZE (*modep)))
3726 /* Can't handle incomplete types. */
3727 if (!COMPLETE_TYPE_P(type))
3730 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3732 if (TREE_CODE (field) != FIELD_DECL)
3735 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3741 /* There must be no padding. */
3742 if (!host_integerp (TYPE_SIZE (type), 1)
3743 || (tree_low_cst (TYPE_SIZE (type), 1)
3744 != count * GET_MODE_BITSIZE (*modep)))
3751 case QUAL_UNION_TYPE:
3753 /* These aren't very interesting except in a degenerate case. */
3758 /* Can't handle incomplete types. */
3759 if (!COMPLETE_TYPE_P(type))
3762 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3764 if (TREE_CODE (field) != FIELD_DECL)
3767 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3770 count = count > sub_count ? count : sub_count;
3773 /* There must be no padding. */
3774 if (!host_integerp (TYPE_SIZE (type), 1)
3775 || (tree_low_cst (TYPE_SIZE (type), 1)
3776 != count * GET_MODE_BITSIZE (*modep)))
3790 aapcs_vfp_is_call_or_return_candidate (enum machine_mode mode, const_tree type,
3791 enum machine_mode *base_mode,
3794 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3795 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3796 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3802 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3805 *base_mode = (mode == DCmode ? DFmode : SFmode);
3808 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3810 enum machine_mode aggregate_mode = VOIDmode;
3811 int ag_count = aapcs_vfp_sub_candidate (type, &aggregate_mode);
3813 if (ag_count > 0 && ag_count <= 4)
3816 *base_mode = aggregate_mode;
3824 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3825 enum machine_mode mode, const_tree type)
3827 int count ATTRIBUTE_UNUSED;
3828 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3830 if (!(pcs_variant == ARM_PCS_AAPCS_VFP
3831 || (pcs_variant == ARM_PCS_AAPCS_LOCAL
3832 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3834 return aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
3838 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3841 if (!(pcum->pcs_variant == ARM_PCS_AAPCS_VFP
3842 || (pcum->pcs_variant == ARM_PCS_AAPCS_LOCAL
3843 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3845 return aapcs_vfp_is_call_or_return_candidate (mode, type,
3846 &pcum->aapcs_vfp_rmode,
3847 &pcum->aapcs_vfp_rcount);
3851 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3852 const_tree type ATTRIBUTE_UNUSED)
3854 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3855 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3858 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3859 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3861 pcum->aapcs_vfp_reg_alloc = mask << regno;
3862 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3865 int rcount = pcum->aapcs_vfp_rcount;
3867 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3871 /* Avoid using unsupported vector modes. */
3872 if (rmode == V2SImode)
3874 else if (rmode == V4SImode)
3881 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3882 for (i = 0; i < rcount; i++)
3884 rtx tmp = gen_rtx_REG (rmode,
3885 FIRST_VFP_REGNUM + regno + i * rshift);
3886 tmp = gen_rtx_EXPR_LIST
3888 GEN_INT (i * GET_MODE_SIZE (rmode)));
3889 XVECEXP (par, 0, i) = tmp;
3892 pcum->aapcs_reg = par;
3895 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3902 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3903 enum machine_mode mode,
3904 const_tree type ATTRIBUTE_UNUSED)
3906 if (!(pcs_variant == ARM_PCS_AAPCS_VFP
3907 || (pcs_variant == ARM_PCS_AAPCS_LOCAL
3908 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3910 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3913 enum machine_mode ag_mode;
3918 aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
3922 if (ag_mode == V2SImode)
3924 else if (ag_mode == V4SImode)
3930 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3931 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3932 for (i = 0; i < count; i++)
3934 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3935 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3936 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
3937 XVECEXP (par, 0, i) = tmp;
3943 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
3947 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3948 enum machine_mode mode ATTRIBUTE_UNUSED,
3949 const_tree type ATTRIBUTE_UNUSED)
3951 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
3952 pcum->aapcs_vfp_reg_alloc = 0;
3956 #define AAPCS_CP(X) \
3958 aapcs_ ## X ## _cum_init, \
3959 aapcs_ ## X ## _is_call_candidate, \
3960 aapcs_ ## X ## _allocate, \
3961 aapcs_ ## X ## _is_return_candidate, \
3962 aapcs_ ## X ## _allocate_return_reg, \
3963 aapcs_ ## X ## _advance \
3966 /* Table of co-processors that can be used to pass arguments in
3967 registers. Idealy no arugment should be a candidate for more than
3968 one co-processor table entry, but the table is processed in order
3969 and stops after the first match. If that entry then fails to put
3970 the argument into a co-processor register, the argument will go on
3974 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
3975 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
3977 /* Return true if an argument of mode MODE (or type TYPE if MODE is
3978 BLKmode) is a candidate for this co-processor's registers; this
3979 function should ignore any position-dependent state in
3980 CUMULATIVE_ARGS and only use call-type dependent information. */
3981 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3983 /* Return true if the argument does get a co-processor register; it
3984 should set aapcs_reg to an RTX of the register allocated as is
3985 required for a return from FUNCTION_ARG. */
3986 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3988 /* Return true if a result of mode MODE (or type TYPE if MODE is
3989 BLKmode) is can be returned in this co-processor's registers. */
3990 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
3992 /* Allocate and return an RTX element to hold the return type of a
3993 call, this routine must not fail and will only be called if
3994 is_return_candidate returned true with the same parameters. */
3995 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
3997 /* Finish processing this argument and prepare to start processing
3999 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4000 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4008 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4013 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4014 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4021 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4023 /* We aren't passed a decl, so we can't check that a call is local.
4024 However, it isn't clear that that would be a win anyway, since it
4025 might limit some tail-calling opportunities. */
4026 enum arm_pcs pcs_variant;
4030 const_tree fndecl = NULL_TREE;
4032 if (TREE_CODE (fntype) == FUNCTION_DECL)
4035 fntype = TREE_TYPE (fntype);
4038 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4041 pcs_variant = arm_pcs_default;
4043 if (pcs_variant != ARM_PCS_AAPCS)
4047 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4048 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4057 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4060 /* We aren't passed a decl, so we can't check that a call is local.
4061 However, it isn't clear that that would be a win anyway, since it
4062 might limit some tail-calling opportunities. */
4063 enum arm_pcs pcs_variant;
4064 int unsignedp ATTRIBUTE_UNUSED;
4068 const_tree fndecl = NULL_TREE;
4070 if (TREE_CODE (fntype) == FUNCTION_DECL)
4073 fntype = TREE_TYPE (fntype);
4076 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4079 pcs_variant = arm_pcs_default;
4081 /* Promote integer types. */
4082 if (type && INTEGRAL_TYPE_P (type))
4083 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4085 if (pcs_variant != ARM_PCS_AAPCS)
4089 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4090 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4092 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4096 /* Promotes small structs returned in a register to full-word size
4097 for big-endian AAPCS. */
4098 if (type && arm_return_in_msb (type))
4100 HOST_WIDE_INT size = int_size_in_bytes (type);
4101 if (size % UNITS_PER_WORD != 0)
4103 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4104 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4108 return gen_rtx_REG (mode, R0_REGNUM);
4112 aapcs_libcall_value (enum machine_mode mode)
4114 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4117 /* Lay out a function argument using the AAPCS rules. The rule
4118 numbers referred to here are those in the AAPCS. */
4120 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4121 tree type, int named)
4126 /* We only need to do this once per argument. */
4127 if (pcum->aapcs_arg_processed)
4130 pcum->aapcs_arg_processed = true;
4132 /* Special case: if named is false then we are handling an incoming
4133 anonymous argument which is on the stack. */
4137 /* Is this a potential co-processor register candidate? */
4138 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4140 int slot = aapcs_select_call_coproc (pcum, mode, type);
4141 pcum->aapcs_cprc_slot = slot;
4143 /* We don't have to apply any of the rules from part B of the
4144 preparation phase, these are handled elsewhere in the
4149 /* A Co-processor register candidate goes either in its own
4150 class of registers or on the stack. */
4151 if (!pcum->aapcs_cprc_failed[slot])
4153 /* C1.cp - Try to allocate the argument to co-processor
4155 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4158 /* C2.cp - Put the argument on the stack and note that we
4159 can't assign any more candidates in this slot. We also
4160 need to note that we have allocated stack space, so that
4161 we won't later try to split a non-cprc candidate between
4162 core registers and the stack. */
4163 pcum->aapcs_cprc_failed[slot] = true;
4164 pcum->can_split = false;
4167 /* We didn't get a register, so this argument goes on the
4169 gcc_assert (pcum->can_split == false);
4174 /* C3 - For double-word aligned arguments, round the NCRN up to the
4175 next even number. */
4176 ncrn = pcum->aapcs_ncrn;
4177 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4180 nregs = ARM_NUM_REGS2(mode, type);
4182 /* Sigh, this test should really assert that nregs > 0, but a GCC
4183 extension allows empty structs and then gives them empty size; it
4184 then allows such a structure to be passed by value. For some of
4185 the code below we have to pretend that such an argument has
4186 non-zero size so that we 'locate' it correctly either in
4187 registers or on the stack. */
4188 gcc_assert (nregs >= 0);
4190 nregs2 = nregs ? nregs : 1;
4192 /* C4 - Argument fits entirely in core registers. */
4193 if (ncrn + nregs2 <= NUM_ARG_REGS)
4195 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4196 pcum->aapcs_next_ncrn = ncrn + nregs;
4200 /* C5 - Some core registers left and there are no arguments already
4201 on the stack: split this argument between the remaining core
4202 registers and the stack. */
4203 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4205 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4206 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4207 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4211 /* C6 - NCRN is set to 4. */
4212 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4214 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4218 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4219 for a call to a function whose data type is FNTYPE.
4220 For a library call, FNTYPE is NULL. */
4222 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4224 tree fndecl ATTRIBUTE_UNUSED)
4226 /* Long call handling. */
4228 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4230 pcum->pcs_variant = arm_pcs_default;
4232 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4234 if (arm_libcall_uses_aapcs_base (libname))
4235 pcum->pcs_variant = ARM_PCS_AAPCS;
4237 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4238 pcum->aapcs_reg = NULL_RTX;
4239 pcum->aapcs_partial = 0;
4240 pcum->aapcs_arg_processed = false;
4241 pcum->aapcs_cprc_slot = -1;
4242 pcum->can_split = true;
4244 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4248 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4250 pcum->aapcs_cprc_failed[i] = false;
4251 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4259 /* On the ARM, the offset starts at 0. */
4261 pcum->iwmmxt_nregs = 0;
4262 pcum->can_split = true;
4264 /* Varargs vectors are treated the same as long long.
4265 named_count avoids having to change the way arm handles 'named' */
4266 pcum->named_count = 0;
4269 if (TARGET_REALLY_IWMMXT && fntype)
4273 for (fn_arg = TYPE_ARG_TYPES (fntype);
4275 fn_arg = TREE_CHAIN (fn_arg))
4276 pcum->named_count += 1;
4278 if (! pcum->named_count)
4279 pcum->named_count = INT_MAX;
4284 /* Return true if mode/type need doubleword alignment. */
4286 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4288 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4289 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4293 /* Determine where to put an argument to a function.
4294 Value is zero to push the argument on the stack,
4295 or a hard register in which to store the argument.
4297 MODE is the argument's machine mode.
4298 TYPE is the data type of the argument (as a tree).
4299 This is null for libcalls where that information may
4301 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4302 the preceding args and about the function being called.
4303 NAMED is nonzero if this argument is a named parameter
4304 (otherwise it is an extra parameter matching an ellipsis). */
4307 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4308 tree type, int named)
4312 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4313 a call insn (op3 of a call_value insn). */
4314 if (mode == VOIDmode)
4317 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4319 aapcs_layout_arg (pcum, mode, type, named);
4320 return pcum->aapcs_reg;
4323 /* Varargs vectors are treated the same as long long.
4324 named_count avoids having to change the way arm handles 'named' */
4325 if (TARGET_IWMMXT_ABI
4326 && arm_vector_mode_supported_p (mode)
4327 && pcum->named_count > pcum->nargs + 1)
4329 if (pcum->iwmmxt_nregs <= 9)
4330 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4333 pcum->can_split = false;
4338 /* Put doubleword aligned quantities in even register pairs. */
4340 && ARM_DOUBLEWORD_ALIGN
4341 && arm_needs_doubleword_align (mode, type))
4344 if (mode == VOIDmode)
4345 /* Pick an arbitrary value for operand 2 of the call insn. */
4348 /* Only allow splitting an arg between regs and memory if all preceding
4349 args were allocated to regs. For args passed by reference we only count
4350 the reference pointer. */
4351 if (pcum->can_split)
4354 nregs = ARM_NUM_REGS2 (mode, type);
4356 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4359 return gen_rtx_REG (mode, pcum->nregs);
4363 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4364 tree type, bool named)
4366 int nregs = pcum->nregs;
4368 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4370 aapcs_layout_arg (pcum, mode, type, named);
4371 return pcum->aapcs_partial;
4374 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4377 if (NUM_ARG_REGS > nregs
4378 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4380 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4386 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4387 tree type, bool named)
4389 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4391 aapcs_layout_arg (pcum, mode, type, named);
4393 if (pcum->aapcs_cprc_slot >= 0)
4395 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4397 pcum->aapcs_cprc_slot = -1;
4400 /* Generic stuff. */
4401 pcum->aapcs_arg_processed = false;
4402 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4403 pcum->aapcs_reg = NULL_RTX;
4404 pcum->aapcs_partial = 0;
4409 if (arm_vector_mode_supported_p (mode)
4410 && pcum->named_count > pcum->nargs
4411 && TARGET_IWMMXT_ABI)
4412 pcum->iwmmxt_nregs += 1;
4414 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4418 /* Variable sized types are passed by reference. This is a GCC
4419 extension to the ARM ABI. */
4422 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4423 enum machine_mode mode ATTRIBUTE_UNUSED,
4424 const_tree type, bool named ATTRIBUTE_UNUSED)
4426 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4429 /* Encode the current state of the #pragma [no_]long_calls. */
4432 OFF, /* No #pragma [no_]long_calls is in effect. */
4433 LONG, /* #pragma long_calls is in effect. */
4434 SHORT /* #pragma no_long_calls is in effect. */
4437 static arm_pragma_enum arm_pragma_long_calls = OFF;
4440 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4442 arm_pragma_long_calls = LONG;
4446 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4448 arm_pragma_long_calls = SHORT;
4452 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4454 arm_pragma_long_calls = OFF;
4457 /* Handle an attribute requiring a FUNCTION_DECL;
4458 arguments as in struct attribute_spec.handler. */
4460 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4461 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4463 if (TREE_CODE (*node) != FUNCTION_DECL)
4465 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4467 *no_add_attrs = true;
4473 /* Handle an "interrupt" or "isr" attribute;
4474 arguments as in struct attribute_spec.handler. */
4476 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4481 if (TREE_CODE (*node) != FUNCTION_DECL)
4483 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4485 *no_add_attrs = true;
4487 /* FIXME: the argument if any is checked for type attributes;
4488 should it be checked for decl ones? */
4492 if (TREE_CODE (*node) == FUNCTION_TYPE
4493 || TREE_CODE (*node) == METHOD_TYPE)
4495 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4497 warning (OPT_Wattributes, "%qE attribute ignored",
4499 *no_add_attrs = true;
4502 else if (TREE_CODE (*node) == POINTER_TYPE
4503 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4504 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4505 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4507 *node = build_variant_type_copy (*node);
4508 TREE_TYPE (*node) = build_type_attribute_variant
4510 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4511 *no_add_attrs = true;
4515 /* Possibly pass this attribute on from the type to a decl. */
4516 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4517 | (int) ATTR_FLAG_FUNCTION_NEXT
4518 | (int) ATTR_FLAG_ARRAY_NEXT))
4520 *no_add_attrs = true;
4521 return tree_cons (name, args, NULL_TREE);
4525 warning (OPT_Wattributes, "%qE attribute ignored",
4534 /* Handle a "pcs" attribute; arguments as in struct
4535 attribute_spec.handler. */
4537 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4538 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4540 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4542 warning (OPT_Wattributes, "%qE attribute ignored", name);
4543 *no_add_attrs = true;
4548 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4549 /* Handle the "notshared" attribute. This attribute is another way of
4550 requesting hidden visibility. ARM's compiler supports
4551 "__declspec(notshared)"; we support the same thing via an
4555 arm_handle_notshared_attribute (tree *node,
4556 tree name ATTRIBUTE_UNUSED,
4557 tree args ATTRIBUTE_UNUSED,
4558 int flags ATTRIBUTE_UNUSED,
4561 tree decl = TYPE_NAME (*node);
4565 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4566 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4567 *no_add_attrs = false;
4573 /* Return 0 if the attributes for two types are incompatible, 1 if they
4574 are compatible, and 2 if they are nearly compatible (which causes a
4575 warning to be generated). */
4577 arm_comp_type_attributes (const_tree type1, const_tree type2)
4581 /* Check for mismatch of non-default calling convention. */
4582 if (TREE_CODE (type1) != FUNCTION_TYPE)
4585 /* Check for mismatched call attributes. */
4586 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4587 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4588 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4589 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4591 /* Only bother to check if an attribute is defined. */
4592 if (l1 | l2 | s1 | s2)
4594 /* If one type has an attribute, the other must have the same attribute. */
4595 if ((l1 != l2) || (s1 != s2))
4598 /* Disallow mixed attributes. */
4599 if ((l1 & s2) || (l2 & s1))
4603 /* Check for mismatched ISR attribute. */
4604 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4606 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4607 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4609 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4616 /* Assigns default attributes to newly defined type. This is used to
4617 set short_call/long_call attributes for function types of
4618 functions defined inside corresponding #pragma scopes. */
4620 arm_set_default_type_attributes (tree type)
4622 /* Add __attribute__ ((long_call)) to all functions, when
4623 inside #pragma long_calls or __attribute__ ((short_call)),
4624 when inside #pragma no_long_calls. */
4625 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4627 tree type_attr_list, attr_name;
4628 type_attr_list = TYPE_ATTRIBUTES (type);
4630 if (arm_pragma_long_calls == LONG)
4631 attr_name = get_identifier ("long_call");
4632 else if (arm_pragma_long_calls == SHORT)
4633 attr_name = get_identifier ("short_call");
4637 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4638 TYPE_ATTRIBUTES (type) = type_attr_list;
4642 /* Return true if DECL is known to be linked into section SECTION. */
4645 arm_function_in_section_p (tree decl, section *section)
4647 /* We can only be certain about functions defined in the same
4648 compilation unit. */
4649 if (!TREE_STATIC (decl))
4652 /* Make sure that SYMBOL always binds to the definition in this
4653 compilation unit. */
4654 if (!targetm.binds_local_p (decl))
4657 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4658 if (!DECL_SECTION_NAME (decl))
4660 /* Make sure that we will not create a unique section for DECL. */
4661 if (flag_function_sections || DECL_ONE_ONLY (decl))
4665 return function_section (decl) == section;
4668 /* Return nonzero if a 32-bit "long_call" should be generated for
4669 a call from the current function to DECL. We generate a long_call
4672 a. has an __attribute__((long call))
4673 or b. is within the scope of a #pragma long_calls
4674 or c. the -mlong-calls command line switch has been specified
4676 However we do not generate a long call if the function:
4678 d. has an __attribute__ ((short_call))
4679 or e. is inside the scope of a #pragma no_long_calls
4680 or f. is defined in the same section as the current function. */
4683 arm_is_long_call_p (tree decl)
4688 return TARGET_LONG_CALLS;
4690 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4691 if (lookup_attribute ("short_call", attrs))
4694 /* For "f", be conservative, and only cater for cases in which the
4695 whole of the current function is placed in the same section. */
4696 if (!flag_reorder_blocks_and_partition
4697 && TREE_CODE (decl) == FUNCTION_DECL
4698 && arm_function_in_section_p (decl, current_function_section ()))
4701 if (lookup_attribute ("long_call", attrs))
4704 return TARGET_LONG_CALLS;
4707 /* Return nonzero if it is ok to make a tail-call to DECL. */
4709 arm_function_ok_for_sibcall (tree decl, tree exp)
4711 unsigned long func_type;
4713 if (cfun->machine->sibcall_blocked)
4716 /* Never tailcall something for which we have no decl, or if we
4717 are in Thumb mode. */
4718 if (decl == NULL || TARGET_THUMB)
4721 /* The PIC register is live on entry to VxWorks PLT entries, so we
4722 must make the call before restoring the PIC register. */
4723 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4726 /* Cannot tail-call to long calls, since these are out of range of
4727 a branch instruction. */
4728 if (arm_is_long_call_p (decl))
4731 /* If we are interworking and the function is not declared static
4732 then we can't tail-call it unless we know that it exists in this
4733 compilation unit (since it might be a Thumb routine). */
4734 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4737 func_type = arm_current_func_type ();
4738 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4739 if (IS_INTERRUPT (func_type))
4742 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4744 /* Check that the return value locations are the same. For
4745 example that we aren't returning a value from the sibling in
4746 a VFP register but then need to transfer it to a core
4750 a = arm_function_value (TREE_TYPE (exp), decl, false);
4751 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4753 if (!rtx_equal_p (a, b))
4757 /* Never tailcall if function may be called with a misaligned SP. */
4758 if (IS_STACKALIGN (func_type))
4761 /* Everything else is ok. */
4766 /* Addressing mode support functions. */
4768 /* Return nonzero if X is a legitimate immediate operand when compiling
4769 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4771 legitimate_pic_operand_p (rtx x)
4773 if (GET_CODE (x) == SYMBOL_REF
4774 || (GET_CODE (x) == CONST
4775 && GET_CODE (XEXP (x, 0)) == PLUS
4776 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4782 /* Record that the current function needs a PIC register. Initialize
4783 cfun->machine->pic_reg if we have not already done so. */
4786 require_pic_register (void)
4788 /* A lot of the logic here is made obscure by the fact that this
4789 routine gets called as part of the rtx cost estimation process.
4790 We don't want those calls to affect any assumptions about the real
4791 function; and further, we can't call entry_of_function() until we
4792 start the real expansion process. */
4793 if (!crtl->uses_pic_offset_table)
4795 gcc_assert (can_create_pseudo_p ());
4796 if (arm_pic_register != INVALID_REGNUM)
4798 if (!cfun->machine->pic_reg)
4799 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4801 /* Play games to avoid marking the function as needing pic
4802 if we are being called as part of the cost-estimation
4804 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4805 crtl->uses_pic_offset_table = 1;
4811 if (!cfun->machine->pic_reg)
4812 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4814 /* Play games to avoid marking the function as needing pic
4815 if we are being called as part of the cost-estimation
4817 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4819 crtl->uses_pic_offset_table = 1;
4822 arm_load_pic_register (0UL);
4826 /* We can be called during expansion of PHI nodes, where
4827 we can't yet emit instructions directly in the final
4828 insn stream. Queue the insns on the entry edge, they will
4829 be committed after everything else is expanded. */
4830 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4837 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4839 if (GET_CODE (orig) == SYMBOL_REF
4840 || GET_CODE (orig) == LABEL_REF)
4842 rtx pic_ref, address;
4846 /* If this function doesn't have a pic register, create one now. */
4847 require_pic_register ();
4851 gcc_assert (can_create_pseudo_p ());
4852 reg = gen_reg_rtx (Pmode);
4858 address = gen_reg_rtx (Pmode);
4863 emit_insn (gen_pic_load_addr_arm (address, orig));
4864 else if (TARGET_THUMB2)
4865 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
4866 else /* TARGET_THUMB1 */
4867 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4869 /* VxWorks does not impose a fixed gap between segments; the run-time
4870 gap can be different from the object-file gap. We therefore can't
4871 use GOTOFF unless we are absolutely sure that the symbol is in the
4872 same segment as the GOT. Unfortunately, the flexibility of linker
4873 scripts means that we can't be sure of that in general, so assume
4874 that GOTOFF is never valid on VxWorks. */
4875 if ((GET_CODE (orig) == LABEL_REF
4876 || (GET_CODE (orig) == SYMBOL_REF &&
4877 SYMBOL_REF_LOCAL_P (orig)))
4879 && !TARGET_VXWORKS_RTP)
4880 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
4883 pic_ref = gen_const_mem (Pmode,
4884 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4888 insn = emit_move_insn (reg, pic_ref);
4890 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4892 set_unique_reg_note (insn, REG_EQUAL, orig);
4896 else if (GET_CODE (orig) == CONST)
4900 if (GET_CODE (XEXP (orig, 0)) == PLUS
4901 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4904 /* Handle the case where we have: const (UNSPEC_TLS). */
4905 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4906 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4909 /* Handle the case where we have:
4910 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4912 if (GET_CODE (XEXP (orig, 0)) == PLUS
4913 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4914 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4916 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4922 gcc_assert (can_create_pseudo_p ());
4923 reg = gen_reg_rtx (Pmode);
4926 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4928 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4929 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4930 base == reg ? 0 : reg);
4932 if (GET_CODE (offset) == CONST_INT)
4934 /* The base register doesn't really matter, we only want to
4935 test the index for the appropriate mode. */
4936 if (!arm_legitimate_index_p (mode, offset, SET, 0))
4938 gcc_assert (can_create_pseudo_p ());
4939 offset = force_reg (Pmode, offset);
4942 if (GET_CODE (offset) == CONST_INT)
4943 return plus_constant (base, INTVAL (offset));
4946 if (GET_MODE_SIZE (mode) > 4
4947 && (GET_MODE_CLASS (mode) == MODE_INT
4948 || TARGET_SOFT_FLOAT))
4950 emit_insn (gen_addsi3 (reg, base, offset));
4954 return gen_rtx_PLUS (Pmode, base, offset);
4961 /* Find a spare register to use during the prolog of a function. */
4964 thumb_find_work_register (unsigned long pushed_regs_mask)
4968 /* Check the argument registers first as these are call-used. The
4969 register allocation order means that sometimes r3 might be used
4970 but earlier argument registers might not, so check them all. */
4971 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
4972 if (!df_regs_ever_live_p (reg))
4975 /* Before going on to check the call-saved registers we can try a couple
4976 more ways of deducing that r3 is available. The first is when we are
4977 pushing anonymous arguments onto the stack and we have less than 4
4978 registers worth of fixed arguments(*). In this case r3 will be part of
4979 the variable argument list and so we can be sure that it will be
4980 pushed right at the start of the function. Hence it will be available
4981 for the rest of the prologue.
4982 (*): ie crtl->args.pretend_args_size is greater than 0. */
4983 if (cfun->machine->uses_anonymous_args
4984 && crtl->args.pretend_args_size > 0)
4985 return LAST_ARG_REGNUM;
4987 /* The other case is when we have fixed arguments but less than 4 registers
4988 worth. In this case r3 might be used in the body of the function, but
4989 it is not being used to convey an argument into the function. In theory
4990 we could just check crtl->args.size to see how many bytes are
4991 being passed in argument registers, but it seems that it is unreliable.
4992 Sometimes it will have the value 0 when in fact arguments are being
4993 passed. (See testcase execute/20021111-1.c for an example). So we also
4994 check the args_info.nregs field as well. The problem with this field is
4995 that it makes no allowances for arguments that are passed to the
4996 function but which are not used. Hence we could miss an opportunity
4997 when a function has an unused argument in r3. But it is better to be
4998 safe than to be sorry. */
4999 if (! cfun->machine->uses_anonymous_args
5000 && crtl->args.size >= 0
5001 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5002 && crtl->args.info.nregs < 4)
5003 return LAST_ARG_REGNUM;
5005 /* Otherwise look for a call-saved register that is going to be pushed. */
5006 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5007 if (pushed_regs_mask & (1 << reg))
5012 /* Thumb-2 can use high regs. */
5013 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5014 if (pushed_regs_mask & (1 << reg))
5017 /* Something went wrong - thumb_compute_save_reg_mask()
5018 should have arranged for a suitable register to be pushed. */
5022 static GTY(()) int pic_labelno;
5024 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5028 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5030 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5032 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5035 gcc_assert (flag_pic);
5037 pic_reg = cfun->machine->pic_reg;
5038 if (TARGET_VXWORKS_RTP)
5040 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5041 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5042 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
5044 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5046 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5047 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5051 /* We use an UNSPEC rather than a LABEL_REF because this label
5052 never appears in the code stream. */
5054 labelno = GEN_INT (pic_labelno++);
5055 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5056 l1 = gen_rtx_CONST (VOIDmode, l1);
5058 /* On the ARM the PC register contains 'dot + 8' at the time of the
5059 addition, on the Thumb it is 'dot + 4'. */
5060 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5061 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5063 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5067 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
5068 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5070 else if (TARGET_THUMB2)
5072 /* Thumb-2 only allows very limited access to the PC. Calculate the
5073 address in a temporary register. */
5074 if (arm_pic_register != INVALID_REGNUM)
5076 pic_tmp = gen_rtx_REG (SImode,
5077 thumb_find_work_register (saved_regs));
5081 gcc_assert (can_create_pseudo_p ());
5082 pic_tmp = gen_reg_rtx (Pmode);
5085 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
5086 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
5087 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
5089 else /* TARGET_THUMB1 */
5091 if (arm_pic_register != INVALID_REGNUM
5092 && REGNO (pic_reg) > LAST_LO_REGNUM)
5094 /* We will have pushed the pic register, so we should always be
5095 able to find a work register. */
5096 pic_tmp = gen_rtx_REG (SImode,
5097 thumb_find_work_register (saved_regs));
5098 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5099 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5102 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5103 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5107 /* Need to emit this whether or not we obey regdecls,
5108 since setjmp/longjmp can cause life info to screw up. */
5113 /* Return nonzero if X is valid as an ARM state addressing register. */
5115 arm_address_register_rtx_p (rtx x, int strict_p)
5119 if (GET_CODE (x) != REG)
5125 return ARM_REGNO_OK_FOR_BASE_P (regno);
5127 return (regno <= LAST_ARM_REGNUM
5128 || regno >= FIRST_PSEUDO_REGISTER
5129 || regno == FRAME_POINTER_REGNUM
5130 || regno == ARG_POINTER_REGNUM);
5133 /* Return TRUE if this rtx is the difference of a symbol and a label,
5134 and will reduce to a PC-relative relocation in the object file.
5135 Expressions like this can be left alone when generating PIC, rather
5136 than forced through the GOT. */
5138 pcrel_constant_p (rtx x)
5140 if (GET_CODE (x) == MINUS)
5141 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5146 /* Return nonzero if X is a valid ARM state address operand. */
5148 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5152 enum rtx_code code = GET_CODE (x);
5154 if (arm_address_register_rtx_p (x, strict_p))
5157 use_ldrd = (TARGET_LDRD
5159 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5161 if (code == POST_INC || code == PRE_DEC
5162 || ((code == PRE_INC || code == POST_DEC)
5163 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5164 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5166 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5167 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5168 && GET_CODE (XEXP (x, 1)) == PLUS
5169 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5171 rtx addend = XEXP (XEXP (x, 1), 1);
5173 /* Don't allow ldrd post increment by register because it's hard
5174 to fixup invalid register choices. */
5176 && GET_CODE (x) == POST_MODIFY
5177 && GET_CODE (addend) == REG)
5180 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5181 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5184 /* After reload constants split into minipools will have addresses
5185 from a LABEL_REF. */
5186 else if (reload_completed
5187 && (code == LABEL_REF
5189 && GET_CODE (XEXP (x, 0)) == PLUS
5190 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5191 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5194 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5197 else if (code == PLUS)
5199 rtx xop0 = XEXP (x, 0);
5200 rtx xop1 = XEXP (x, 1);
5202 return ((arm_address_register_rtx_p (xop0, strict_p)
5203 && GET_CODE(xop1) == CONST_INT
5204 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5205 || (arm_address_register_rtx_p (xop1, strict_p)
5206 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5210 /* Reload currently can't handle MINUS, so disable this for now */
5211 else if (GET_CODE (x) == MINUS)
5213 rtx xop0 = XEXP (x, 0);
5214 rtx xop1 = XEXP (x, 1);
5216 return (arm_address_register_rtx_p (xop0, strict_p)
5217 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5221 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5222 && code == SYMBOL_REF
5223 && CONSTANT_POOL_ADDRESS_P (x)
5225 && symbol_mentioned_p (get_pool_constant (x))
5226 && ! pcrel_constant_p (get_pool_constant (x))))
5232 /* Return nonzero if X is a valid Thumb-2 address operand. */
5234 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5237 enum rtx_code code = GET_CODE (x);
5239 if (arm_address_register_rtx_p (x, strict_p))
5242 use_ldrd = (TARGET_LDRD
5244 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5246 if (code == POST_INC || code == PRE_DEC
5247 || ((code == PRE_INC || code == POST_DEC)
5248 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5249 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5251 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5252 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5253 && GET_CODE (XEXP (x, 1)) == PLUS
5254 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5256 /* Thumb-2 only has autoincrement by constant. */
5257 rtx addend = XEXP (XEXP (x, 1), 1);
5258 HOST_WIDE_INT offset;
5260 if (GET_CODE (addend) != CONST_INT)
5263 offset = INTVAL(addend);
5264 if (GET_MODE_SIZE (mode) <= 4)
5265 return (offset > -256 && offset < 256);
5267 return (use_ldrd && offset > -1024 && offset < 1024
5268 && (offset & 3) == 0);
5271 /* After reload constants split into minipools will have addresses
5272 from a LABEL_REF. */
5273 else if (reload_completed
5274 && (code == LABEL_REF
5276 && GET_CODE (XEXP (x, 0)) == PLUS
5277 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5278 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5281 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5284 else if (code == PLUS)
5286 rtx xop0 = XEXP (x, 0);
5287 rtx xop1 = XEXP (x, 1);
5289 return ((arm_address_register_rtx_p (xop0, strict_p)
5290 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5291 || (arm_address_register_rtx_p (xop1, strict_p)
5292 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5295 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5296 && code == SYMBOL_REF
5297 && CONSTANT_POOL_ADDRESS_P (x)
5299 && symbol_mentioned_p (get_pool_constant (x))
5300 && ! pcrel_constant_p (get_pool_constant (x))))
5306 /* Return nonzero if INDEX is valid for an address index operand in
5309 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5312 HOST_WIDE_INT range;
5313 enum rtx_code code = GET_CODE (index);
5315 /* Standard coprocessor addressing modes. */
5316 if (TARGET_HARD_FLOAT
5317 && (TARGET_FPA || TARGET_MAVERICK)
5318 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5319 || (TARGET_MAVERICK && mode == DImode)))
5320 return (code == CONST_INT && INTVAL (index) < 1024
5321 && INTVAL (index) > -1024
5322 && (INTVAL (index) & 3) == 0);
5325 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5326 return (code == CONST_INT
5327 && INTVAL (index) < 1016
5328 && INTVAL (index) > -1024
5329 && (INTVAL (index) & 3) == 0);
5331 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5332 return (code == CONST_INT
5333 && INTVAL (index) < 1024
5334 && INTVAL (index) > -1024
5335 && (INTVAL (index) & 3) == 0);
5337 if (arm_address_register_rtx_p (index, strict_p)
5338 && (GET_MODE_SIZE (mode) <= 4))
5341 if (mode == DImode || mode == DFmode)
5343 if (code == CONST_INT)
5345 HOST_WIDE_INT val = INTVAL (index);
5348 return val > -256 && val < 256;
5350 return val > -4096 && val < 4092;
5353 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5356 if (GET_MODE_SIZE (mode) <= 4
5360 || (mode == QImode && outer == SIGN_EXTEND))))
5364 rtx xiop0 = XEXP (index, 0);
5365 rtx xiop1 = XEXP (index, 1);
5367 return ((arm_address_register_rtx_p (xiop0, strict_p)
5368 && power_of_two_operand (xiop1, SImode))
5369 || (arm_address_register_rtx_p (xiop1, strict_p)
5370 && power_of_two_operand (xiop0, SImode)));
5372 else if (code == LSHIFTRT || code == ASHIFTRT
5373 || code == ASHIFT || code == ROTATERT)
5375 rtx op = XEXP (index, 1);
5377 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5378 && GET_CODE (op) == CONST_INT
5380 && INTVAL (op) <= 31);
5384 /* For ARM v4 we may be doing a sign-extend operation during the
5390 || (outer == SIGN_EXTEND && mode == QImode))
5396 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5398 return (code == CONST_INT
5399 && INTVAL (index) < range
5400 && INTVAL (index) > -range);
5403 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5404 index operand. i.e. 1, 2, 4 or 8. */
5406 thumb2_index_mul_operand (rtx op)
5410 if (GET_CODE(op) != CONST_INT)
5414 return (val == 1 || val == 2 || val == 4 || val == 8);
5417 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5419 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5421 enum rtx_code code = GET_CODE (index);
5423 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5424 /* Standard coprocessor addressing modes. */
5425 if (TARGET_HARD_FLOAT
5426 && (TARGET_FPA || TARGET_MAVERICK)
5427 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5428 || (TARGET_MAVERICK && mode == DImode)))
5429 return (code == CONST_INT && INTVAL (index) < 1024
5430 && INTVAL (index) > -1024
5431 && (INTVAL (index) & 3) == 0);
5433 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5435 /* For DImode assume values will usually live in core regs
5436 and only allow LDRD addressing modes. */
5437 if (!TARGET_LDRD || mode != DImode)
5438 return (code == CONST_INT
5439 && INTVAL (index) < 1024
5440 && INTVAL (index) > -1024
5441 && (INTVAL (index) & 3) == 0);
5445 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5446 return (code == CONST_INT
5447 && INTVAL (index) < 1016
5448 && INTVAL (index) > -1024
5449 && (INTVAL (index) & 3) == 0);
5451 if (arm_address_register_rtx_p (index, strict_p)
5452 && (GET_MODE_SIZE (mode) <= 4))
5455 if (mode == DImode || mode == DFmode)
5457 if (code == CONST_INT)
5459 HOST_WIDE_INT val = INTVAL (index);
5460 /* ??? Can we assume ldrd for thumb2? */
5461 /* Thumb-2 ldrd only has reg+const addressing modes. */
5462 /* ldrd supports offsets of +-1020.
5463 However the ldr fallback does not. */
5464 return val > -256 && val < 256 && (val & 3) == 0;
5472 rtx xiop0 = XEXP (index, 0);
5473 rtx xiop1 = XEXP (index, 1);
5475 return ((arm_address_register_rtx_p (xiop0, strict_p)
5476 && thumb2_index_mul_operand (xiop1))
5477 || (arm_address_register_rtx_p (xiop1, strict_p)
5478 && thumb2_index_mul_operand (xiop0)));
5480 else if (code == ASHIFT)
5482 rtx op = XEXP (index, 1);
5484 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5485 && GET_CODE (op) == CONST_INT
5487 && INTVAL (op) <= 3);
5490 return (code == CONST_INT
5491 && INTVAL (index) < 4096
5492 && INTVAL (index) > -256);
5495 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5497 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5501 if (GET_CODE (x) != REG)
5507 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5509 return (regno <= LAST_LO_REGNUM
5510 || regno > LAST_VIRTUAL_REGISTER
5511 || regno == FRAME_POINTER_REGNUM
5512 || (GET_MODE_SIZE (mode) >= 4
5513 && (regno == STACK_POINTER_REGNUM
5514 || regno >= FIRST_PSEUDO_REGISTER
5515 || x == hard_frame_pointer_rtx
5516 || x == arg_pointer_rtx)));
5519 /* Return nonzero if x is a legitimate index register. This is the case
5520 for any base register that can access a QImode object. */
5522 thumb1_index_register_rtx_p (rtx x, int strict_p)
5524 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5527 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5529 The AP may be eliminated to either the SP or the FP, so we use the
5530 least common denominator, e.g. SImode, and offsets from 0 to 64.
5532 ??? Verify whether the above is the right approach.
5534 ??? Also, the FP may be eliminated to the SP, so perhaps that
5535 needs special handling also.
5537 ??? Look at how the mips16 port solves this problem. It probably uses
5538 better ways to solve some of these problems.
5540 Although it is not incorrect, we don't accept QImode and HImode
5541 addresses based on the frame pointer or arg pointer until the
5542 reload pass starts. This is so that eliminating such addresses
5543 into stack based ones won't produce impossible code. */
5545 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5547 /* ??? Not clear if this is right. Experiment. */
5548 if (GET_MODE_SIZE (mode) < 4
5549 && !(reload_in_progress || reload_completed)
5550 && (reg_mentioned_p (frame_pointer_rtx, x)
5551 || reg_mentioned_p (arg_pointer_rtx, x)
5552 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5553 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5554 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5555 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5558 /* Accept any base register. SP only in SImode or larger. */
5559 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5562 /* This is PC relative data before arm_reorg runs. */
5563 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5564 && GET_CODE (x) == SYMBOL_REF
5565 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5568 /* This is PC relative data after arm_reorg runs. */
5569 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5571 && (GET_CODE (x) == LABEL_REF
5572 || (GET_CODE (x) == CONST
5573 && GET_CODE (XEXP (x, 0)) == PLUS
5574 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5575 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5578 /* Post-inc indexing only supported for SImode and larger. */
5579 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5580 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5583 else if (GET_CODE (x) == PLUS)
5585 /* REG+REG address can be any two index registers. */
5586 /* We disallow FRAME+REG addressing since we know that FRAME
5587 will be replaced with STACK, and SP relative addressing only
5588 permits SP+OFFSET. */
5589 if (GET_MODE_SIZE (mode) <= 4
5590 && XEXP (x, 0) != frame_pointer_rtx
5591 && XEXP (x, 1) != frame_pointer_rtx
5592 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5593 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5596 /* REG+const has 5-7 bit offset for non-SP registers. */
5597 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5598 || XEXP (x, 0) == arg_pointer_rtx)
5599 && GET_CODE (XEXP (x, 1)) == CONST_INT
5600 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5603 /* REG+const has 10-bit offset for SP, but only SImode and
5604 larger is supported. */
5605 /* ??? Should probably check for DI/DFmode overflow here
5606 just like GO_IF_LEGITIMATE_OFFSET does. */
5607 else if (GET_CODE (XEXP (x, 0)) == REG
5608 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5609 && GET_MODE_SIZE (mode) >= 4
5610 && GET_CODE (XEXP (x, 1)) == CONST_INT
5611 && INTVAL (XEXP (x, 1)) >= 0
5612 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5613 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5616 else if (GET_CODE (XEXP (x, 0)) == REG
5617 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5618 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5619 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5620 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5621 && GET_MODE_SIZE (mode) >= 4
5622 && GET_CODE (XEXP (x, 1)) == CONST_INT
5623 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5627 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5628 && GET_MODE_SIZE (mode) == 4
5629 && GET_CODE (x) == SYMBOL_REF
5630 && CONSTANT_POOL_ADDRESS_P (x)
5632 && symbol_mentioned_p (get_pool_constant (x))
5633 && ! pcrel_constant_p (get_pool_constant (x))))
5639 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5640 instruction of mode MODE. */
5642 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5644 switch (GET_MODE_SIZE (mode))
5647 return val >= 0 && val < 32;
5650 return val >= 0 && val < 64 && (val & 1) == 0;
5654 && (val + GET_MODE_SIZE (mode)) <= 128
5660 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5663 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5664 else if (TARGET_THUMB2)
5665 return thumb2_legitimate_address_p (mode, x, strict_p);
5666 else /* if (TARGET_THUMB1) */
5667 return thumb1_legitimate_address_p (mode, x, strict_p);
5670 /* Build the SYMBOL_REF for __tls_get_addr. */
5672 static GTY(()) rtx tls_get_addr_libfunc;
5675 get_tls_get_addr (void)
5677 if (!tls_get_addr_libfunc)
5678 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5679 return tls_get_addr_libfunc;
5683 arm_load_tp (rtx target)
5686 target = gen_reg_rtx (SImode);
5690 /* Can return in any reg. */
5691 emit_insn (gen_load_tp_hard (target));
5695 /* Always returned in r0. Immediately copy the result into a pseudo,
5696 otherwise other uses of r0 (e.g. setting up function arguments) may
5697 clobber the value. */
5701 emit_insn (gen_load_tp_soft ());
5703 tmp = gen_rtx_REG (SImode, 0);
5704 emit_move_insn (target, tmp);
5710 load_tls_operand (rtx x, rtx reg)
5714 if (reg == NULL_RTX)
5715 reg = gen_reg_rtx (SImode);
5717 tmp = gen_rtx_CONST (SImode, x);
5719 emit_move_insn (reg, tmp);
5725 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5727 rtx insns, label, labelno, sum;
5731 labelno = GEN_INT (pic_labelno++);
5732 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5733 label = gen_rtx_CONST (VOIDmode, label);
5735 sum = gen_rtx_UNSPEC (Pmode,
5736 gen_rtvec (4, x, GEN_INT (reloc), label,
5737 GEN_INT (TARGET_ARM ? 8 : 4)),
5739 reg = load_tls_operand (sum, reg);
5742 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5743 else if (TARGET_THUMB2)
5746 /* Thumb-2 only allows very limited access to the PC. Calculate
5747 the address in a temporary register. */
5748 tmp = gen_reg_rtx (SImode);
5749 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
5750 emit_insn (gen_addsi3(reg, reg, tmp));
5752 else /* TARGET_THUMB1 */
5753 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5755 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5756 Pmode, 1, reg, Pmode);
5758 insns = get_insns ();
5765 legitimize_tls_address (rtx x, rtx reg)
5767 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5768 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5772 case TLS_MODEL_GLOBAL_DYNAMIC:
5773 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5774 dest = gen_reg_rtx (Pmode);
5775 emit_libcall_block (insns, dest, ret, x);
5778 case TLS_MODEL_LOCAL_DYNAMIC:
5779 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5781 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5782 share the LDM result with other LD model accesses. */
5783 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5785 dest = gen_reg_rtx (Pmode);
5786 emit_libcall_block (insns, dest, ret, eqv);
5788 /* Load the addend. */
5789 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5791 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5792 return gen_rtx_PLUS (Pmode, dest, addend);
5794 case TLS_MODEL_INITIAL_EXEC:
5795 labelno = GEN_INT (pic_labelno++);
5796 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5797 label = gen_rtx_CONST (VOIDmode, label);
5798 sum = gen_rtx_UNSPEC (Pmode,
5799 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5800 GEN_INT (TARGET_ARM ? 8 : 4)),
5802 reg = load_tls_operand (sum, reg);
5805 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5806 else if (TARGET_THUMB2)
5809 /* Thumb-2 only allows very limited access to the PC. Calculate
5810 the address in a temporary register. */
5811 tmp = gen_reg_rtx (SImode);
5812 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
5813 emit_insn (gen_addsi3(reg, reg, tmp));
5814 emit_move_insn (reg, gen_const_mem (SImode, reg));
5818 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5819 emit_move_insn (reg, gen_const_mem (SImode, reg));
5822 tp = arm_load_tp (NULL_RTX);
5824 return gen_rtx_PLUS (Pmode, tp, reg);
5826 case TLS_MODEL_LOCAL_EXEC:
5827 tp = arm_load_tp (NULL_RTX);
5829 reg = gen_rtx_UNSPEC (Pmode,
5830 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5832 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5834 return gen_rtx_PLUS (Pmode, tp, reg);
5841 /* Try machine-dependent ways of modifying an illegitimate address
5842 to be legitimate. If we find one, return the new, valid address. */
5844 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5848 /* TODO: legitimize_address for Thumb2. */
5851 return thumb_legitimize_address (x, orig_x, mode);
5854 if (arm_tls_symbol_p (x))
5855 return legitimize_tls_address (x, NULL_RTX);
5857 if (GET_CODE (x) == PLUS)
5859 rtx xop0 = XEXP (x, 0);
5860 rtx xop1 = XEXP (x, 1);
5862 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5863 xop0 = force_reg (SImode, xop0);
5865 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5866 xop1 = force_reg (SImode, xop1);
5868 if (ARM_BASE_REGISTER_RTX_P (xop0)
5869 && GET_CODE (xop1) == CONST_INT)
5871 HOST_WIDE_INT n, low_n;
5875 /* VFP addressing modes actually allow greater offsets, but for
5876 now we just stick with the lowest common denominator. */
5878 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5890 low_n = ((mode) == TImode ? 0
5891 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5895 base_reg = gen_reg_rtx (SImode);
5896 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5897 emit_move_insn (base_reg, val);
5898 x = plus_constant (base_reg, low_n);
5900 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5901 x = gen_rtx_PLUS (SImode, xop0, xop1);
5904 /* XXX We don't allow MINUS any more -- see comment in
5905 arm_legitimate_address_outer_p (). */
5906 else if (GET_CODE (x) == MINUS)
5908 rtx xop0 = XEXP (x, 0);
5909 rtx xop1 = XEXP (x, 1);
5911 if (CONSTANT_P (xop0))
5912 xop0 = force_reg (SImode, xop0);
5914 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5915 xop1 = force_reg (SImode, xop1);
5917 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5918 x = gen_rtx_MINUS (SImode, xop0, xop1);
5921 /* Make sure to take full advantage of the pre-indexed addressing mode
5922 with absolute addresses which often allows for the base register to
5923 be factorized for multiple adjacent memory references, and it might
5924 even allows for the mini pool to be avoided entirely. */
5925 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5928 HOST_WIDE_INT mask, base, index;
5931 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5932 use a 8-bit index. So let's use a 12-bit index for SImode only and
5933 hope that arm_gen_constant will enable ldrb to use more bits. */
5934 bits = (mode == SImode) ? 12 : 8;
5935 mask = (1 << bits) - 1;
5936 base = INTVAL (x) & ~mask;
5937 index = INTVAL (x) & mask;
5938 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5940 /* It'll most probably be more efficient to generate the base
5941 with more bits set and use a negative index instead. */
5945 base_reg = force_reg (SImode, GEN_INT (base));
5946 x = plus_constant (base_reg, index);
5951 /* We need to find and carefully transform any SYMBOL and LABEL
5952 references; so go back to the original address expression. */
5953 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
5955 if (new_x != orig_x)
5963 /* Try machine-dependent ways of modifying an illegitimate Thumb address
5964 to be legitimate. If we find one, return the new, valid address. */
5966 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5968 if (arm_tls_symbol_p (x))
5969 return legitimize_tls_address (x, NULL_RTX);
5971 if (GET_CODE (x) == PLUS
5972 && GET_CODE (XEXP (x, 1)) == CONST_INT
5973 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
5974 || INTVAL (XEXP (x, 1)) < 0))
5976 rtx xop0 = XEXP (x, 0);
5977 rtx xop1 = XEXP (x, 1);
5978 HOST_WIDE_INT offset = INTVAL (xop1);
5980 /* Try and fold the offset into a biasing of the base register and
5981 then offsetting that. Don't do this when optimizing for space
5982 since it can cause too many CSEs. */
5983 if (optimize_size && offset >= 0
5984 && offset < 256 + 31 * GET_MODE_SIZE (mode))
5986 HOST_WIDE_INT delta;
5989 delta = offset - (256 - GET_MODE_SIZE (mode));
5990 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
5991 delta = 31 * GET_MODE_SIZE (mode);
5993 delta = offset & (~31 * GET_MODE_SIZE (mode));
5995 xop0 = force_operand (plus_constant (xop0, offset - delta),
5997 x = plus_constant (xop0, delta);
5999 else if (offset < 0 && offset > -256)
6000 /* Small negative offsets are best done with a subtract before the
6001 dereference, forcing these into a register normally takes two
6003 x = force_operand (x, NULL_RTX);
6006 /* For the remaining cases, force the constant into a register. */
6007 xop1 = force_reg (SImode, xop1);
6008 x = gen_rtx_PLUS (SImode, xop0, xop1);
6011 else if (GET_CODE (x) == PLUS
6012 && s_register_operand (XEXP (x, 1), SImode)
6013 && !s_register_operand (XEXP (x, 0), SImode))
6015 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6017 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6022 /* We need to find and carefully transform any SYMBOL and LABEL
6023 references; so go back to the original address expression. */
6024 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6026 if (new_x != orig_x)
6034 thumb_legitimize_reload_address (rtx *x_p,
6035 enum machine_mode mode,
6036 int opnum, int type,
6037 int ind_levels ATTRIBUTE_UNUSED)
6041 if (GET_CODE (x) == PLUS
6042 && GET_MODE_SIZE (mode) < 4
6043 && REG_P (XEXP (x, 0))
6044 && XEXP (x, 0) == stack_pointer_rtx
6045 && GET_CODE (XEXP (x, 1)) == CONST_INT
6046 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6051 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6052 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6056 /* If both registers are hi-regs, then it's better to reload the
6057 entire expression rather than each register individually. That
6058 only requires one reload register rather than two. */
6059 if (GET_CODE (x) == PLUS
6060 && REG_P (XEXP (x, 0))
6061 && REG_P (XEXP (x, 1))
6062 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6063 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6068 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6069 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6076 /* Test for various thread-local symbols. */
6078 /* Return TRUE if X is a thread-local symbol. */
6081 arm_tls_symbol_p (rtx x)
6083 if (! TARGET_HAVE_TLS)
6086 if (GET_CODE (x) != SYMBOL_REF)
6089 return SYMBOL_REF_TLS_MODEL (x) != 0;
6092 /* Helper for arm_tls_referenced_p. */
6095 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6097 if (GET_CODE (*x) == SYMBOL_REF)
6098 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6100 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6101 TLS offsets, not real symbol references. */
6102 if (GET_CODE (*x) == UNSPEC
6103 && XINT (*x, 1) == UNSPEC_TLS)
6109 /* Return TRUE if X contains any TLS symbol references. */
6112 arm_tls_referenced_p (rtx x)
6114 if (! TARGET_HAVE_TLS)
6117 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6120 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6123 arm_cannot_force_const_mem (rtx x)
6127 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6129 split_const (x, &base, &offset);
6130 if (GET_CODE (base) == SYMBOL_REF
6131 && !offset_within_block_p (base, INTVAL (offset)))
6134 return arm_tls_referenced_p (x);
6137 #define REG_OR_SUBREG_REG(X) \
6138 (GET_CODE (X) == REG \
6139 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6141 #define REG_OR_SUBREG_RTX(X) \
6142 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6144 #ifndef COSTS_N_INSNS
6145 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6148 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6150 enum machine_mode mode = GET_MODE (x);
6163 return COSTS_N_INSNS (1);
6166 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6169 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6176 return COSTS_N_INSNS (2) + cycles;
6178 return COSTS_N_INSNS (1) + 16;
6181 return (COSTS_N_INSNS (1)
6182 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6183 + GET_CODE (SET_DEST (x)) == MEM));
6188 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6190 if (thumb_shiftable_const (INTVAL (x)))
6191 return COSTS_N_INSNS (2);
6192 return COSTS_N_INSNS (3);
6194 else if ((outer == PLUS || outer == COMPARE)
6195 && INTVAL (x) < 256 && INTVAL (x) > -256)
6197 else if (outer == AND
6198 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6199 return COSTS_N_INSNS (1);
6200 else if (outer == ASHIFT || outer == ASHIFTRT
6201 || outer == LSHIFTRT)
6203 return COSTS_N_INSNS (2);
6209 return COSTS_N_INSNS (3);
6227 /* XXX another guess. */
6228 /* Memory costs quite a lot for the first word, but subsequent words
6229 load at the equivalent of a single insn each. */
6230 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6231 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6236 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6241 /* XXX still guessing. */
6242 switch (GET_MODE (XEXP (x, 0)))
6245 return (1 + (mode == DImode ? 4 : 0)
6246 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6249 return (4 + (mode == DImode ? 4 : 0)
6250 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6253 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6265 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6267 enum machine_mode mode = GET_MODE (x);
6268 enum rtx_code subcode;
6270 enum rtx_code code = GET_CODE (x);
6277 /* Memory costs quite a lot for the first word, but subsequent words
6278 load at the equivalent of a single insn each. */
6279 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6286 if (TARGET_HARD_FLOAT && mode == SFmode)
6287 *total = COSTS_N_INSNS (2);
6288 else if (TARGET_HARD_FLOAT && mode == DFmode)
6289 *total = COSTS_N_INSNS (4);
6291 *total = COSTS_N_INSNS (20);
6295 if (GET_CODE (XEXP (x, 1)) == REG)
6296 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6297 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6298 *total = rtx_cost (XEXP (x, 1), code, speed);
6304 *total += COSTS_N_INSNS (4);
6309 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6310 *total += rtx_cost (XEXP (x, 0), code, speed);
6313 *total += COSTS_N_INSNS (3);
6317 *total += COSTS_N_INSNS (1);
6318 /* Increase the cost of complex shifts because they aren't any faster,
6319 and reduce dual issue opportunities. */
6320 if (arm_tune_cortex_a9
6321 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6329 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6331 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6332 *total = COSTS_N_INSNS (1);
6334 *total = COSTS_N_INSNS (20);
6337 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6338 /* Thumb2 does not have RSB, so all arguments must be
6339 registers (subtracting a constant is canonicalized as
6340 addition of the negated constant). */
6346 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6347 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6348 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6350 *total += rtx_cost (XEXP (x, 1), code, speed);
6354 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6355 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6357 *total += rtx_cost (XEXP (x, 0), code, speed);
6364 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6366 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6368 *total = COSTS_N_INSNS (1);
6369 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6370 && arm_const_double_rtx (XEXP (x, 0)))
6372 *total += rtx_cost (XEXP (x, 1), code, speed);
6376 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6377 && arm_const_double_rtx (XEXP (x, 1)))
6379 *total += rtx_cost (XEXP (x, 0), code, speed);
6385 *total = COSTS_N_INSNS (20);
6389 *total = COSTS_N_INSNS (1);
6390 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6391 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6393 *total += rtx_cost (XEXP (x, 1), code, speed);
6397 subcode = GET_CODE (XEXP (x, 1));
6398 if (subcode == ASHIFT || subcode == ASHIFTRT
6399 || subcode == LSHIFTRT
6400 || subcode == ROTATE || subcode == ROTATERT)
6402 *total += rtx_cost (XEXP (x, 0), code, speed);
6403 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6407 /* A shift as a part of RSB costs no more than RSB itself. */
6408 if (GET_CODE (XEXP (x, 0)) == MULT
6409 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6411 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6412 *total += rtx_cost (XEXP (x, 1), code, speed);
6417 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6419 *total += rtx_cost (XEXP (x, 0), code, speed);
6420 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6424 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6425 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6427 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6428 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6429 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6430 *total += COSTS_N_INSNS (1);
6438 if (code == PLUS && arm_arch6 && mode == SImode
6439 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6440 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6442 *total = COSTS_N_INSNS (1);
6443 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6445 *total += rtx_cost (XEXP (x, 1), code, speed);
6449 /* MLA: All arguments must be registers. We filter out
6450 multiplication by a power of two, so that we fall down into
6452 if (GET_CODE (XEXP (x, 0)) == MULT
6453 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6455 /* The cost comes from the cost of the multiply. */
6459 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6461 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6463 *total = COSTS_N_INSNS (1);
6464 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6465 && arm_const_double_rtx (XEXP (x, 1)))
6467 *total += rtx_cost (XEXP (x, 0), code, speed);
6474 *total = COSTS_N_INSNS (20);
6478 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6479 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6481 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6482 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6483 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6484 *total += COSTS_N_INSNS (1);
6490 case AND: case XOR: case IOR:
6493 /* Normally the frame registers will be spilt into reg+const during
6494 reload, so it is a bad idea to combine them with other instructions,
6495 since then they might not be moved outside of loops. As a compromise
6496 we allow integration with ops that have a constant as their second
6498 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6499 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6500 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6501 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6502 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6507 *total += COSTS_N_INSNS (2);
6508 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6509 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6511 *total += rtx_cost (XEXP (x, 0), code, speed);
6518 *total += COSTS_N_INSNS (1);
6519 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6520 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6522 *total += rtx_cost (XEXP (x, 0), code, speed);
6525 subcode = GET_CODE (XEXP (x, 0));
6526 if (subcode == ASHIFT || subcode == ASHIFTRT
6527 || subcode == LSHIFTRT
6528 || subcode == ROTATE || subcode == ROTATERT)
6530 *total += rtx_cost (XEXP (x, 1), code, speed);
6531 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6536 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6538 *total += rtx_cost (XEXP (x, 1), code, speed);
6539 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6543 if (subcode == UMIN || subcode == UMAX
6544 || subcode == SMIN || subcode == SMAX)
6546 *total = COSTS_N_INSNS (3);
6553 /* This should have been handled by the CPU specific routines. */
6557 if (arm_arch3m && mode == SImode
6558 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6560 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6561 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6562 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6563 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6565 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6568 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6572 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6574 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6576 *total = COSTS_N_INSNS (1);
6579 *total = COSTS_N_INSNS (2);
6585 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6586 if (mode == SImode && code == NOT)
6588 subcode = GET_CODE (XEXP (x, 0));
6589 if (subcode == ASHIFT || subcode == ASHIFTRT
6590 || subcode == LSHIFTRT
6591 || subcode == ROTATE || subcode == ROTATERT
6593 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6595 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6596 /* Register shifts cost an extra cycle. */
6597 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6598 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6607 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6609 *total = COSTS_N_INSNS (4);
6613 operand = XEXP (x, 0);
6615 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6616 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6617 && GET_CODE (XEXP (operand, 0)) == REG
6618 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6619 *total += COSTS_N_INSNS (1);
6620 *total += (rtx_cost (XEXP (x, 1), code, speed)
6621 + rtx_cost (XEXP (x, 2), code, speed));
6625 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6627 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6633 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6634 && mode == SImode && XEXP (x, 1) == const0_rtx)
6636 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6642 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6643 && mode == SImode && XEXP (x, 1) == const0_rtx)
6645 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6665 /* SCC insns. In the case where the comparison has already been
6666 performed, then they cost 2 instructions. Otherwise they need
6667 an additional comparison before them. */
6668 *total = COSTS_N_INSNS (2);
6669 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6676 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6682 *total += COSTS_N_INSNS (1);
6683 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6684 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6686 *total += rtx_cost (XEXP (x, 0), code, speed);
6690 subcode = GET_CODE (XEXP (x, 0));
6691 if (subcode == ASHIFT || subcode == ASHIFTRT
6692 || subcode == LSHIFTRT
6693 || subcode == ROTATE || subcode == ROTATERT)
6695 *total += rtx_cost (XEXP (x, 1), code, speed);
6696 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6701 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6703 *total += rtx_cost (XEXP (x, 1), code, speed);
6704 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6714 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6715 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6716 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6717 *total += rtx_cost (XEXP (x, 1), code, speed);
6721 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6723 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6725 *total = COSTS_N_INSNS (1);
6728 *total = COSTS_N_INSNS (20);
6731 *total = COSTS_N_INSNS (1);
6733 *total += COSTS_N_INSNS (3);
6737 if (GET_MODE_CLASS (mode) == MODE_INT)
6741 *total += COSTS_N_INSNS (1);
6743 if (GET_MODE (XEXP (x, 0)) != SImode)
6747 if (GET_CODE (XEXP (x, 0)) != MEM)
6748 *total += COSTS_N_INSNS (1);
6750 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6751 *total += COSTS_N_INSNS (2);
6760 if (GET_MODE_CLASS (mode) == MODE_INT)
6763 *total += COSTS_N_INSNS (1);
6765 if (GET_MODE (XEXP (x, 0)) != SImode)
6769 if (GET_CODE (XEXP (x, 0)) != MEM)
6770 *total += COSTS_N_INSNS (1);
6772 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6773 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6780 switch (GET_MODE (XEXP (x, 0)))
6787 *total = COSTS_N_INSNS (1);
6797 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6801 if (const_ok_for_arm (INTVAL (x))
6802 || const_ok_for_arm (~INTVAL (x)))
6803 *total = COSTS_N_INSNS (1);
6805 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6806 INTVAL (x), NULL_RTX,
6813 *total = COSTS_N_INSNS (3);
6817 *total = COSTS_N_INSNS (1);
6821 *total = COSTS_N_INSNS (1);
6822 *total += rtx_cost (XEXP (x, 0), code, speed);
6826 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
6827 *total = COSTS_N_INSNS (1);
6829 *total = COSTS_N_INSNS (4);
6833 *total = COSTS_N_INSNS (4);
6838 /* RTX costs when optimizing for size. */
6840 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6843 enum machine_mode mode = GET_MODE (x);
6846 /* XXX TBD. For now, use the standard costs. */
6847 *total = thumb1_rtx_costs (x, code, outer_code);
6851 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
6855 /* A memory access costs 1 insn if the mode is small, or the address is
6856 a single register, otherwise it costs one insn per word. */
6857 if (REG_P (XEXP (x, 0)))
6858 *total = COSTS_N_INSNS (1);
6860 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6867 /* Needs a libcall, so it costs about this. */
6868 *total = COSTS_N_INSNS (2);
6872 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
6874 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
6882 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
6884 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
6887 else if (mode == SImode)
6889 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
6890 /* Slightly disparage register shifts, but not by much. */
6891 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6892 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
6896 /* Needs a libcall. */
6897 *total = COSTS_N_INSNS (2);
6901 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6903 *total = COSTS_N_INSNS (1);
6909 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
6910 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
6912 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
6913 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
6914 || subcode1 == ROTATE || subcode1 == ROTATERT
6915 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
6916 || subcode1 == ASHIFTRT)
6918 /* It's just the cost of the two operands. */
6923 *total = COSTS_N_INSNS (1);
6927 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6931 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6933 *total = COSTS_N_INSNS (1);
6937 /* A shift as a part of ADD costs nothing. */
6938 if (GET_CODE (XEXP (x, 0)) == MULT
6939 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6941 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
6942 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
6943 *total += rtx_cost (XEXP (x, 1), code, false);
6948 case AND: case XOR: case IOR:
6951 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
6953 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
6954 || subcode == LSHIFTRT || subcode == ASHIFTRT
6955 || (code == AND && subcode == NOT))
6957 /* It's just the cost of the two operands. */
6963 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6967 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6971 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6973 *total = COSTS_N_INSNS (1);
6979 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6988 if (cc_register (XEXP (x, 0), VOIDmode))
6991 *total = COSTS_N_INSNS (1);
6995 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6996 *total = COSTS_N_INSNS (1);
6998 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7003 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7005 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7006 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7009 *total += COSTS_N_INSNS (1);
7014 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7016 switch (GET_MODE (XEXP (x, 0)))
7019 *total += COSTS_N_INSNS (1);
7023 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7029 *total += COSTS_N_INSNS (2);
7034 *total += COSTS_N_INSNS (1);
7039 if (const_ok_for_arm (INTVAL (x)))
7040 /* A multiplication by a constant requires another instruction
7041 to load the constant to a register. */
7042 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7044 else if (const_ok_for_arm (~INTVAL (x)))
7045 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7046 else if (const_ok_for_arm (-INTVAL (x)))
7048 if (outer_code == COMPARE || outer_code == PLUS
7049 || outer_code == MINUS)
7052 *total = COSTS_N_INSNS (1);
7055 *total = COSTS_N_INSNS (2);
7061 *total = COSTS_N_INSNS (2);
7065 *total = COSTS_N_INSNS (4);
7070 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7071 cost of these slightly. */
7072 *total = COSTS_N_INSNS (1) + 1;
7076 if (mode != VOIDmode)
7077 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7079 *total = COSTS_N_INSNS (4); /* How knows? */
7084 /* RTX costs when optimizing for size. */
7086 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7090 return arm_size_rtx_costs (x, (enum rtx_code) code,
7091 (enum rtx_code) outer_code, total);
7093 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
7094 (enum rtx_code) outer_code,
7098 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7099 supported on any "slowmul" cores, so it can be ignored. */
7102 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7103 int *total, bool speed)
7105 enum machine_mode mode = GET_MODE (x);
7109 *total = thumb1_rtx_costs (x, code, outer_code);
7116 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7119 *total = COSTS_N_INSNS (20);
7123 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7125 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7126 & (unsigned HOST_WIDE_INT) 0xffffffff);
7127 int cost, const_ok = const_ok_for_arm (i);
7128 int j, booth_unit_size;
7130 /* Tune as appropriate. */
7131 cost = const_ok ? 4 : 8;
7132 booth_unit_size = 2;
7133 for (j = 0; i && j < 32; j += booth_unit_size)
7135 i >>= booth_unit_size;
7139 *total = COSTS_N_INSNS (cost);
7140 *total += rtx_cost (XEXP (x, 0), code, speed);
7144 *total = COSTS_N_INSNS (20);
7148 return arm_rtx_costs_1 (x, outer_code, total, speed);;
7153 /* RTX cost for cores with a fast multiply unit (M variants). */
7156 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7157 int *total, bool speed)
7159 enum machine_mode mode = GET_MODE (x);
7163 *total = thumb1_rtx_costs (x, code, outer_code);
7167 /* ??? should thumb2 use different costs? */
7171 /* There is no point basing this on the tuning, since it is always the
7172 fast variant if it exists at all. */
7174 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7175 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7176 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7178 *total = COSTS_N_INSNS(2);
7185 *total = COSTS_N_INSNS (5);
7189 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7191 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7192 & (unsigned HOST_WIDE_INT) 0xffffffff);
7193 int cost, const_ok = const_ok_for_arm (i);
7194 int j, booth_unit_size;
7196 /* Tune as appropriate. */
7197 cost = const_ok ? 4 : 8;
7198 booth_unit_size = 8;
7199 for (j = 0; i && j < 32; j += booth_unit_size)
7201 i >>= booth_unit_size;
7205 *total = COSTS_N_INSNS(cost);
7211 *total = COSTS_N_INSNS (4);
7215 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7217 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
7219 *total = COSTS_N_INSNS (1);
7224 /* Requires a lib call */
7225 *total = COSTS_N_INSNS (20);
7229 return arm_rtx_costs_1 (x, outer_code, total, speed);
7234 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7235 so it can be ignored. */
7238 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
7240 enum machine_mode mode = GET_MODE (x);
7244 *total = thumb1_rtx_costs (x, code, outer_code);
7251 if (GET_CODE (XEXP (x, 0)) != MULT)
7252 return arm_rtx_costs_1 (x, outer_code, total, speed);
7254 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7255 will stall until the multiplication is complete. */
7256 *total = COSTS_N_INSNS (3);
7260 /* There is no point basing this on the tuning, since it is always the
7261 fast variant if it exists at all. */
7263 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7264 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7265 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7267 *total = COSTS_N_INSNS (2);
7274 *total = COSTS_N_INSNS (5);
7278 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7280 /* If operand 1 is a constant we can more accurately
7281 calculate the cost of the multiply. The multiplier can
7282 retire 15 bits on the first cycle and a further 12 on the
7283 second. We do, of course, have to load the constant into
7284 a register first. */
7285 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7286 /* There's a general overhead of one cycle. */
7288 unsigned HOST_WIDE_INT masked_const;
7293 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7295 masked_const = i & 0xffff8000;
7296 if (masked_const != 0)
7299 masked_const = i & 0xf8000000;
7300 if (masked_const != 0)
7303 *total = COSTS_N_INSNS (cost);
7309 *total = COSTS_N_INSNS (3);
7313 /* Requires a lib call */
7314 *total = COSTS_N_INSNS (20);
7318 return arm_rtx_costs_1 (x, outer_code, total, speed);
7323 /* RTX costs for 9e (and later) cores. */
7326 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7327 int *total, bool speed)
7329 enum machine_mode mode = GET_MODE (x);
7336 *total = COSTS_N_INSNS (3);
7340 *total = thumb1_rtx_costs (x, code, outer_code);
7348 /* There is no point basing this on the tuning, since it is always the
7349 fast variant if it exists at all. */
7351 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7352 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7353 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7355 *total = COSTS_N_INSNS (2);
7362 *total = COSTS_N_INSNS (5);
7368 *total = COSTS_N_INSNS (2);
7372 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7374 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
7376 *total = COSTS_N_INSNS (1);
7381 *total = COSTS_N_INSNS (20);
7385 return arm_rtx_costs_1 (x, outer_code, total, speed);
7388 /* All address computations that can be done are free, but rtx cost returns
7389 the same for practically all of them. So we weight the different types
7390 of address here in the order (most pref first):
7391 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7393 arm_arm_address_cost (rtx x)
7395 enum rtx_code c = GET_CODE (x);
7397 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7399 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7404 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7407 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7417 arm_thumb_address_cost (rtx x)
7419 enum rtx_code c = GET_CODE (x);
7424 && GET_CODE (XEXP (x, 0)) == REG
7425 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7432 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7434 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7438 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7442 /* Some true dependencies can have a higher cost depending
7443 on precisely how certain input operands are used. */
7445 && REG_NOTE_KIND (link) == 0
7446 && recog_memoized (insn) >= 0
7447 && recog_memoized (dep) >= 0)
7449 int shift_opnum = get_attr_shift (insn);
7450 enum attr_type attr_type = get_attr_type (dep);
7452 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7453 operand for INSN. If we have a shifted input operand and the
7454 instruction we depend on is another ALU instruction, then we may
7455 have to account for an additional stall. */
7456 if (shift_opnum != 0
7457 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7459 rtx shifted_operand;
7462 /* Get the shifted operand. */
7463 extract_insn (insn);
7464 shifted_operand = recog_data.operand[shift_opnum];
7466 /* Iterate over all the operands in DEP. If we write an operand
7467 that overlaps with SHIFTED_OPERAND, then we have increase the
7468 cost of this dependency. */
7470 preprocess_constraints ();
7471 for (opno = 0; opno < recog_data.n_operands; opno++)
7473 /* We can ignore strict inputs. */
7474 if (recog_data.operand_type[opno] == OP_IN)
7477 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7484 /* XXX This is not strictly true for the FPA. */
7485 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7486 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7489 /* Call insns don't incur a stall, even if they follow a load. */
7490 if (REG_NOTE_KIND (link) == 0
7491 && GET_CODE (insn) == CALL_INSN)
7494 if ((i_pat = single_set (insn)) != NULL
7495 && GET_CODE (SET_SRC (i_pat)) == MEM
7496 && (d_pat = single_set (dep)) != NULL
7497 && GET_CODE (SET_DEST (d_pat)) == MEM)
7499 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7500 /* This is a load after a store, there is no conflict if the load reads
7501 from a cached area. Assume that loads from the stack, and from the
7502 constant pool are cached, and that others will miss. This is a
7505 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7506 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7507 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7508 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7515 static int fp_consts_inited = 0;
7517 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7518 static const char * const strings_fp[8] =
7521 "4", "5", "0.5", "10"
7524 static REAL_VALUE_TYPE values_fp[8];
7527 init_fp_table (void)
7533 fp_consts_inited = 1;
7535 fp_consts_inited = 8;
7537 for (i = 0; i < fp_consts_inited; i++)
7539 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7544 /* Return TRUE if rtx X is a valid immediate FP constant. */
7546 arm_const_double_rtx (rtx x)
7551 if (!fp_consts_inited)
7554 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7555 if (REAL_VALUE_MINUS_ZERO (r))
7558 for (i = 0; i < fp_consts_inited; i++)
7559 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7565 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7567 neg_const_double_rtx_ok_for_fpa (rtx x)
7572 if (!fp_consts_inited)
7575 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7576 r = REAL_VALUE_NEGATE (r);
7577 if (REAL_VALUE_MINUS_ZERO (r))
7580 for (i = 0; i < 8; i++)
7581 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7588 /* VFPv3 has a fairly wide range of representable immediates, formed from
7589 "quarter-precision" floating-point values. These can be evaluated using this
7590 formula (with ^ for exponentiation):
7594 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7595 16 <= n <= 31 and 0 <= r <= 7.
7597 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7599 - A (most-significant) is the sign bit.
7600 - BCD are the exponent (encoded as r XOR 3).
7601 - EFGH are the mantissa (encoded as n - 16).
7604 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7605 fconst[sd] instruction, or -1 if X isn't suitable. */
7607 vfp3_const_double_index (rtx x)
7609 REAL_VALUE_TYPE r, m;
7611 unsigned HOST_WIDE_INT mantissa, mant_hi;
7612 unsigned HOST_WIDE_INT mask;
7613 HOST_WIDE_INT m1, m2;
7614 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7616 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7619 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7621 /* We can't represent these things, so detect them first. */
7622 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7625 /* Extract sign, exponent and mantissa. */
7626 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7627 r = REAL_VALUE_ABS (r);
7628 exponent = REAL_EXP (&r);
7629 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7630 highest (sign) bit, with a fixed binary point at bit point_pos.
7631 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7632 bits for the mantissa, this may fail (low bits would be lost). */
7633 real_ldexp (&m, &r, point_pos - exponent);
7634 REAL_VALUE_TO_INT (&m1, &m2, m);
7638 /* If there are bits set in the low part of the mantissa, we can't
7639 represent this value. */
7643 /* Now make it so that mantissa contains the most-significant bits, and move
7644 the point_pos to indicate that the least-significant bits have been
7646 point_pos -= HOST_BITS_PER_WIDE_INT;
7649 /* We can permit four significant bits of mantissa only, plus a high bit
7650 which is always 1. */
7651 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7652 if ((mantissa & mask) != 0)
7655 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7656 mantissa >>= point_pos - 5;
7658 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7659 floating-point immediate zero with Neon using an integer-zero load, but
7660 that case is handled elsewhere.) */
7664 gcc_assert (mantissa >= 16 && mantissa <= 31);
7666 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7667 normalized significands are in the range [1, 2). (Our mantissa is shifted
7668 left 4 places at this point relative to normalized IEEE754 values). GCC
7669 internally uses [0.5, 1) (see real.c), so the exponent returned from
7670 REAL_EXP must be altered. */
7671 exponent = 5 - exponent;
7673 if (exponent < 0 || exponent > 7)
7676 /* Sign, mantissa and exponent are now in the correct form to plug into the
7677 formula described in the comment above. */
7678 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7681 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7683 vfp3_const_double_rtx (rtx x)
7688 return vfp3_const_double_index (x) != -1;
7691 /* Recognize immediates which can be used in various Neon instructions. Legal
7692 immediates are described by the following table (for VMVN variants, the
7693 bitwise inverse of the constant shown is recognized. In either case, VMOV
7694 is output and the correct instruction to use for a given constant is chosen
7695 by the assembler). The constant shown is replicated across all elements of
7696 the destination vector.
7698 insn elems variant constant (binary)
7699 ---- ----- ------- -----------------
7700 vmov i32 0 00000000 00000000 00000000 abcdefgh
7701 vmov i32 1 00000000 00000000 abcdefgh 00000000
7702 vmov i32 2 00000000 abcdefgh 00000000 00000000
7703 vmov i32 3 abcdefgh 00000000 00000000 00000000
7704 vmov i16 4 00000000 abcdefgh
7705 vmov i16 5 abcdefgh 00000000
7706 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7707 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7708 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7709 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7710 vmvn i16 10 00000000 abcdefgh
7711 vmvn i16 11 abcdefgh 00000000
7712 vmov i32 12 00000000 00000000 abcdefgh 11111111
7713 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7714 vmov i32 14 00000000 abcdefgh 11111111 11111111
7715 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7717 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7718 eeeeeeee ffffffff gggggggg hhhhhhhh
7719 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7721 For case 18, B = !b. Representable values are exactly those accepted by
7722 vfp3_const_double_index, but are output as floating-point numbers rather
7725 Variants 0-5 (inclusive) may also be used as immediates for the second
7726 operand of VORR/VBIC instructions.
7728 The INVERSE argument causes the bitwise inverse of the given operand to be
7729 recognized instead (used for recognizing legal immediates for the VAND/VORN
7730 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7731 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7732 output, rather than the real insns vbic/vorr).
7734 INVERSE makes no difference to the recognition of float vectors.
7736 The return value is the variant of immediate as shown in the above table, or
7737 -1 if the given value doesn't match any of the listed patterns.
7740 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7741 rtx *modconst, int *elementwidth)
7743 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7745 for (i = 0; i < idx; i += (STRIDE)) \
7750 immtype = (CLASS); \
7751 elsize = (ELSIZE); \
7755 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7756 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7757 unsigned char bytes[16];
7758 int immtype = -1, matches;
7759 unsigned int invmask = inverse ? 0xff : 0;
7761 /* Vectors of float constants. */
7762 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7764 rtx el0 = CONST_VECTOR_ELT (op, 0);
7767 if (!vfp3_const_double_rtx (el0))
7770 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7772 for (i = 1; i < n_elts; i++)
7774 rtx elt = CONST_VECTOR_ELT (op, i);
7777 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
7779 if (!REAL_VALUES_EQUAL (r0, re))
7784 *modconst = CONST_VECTOR_ELT (op, 0);
7792 /* Splat vector constant out into a byte vector. */
7793 for (i = 0; i < n_elts; i++)
7795 rtx el = CONST_VECTOR_ELT (op, i);
7796 unsigned HOST_WIDE_INT elpart;
7797 unsigned int part, parts;
7799 if (GET_CODE (el) == CONST_INT)
7801 elpart = INTVAL (el);
7804 else if (GET_CODE (el) == CONST_DOUBLE)
7806 elpart = CONST_DOUBLE_LOW (el);
7812 for (part = 0; part < parts; part++)
7815 for (byte = 0; byte < innersize; byte++)
7817 bytes[idx++] = (elpart & 0xff) ^ invmask;
7818 elpart >>= BITS_PER_UNIT;
7820 if (GET_CODE (el) == CONST_DOUBLE)
7821 elpart = CONST_DOUBLE_HIGH (el);
7826 gcc_assert (idx == GET_MODE_SIZE (mode));
7830 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7831 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7833 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7834 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7836 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7837 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7839 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7840 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
7842 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
7844 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
7846 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7847 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7849 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7850 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7852 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7853 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7855 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7856 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
7858 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
7860 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
7862 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7863 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7865 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7866 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7868 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7869 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7871 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7872 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7874 CHECK (1, 8, 16, bytes[i] == bytes[0]);
7876 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7877 && bytes[i] == bytes[(i + 8) % idx]);
7885 *elementwidth = elsize;
7889 unsigned HOST_WIDE_INT imm = 0;
7891 /* Un-invert bytes of recognized vector, if necessary. */
7893 for (i = 0; i < idx; i++)
7894 bytes[i] ^= invmask;
7898 /* FIXME: Broken on 32-bit H_W_I hosts. */
7899 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7901 for (i = 0; i < 8; i++)
7902 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7903 << (i * BITS_PER_UNIT);
7905 *modconst = GEN_INT (imm);
7909 unsigned HOST_WIDE_INT imm = 0;
7911 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7912 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7914 *modconst = GEN_INT (imm);
7922 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
7923 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
7924 float elements), and a modified constant (whatever should be output for a
7925 VMOV) in *MODCONST. */
7928 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
7929 rtx *modconst, int *elementwidth)
7933 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
7939 *modconst = tmpconst;
7942 *elementwidth = tmpwidth;
7947 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
7948 the immediate is valid, write a constant suitable for using as an operand
7949 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
7950 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
7953 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
7954 rtx *modconst, int *elementwidth)
7958 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
7960 if (retval < 0 || retval > 5)
7964 *modconst = tmpconst;
7967 *elementwidth = tmpwidth;
7972 /* Return a string suitable for output of Neon immediate logic operation
7976 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
7977 int inverse, int quad)
7979 int width, is_valid;
7980 static char templ[40];
7982 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
7984 gcc_assert (is_valid != 0);
7987 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
7989 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
7994 /* Output a sequence of pairwise operations to implement a reduction.
7995 NOTE: We do "too much work" here, because pairwise operations work on two
7996 registers-worth of operands in one go. Unfortunately we can't exploit those
7997 extra calculations to do the full operation in fewer steps, I don't think.
7998 Although all vector elements of the result but the first are ignored, we
7999 actually calculate the same result in each of the elements. An alternative
8000 such as initially loading a vector with zero to use as each of the second
8001 operands would use up an additional register and take an extra instruction,
8002 for no particular gain. */
8005 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8006 rtx (*reduc) (rtx, rtx, rtx))
8008 enum machine_mode inner = GET_MODE_INNER (mode);
8009 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8012 for (i = parts / 2; i >= 1; i /= 2)
8014 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8015 emit_insn (reduc (dest, tmpsum, tmpsum));
8020 /* Initialize a vector with non-constant elements. FIXME: We can do better
8021 than the current implementation (building a vector on the stack and then
8022 loading it) in many cases. See rs6000.c. */
8025 neon_expand_vector_init (rtx target, rtx vals)
8027 enum machine_mode mode = GET_MODE (target);
8028 enum machine_mode inner = GET_MODE_INNER (mode);
8029 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
8032 gcc_assert (VECTOR_MODE_P (mode));
8034 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8035 for (i = 0; i < n_elts; i++)
8036 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
8037 XVECEXP (vals, 0, i));
8039 emit_move_insn (target, mem);
8042 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8043 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8044 reported source locations are bogus. */
8047 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8052 gcc_assert (GET_CODE (operand) == CONST_INT);
8054 lane = INTVAL (operand);
8056 if (lane < low || lane >= high)
8060 /* Bounds-check lanes. */
8063 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8065 bounds_check (operand, low, high, "lane out of range");
8068 /* Bounds-check constants. */
8071 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8073 bounds_check (operand, low, high, "constant out of range");
8077 neon_element_bits (enum machine_mode mode)
8080 return GET_MODE_BITSIZE (mode);
8082 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8086 /* Predicates for `match_operand' and `match_operator'. */
8088 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8090 cirrus_memory_offset (rtx op)
8092 /* Reject eliminable registers. */
8093 if (! (reload_in_progress || reload_completed)
8094 && ( reg_mentioned_p (frame_pointer_rtx, op)
8095 || reg_mentioned_p (arg_pointer_rtx, op)
8096 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8097 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8098 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8099 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8102 if (GET_CODE (op) == MEM)
8108 /* Match: (mem (reg)). */
8109 if (GET_CODE (ind) == REG)
8115 if (GET_CODE (ind) == PLUS
8116 && GET_CODE (XEXP (ind, 0)) == REG
8117 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8118 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8125 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8126 WB is true if full writeback address modes are allowed and is false
8127 if limited writeback address modes (POST_INC and PRE_DEC) are
8131 arm_coproc_mem_operand (rtx op, bool wb)
8135 /* Reject eliminable registers. */
8136 if (! (reload_in_progress || reload_completed)
8137 && ( reg_mentioned_p (frame_pointer_rtx, op)
8138 || reg_mentioned_p (arg_pointer_rtx, op)
8139 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8140 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8141 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8142 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8145 /* Constants are converted into offsets from labels. */
8146 if (GET_CODE (op) != MEM)
8151 if (reload_completed
8152 && (GET_CODE (ind) == LABEL_REF
8153 || (GET_CODE (ind) == CONST
8154 && GET_CODE (XEXP (ind, 0)) == PLUS
8155 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8156 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8159 /* Match: (mem (reg)). */
8160 if (GET_CODE (ind) == REG)
8161 return arm_address_register_rtx_p (ind, 0);
8163 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8164 acceptable in any case (subject to verification by
8165 arm_address_register_rtx_p). We need WB to be true to accept
8166 PRE_INC and POST_DEC. */
8167 if (GET_CODE (ind) == POST_INC
8168 || GET_CODE (ind) == PRE_DEC
8170 && (GET_CODE (ind) == PRE_INC
8171 || GET_CODE (ind) == POST_DEC)))
8172 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8175 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8176 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8177 && GET_CODE (XEXP (ind, 1)) == PLUS
8178 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8179 ind = XEXP (ind, 1);
8184 if (GET_CODE (ind) == PLUS
8185 && GET_CODE (XEXP (ind, 0)) == REG
8186 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8187 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8188 && INTVAL (XEXP (ind, 1)) > -1024
8189 && INTVAL (XEXP (ind, 1)) < 1024
8190 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8196 /* Return TRUE if OP is a memory operand which we can load or store a vector
8197 to/from. TYPE is one of the following values:
8198 0 - Vector load/stor (vldr)
8199 1 - Core registers (ldm)
8200 2 - Element/structure loads (vld1)
8203 neon_vector_mem_operand (rtx op, int type)
8207 /* Reject eliminable registers. */
8208 if (! (reload_in_progress || reload_completed)
8209 && ( reg_mentioned_p (frame_pointer_rtx, op)
8210 || reg_mentioned_p (arg_pointer_rtx, op)
8211 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8212 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8213 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8214 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8217 /* Constants are converted into offsets from labels. */
8218 if (GET_CODE (op) != MEM)
8223 if (reload_completed
8224 && (GET_CODE (ind) == LABEL_REF
8225 || (GET_CODE (ind) == CONST
8226 && GET_CODE (XEXP (ind, 0)) == PLUS
8227 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8228 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8231 /* Match: (mem (reg)). */
8232 if (GET_CODE (ind) == REG)
8233 return arm_address_register_rtx_p (ind, 0);
8235 /* Allow post-increment with Neon registers. */
8236 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8237 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8239 /* FIXME: vld1 allows register post-modify. */
8245 && GET_CODE (ind) == PLUS
8246 && GET_CODE (XEXP (ind, 0)) == REG
8247 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8248 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8249 && INTVAL (XEXP (ind, 1)) > -1024
8250 && INTVAL (XEXP (ind, 1)) < 1016
8251 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8257 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8260 neon_struct_mem_operand (rtx op)
8264 /* Reject eliminable registers. */
8265 if (! (reload_in_progress || reload_completed)
8266 && ( reg_mentioned_p (frame_pointer_rtx, op)
8267 || reg_mentioned_p (arg_pointer_rtx, op)
8268 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8269 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8270 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8271 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8274 /* Constants are converted into offsets from labels. */
8275 if (GET_CODE (op) != MEM)
8280 if (reload_completed
8281 && (GET_CODE (ind) == LABEL_REF
8282 || (GET_CODE (ind) == CONST
8283 && GET_CODE (XEXP (ind, 0)) == PLUS
8284 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8285 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8288 /* Match: (mem (reg)). */
8289 if (GET_CODE (ind) == REG)
8290 return arm_address_register_rtx_p (ind, 0);
8295 /* Return true if X is a register that will be eliminated later on. */
8297 arm_eliminable_register (rtx x)
8299 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8300 || REGNO (x) == ARG_POINTER_REGNUM
8301 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8302 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8305 /* Return GENERAL_REGS if a scratch register required to reload x to/from
8306 coprocessor registers. Otherwise return NO_REGS. */
8309 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8313 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8315 return GENERAL_REGS;
8319 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8320 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8321 && neon_vector_mem_operand (x, 0))
8324 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8327 return GENERAL_REGS;
8330 /* Values which must be returned in the most-significant end of the return
8334 arm_return_in_msb (const_tree valtype)
8336 return (TARGET_AAPCS_BASED
8338 && (AGGREGATE_TYPE_P (valtype)
8339 || TREE_CODE (valtype) == COMPLEX_TYPE));
8342 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8343 Use by the Cirrus Maverick code which has to workaround
8344 a hardware bug triggered by such instructions. */
8346 arm_memory_load_p (rtx insn)
8348 rtx body, lhs, rhs;;
8350 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8353 body = PATTERN (insn);
8355 if (GET_CODE (body) != SET)
8358 lhs = XEXP (body, 0);
8359 rhs = XEXP (body, 1);
8361 lhs = REG_OR_SUBREG_RTX (lhs);
8363 /* If the destination is not a general purpose
8364 register we do not have to worry. */
8365 if (GET_CODE (lhs) != REG
8366 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8369 /* As well as loads from memory we also have to react
8370 to loads of invalid constants which will be turned
8371 into loads from the minipool. */
8372 return (GET_CODE (rhs) == MEM
8373 || GET_CODE (rhs) == SYMBOL_REF
8374 || note_invalid_constants (insn, -1, false));
8377 /* Return TRUE if INSN is a Cirrus instruction. */
8379 arm_cirrus_insn_p (rtx insn)
8381 enum attr_cirrus attr;
8383 /* get_attr cannot accept USE or CLOBBER. */
8385 || GET_CODE (insn) != INSN
8386 || GET_CODE (PATTERN (insn)) == USE
8387 || GET_CODE (PATTERN (insn)) == CLOBBER)
8390 attr = get_attr_cirrus (insn);
8392 return attr != CIRRUS_NOT;
8395 /* Cirrus reorg for invalid instruction combinations. */
8397 cirrus_reorg (rtx first)
8399 enum attr_cirrus attr;
8400 rtx body = PATTERN (first);
8404 /* Any branch must be followed by 2 non Cirrus instructions. */
8405 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8408 t = next_nonnote_insn (first);
8410 if (arm_cirrus_insn_p (t))
8413 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8417 emit_insn_after (gen_nop (), first);
8422 /* (float (blah)) is in parallel with a clobber. */
8423 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8424 body = XVECEXP (body, 0, 0);
8426 if (GET_CODE (body) == SET)
8428 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8430 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8431 be followed by a non Cirrus insn. */
8432 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8434 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8435 emit_insn_after (gen_nop (), first);
8439 else if (arm_memory_load_p (first))
8441 unsigned int arm_regno;
8443 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8444 ldr/cfmv64hr combination where the Rd field is the same
8445 in both instructions must be split with a non Cirrus
8452 /* Get Arm register number for ldr insn. */
8453 if (GET_CODE (lhs) == REG)
8454 arm_regno = REGNO (lhs);
8457 gcc_assert (GET_CODE (rhs) == REG);
8458 arm_regno = REGNO (rhs);
8462 first = next_nonnote_insn (first);
8464 if (! arm_cirrus_insn_p (first))
8467 body = PATTERN (first);
8469 /* (float (blah)) is in parallel with a clobber. */
8470 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8471 body = XVECEXP (body, 0, 0);
8473 if (GET_CODE (body) == FLOAT)
8474 body = XEXP (body, 0);
8476 if (get_attr_cirrus (first) == CIRRUS_MOVE
8477 && GET_CODE (XEXP (body, 1)) == REG
8478 && arm_regno == REGNO (XEXP (body, 1)))
8479 emit_insn_after (gen_nop (), first);
8485 /* get_attr cannot accept USE or CLOBBER. */
8487 || GET_CODE (first) != INSN
8488 || GET_CODE (PATTERN (first)) == USE
8489 || GET_CODE (PATTERN (first)) == CLOBBER)
8492 attr = get_attr_cirrus (first);
8494 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8495 must be followed by a non-coprocessor instruction. */
8496 if (attr == CIRRUS_COMPARE)
8500 t = next_nonnote_insn (first);
8502 if (arm_cirrus_insn_p (t))
8505 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8509 emit_insn_after (gen_nop (), first);
8515 /* Return TRUE if X references a SYMBOL_REF. */
8517 symbol_mentioned_p (rtx x)
8522 if (GET_CODE (x) == SYMBOL_REF)
8525 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8526 are constant offsets, not symbols. */
8527 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8530 fmt = GET_RTX_FORMAT (GET_CODE (x));
8532 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8538 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8539 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8542 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8549 /* Return TRUE if X references a LABEL_REF. */
8551 label_mentioned_p (rtx x)
8556 if (GET_CODE (x) == LABEL_REF)
8559 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8560 instruction, but they are constant offsets, not symbols. */
8561 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8564 fmt = GET_RTX_FORMAT (GET_CODE (x));
8565 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8571 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8572 if (label_mentioned_p (XVECEXP (x, i, j)))
8575 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8583 tls_mentioned_p (rtx x)
8585 switch (GET_CODE (x))
8588 return tls_mentioned_p (XEXP (x, 0));
8591 if (XINT (x, 1) == UNSPEC_TLS)
8599 /* Must not copy a SET whose source operand is PC-relative. */
8602 arm_cannot_copy_insn_p (rtx insn)
8604 rtx pat = PATTERN (insn);
8606 if (GET_CODE (pat) == SET)
8608 rtx rhs = SET_SRC (pat);
8610 if (GET_CODE (rhs) == UNSPEC
8611 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
8614 if (GET_CODE (rhs) == MEM
8615 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
8616 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
8626 enum rtx_code code = GET_CODE (x);
8643 /* Return 1 if memory locations are adjacent. */
8645 adjacent_mem_locations (rtx a, rtx b)
8647 /* We don't guarantee to preserve the order of these memory refs. */
8648 if (volatile_refs_p (a) || volatile_refs_p (b))
8651 if ((GET_CODE (XEXP (a, 0)) == REG
8652 || (GET_CODE (XEXP (a, 0)) == PLUS
8653 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
8654 && (GET_CODE (XEXP (b, 0)) == REG
8655 || (GET_CODE (XEXP (b, 0)) == PLUS
8656 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
8658 HOST_WIDE_INT val0 = 0, val1 = 0;
8662 if (GET_CODE (XEXP (a, 0)) == PLUS)
8664 reg0 = XEXP (XEXP (a, 0), 0);
8665 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
8670 if (GET_CODE (XEXP (b, 0)) == PLUS)
8672 reg1 = XEXP (XEXP (b, 0), 0);
8673 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
8678 /* Don't accept any offset that will require multiple
8679 instructions to handle, since this would cause the
8680 arith_adjacentmem pattern to output an overlong sequence. */
8681 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
8684 /* Don't allow an eliminable register: register elimination can make
8685 the offset too large. */
8686 if (arm_eliminable_register (reg0))
8689 val_diff = val1 - val0;
8693 /* If the target has load delay slots, then there's no benefit
8694 to using an ldm instruction unless the offset is zero and
8695 we are optimizing for size. */
8696 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
8697 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
8698 && (val_diff == 4 || val_diff == -4));
8701 return ((REGNO (reg0) == REGNO (reg1))
8702 && (val_diff == 4 || val_diff == -4));
8709 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8710 HOST_WIDE_INT *load_offset)
8712 int unsorted_regs[4];
8713 HOST_WIDE_INT unsorted_offsets[4];
8718 /* Can only handle 2, 3, or 4 insns at present,
8719 though could be easily extended if required. */
8720 gcc_assert (nops >= 2 && nops <= 4);
8722 memset (order, 0, 4 * sizeof (int));
8724 /* Loop over the operands and check that the memory references are
8725 suitable (i.e. immediate offsets from the same base register). At
8726 the same time, extract the target register, and the memory
8728 for (i = 0; i < nops; i++)
8733 /* Convert a subreg of a mem into the mem itself. */
8734 if (GET_CODE (operands[nops + i]) == SUBREG)
8735 operands[nops + i] = alter_subreg (operands + (nops + i));
8737 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8739 /* Don't reorder volatile memory references; it doesn't seem worth
8740 looking for the case where the order is ok anyway. */
8741 if (MEM_VOLATILE_P (operands[nops + i]))
8744 offset = const0_rtx;
8746 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8747 || (GET_CODE (reg) == SUBREG
8748 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8749 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8750 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8752 || (GET_CODE (reg) == SUBREG
8753 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8754 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8759 base_reg = REGNO (reg);
8760 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8761 ? REGNO (operands[i])
8762 : REGNO (SUBREG_REG (operands[i])));
8767 if (base_reg != (int) REGNO (reg))
8768 /* Not addressed from the same base register. */
8771 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
8772 ? REGNO (operands[i])
8773 : REGNO (SUBREG_REG (operands[i])));
8774 if (unsorted_regs[i] < unsorted_regs[order[0]])
8778 /* If it isn't an integer register, or if it overwrites the
8779 base register but isn't the last insn in the list, then
8780 we can't do this. */
8781 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
8782 || (i != nops - 1 && unsorted_regs[i] == base_reg))
8785 unsorted_offsets[i] = INTVAL (offset);
8788 /* Not a suitable memory address. */
8792 /* All the useful information has now been extracted from the
8793 operands into unsorted_regs and unsorted_offsets; additionally,
8794 order[0] has been set to the lowest numbered register in the
8795 list. Sort the registers into order, and check that the memory
8796 offsets are ascending and adjacent. */
8798 for (i = 1; i < nops; i++)
8802 order[i] = order[i - 1];
8803 for (j = 0; j < nops; j++)
8804 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
8805 && (order[i] == order[i - 1]
8806 || unsorted_regs[j] < unsorted_regs[order[i]]))
8809 /* Have we found a suitable register? if not, one must be used more
8811 if (order[i] == order[i - 1])
8814 /* Is the memory address adjacent and ascending? */
8815 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
8823 for (i = 0; i < nops; i++)
8824 regs[i] = unsorted_regs[order[i]];
8826 *load_offset = unsorted_offsets[order[0]];
8829 if (unsorted_offsets[order[0]] == 0)
8830 return 1; /* ldmia */
8832 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
8833 return 2; /* ldmib */
8835 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
8836 return 3; /* ldmda */
8838 if (unsorted_offsets[order[nops - 1]] == -4)
8839 return 4; /* ldmdb */
8841 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
8842 if the offset isn't small enough. The reason 2 ldrs are faster
8843 is because these ARMs are able to do more than one cache access
8844 in a single cycle. The ARM9 and StrongARM have Harvard caches,
8845 whilst the ARM8 has a double bandwidth cache. This means that
8846 these cores can do both an instruction fetch and a data fetch in
8847 a single cycle, so the trick of calculating the address into a
8848 scratch register (one of the result regs) and then doing a load
8849 multiple actually becomes slower (and no smaller in code size).
8850 That is the transformation
8852 ldr rd1, [rbase + offset]
8853 ldr rd2, [rbase + offset + 4]
8857 add rd1, rbase, offset
8858 ldmia rd1, {rd1, rd2}
8860 produces worse code -- '3 cycles + any stalls on rd2' instead of
8861 '2 cycles + any stalls on rd2'. On ARMs with only one cache
8862 access per cycle, the first sequence could never complete in less
8863 than 6 cycles, whereas the ldm sequence would only take 5 and
8864 would make better use of sequential accesses if not hitting the
8867 We cheat here and test 'arm_ld_sched' which we currently know to
8868 only be true for the ARM8, ARM9 and StrongARM. If this ever
8869 changes, then the test below needs to be reworked. */
8870 if (nops == 2 && arm_ld_sched)
8873 /* Can't do it without setting up the offset, only do this if it takes
8874 no more than one insn. */
8875 return (const_ok_for_arm (unsorted_offsets[order[0]])
8876 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
8880 emit_ldm_seq (rtx *operands, int nops)
8884 HOST_WIDE_INT offset;
8888 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
8891 strcpy (buf, "ldm%(ia%)\t");
8895 strcpy (buf, "ldm%(ib%)\t");
8899 strcpy (buf, "ldm%(da%)\t");
8903 strcpy (buf, "ldm%(db%)\t");
8908 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
8909 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
8912 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
8913 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
8915 output_asm_insn (buf, operands);
8917 strcpy (buf, "ldm%(ia%)\t");
8924 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
8925 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
8927 for (i = 1; i < nops; i++)
8928 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
8929 reg_names[regs[i]]);
8931 strcat (buf, "}\t%@ phole ldm");
8933 output_asm_insn (buf, operands);
8938 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8939 HOST_WIDE_INT * load_offset)
8941 int unsorted_regs[4];
8942 HOST_WIDE_INT unsorted_offsets[4];
8947 /* Can only handle 2, 3, or 4 insns at present, though could be easily
8948 extended if required. */
8949 gcc_assert (nops >= 2 && nops <= 4);
8951 memset (order, 0, 4 * sizeof (int));
8953 /* Loop over the operands and check that the memory references are
8954 suitable (i.e. immediate offsets from the same base register). At
8955 the same time, extract the target register, and the memory
8957 for (i = 0; i < nops; i++)
8962 /* Convert a subreg of a mem into the mem itself. */
8963 if (GET_CODE (operands[nops + i]) == SUBREG)
8964 operands[nops + i] = alter_subreg (operands + (nops + i));
8966 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8968 /* Don't reorder volatile memory references; it doesn't seem worth
8969 looking for the case where the order is ok anyway. */
8970 if (MEM_VOLATILE_P (operands[nops + i]))
8973 offset = const0_rtx;
8975 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8976 || (GET_CODE (reg) == SUBREG
8977 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8978 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8979 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8981 || (GET_CODE (reg) == SUBREG
8982 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8983 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8988 base_reg = REGNO (reg);
8989 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8990 ? REGNO (operands[i])
8991 : REGNO (SUBREG_REG (operands[i])));
8996 if (base_reg != (int) REGNO (reg))
8997 /* Not addressed from the same base register. */
9000 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9001 ? REGNO (operands[i])
9002 : REGNO (SUBREG_REG (operands[i])));
9003 if (unsorted_regs[i] < unsorted_regs[order[0]])
9007 /* If it isn't an integer register, then we can't do this. */
9008 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9011 unsorted_offsets[i] = INTVAL (offset);
9014 /* Not a suitable memory address. */
9018 /* All the useful information has now been extracted from the
9019 operands into unsorted_regs and unsorted_offsets; additionally,
9020 order[0] has been set to the lowest numbered register in the
9021 list. Sort the registers into order, and check that the memory
9022 offsets are ascending and adjacent. */
9024 for (i = 1; i < nops; i++)
9028 order[i] = order[i - 1];
9029 for (j = 0; j < nops; j++)
9030 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9031 && (order[i] == order[i - 1]
9032 || unsorted_regs[j] < unsorted_regs[order[i]]))
9035 /* Have we found a suitable register? if not, one must be used more
9037 if (order[i] == order[i - 1])
9040 /* Is the memory address adjacent and ascending? */
9041 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9049 for (i = 0; i < nops; i++)
9050 regs[i] = unsorted_regs[order[i]];
9052 *load_offset = unsorted_offsets[order[0]];
9055 if (unsorted_offsets[order[0]] == 0)
9056 return 1; /* stmia */
9058 if (unsorted_offsets[order[0]] == 4)
9059 return 2; /* stmib */
9061 if (unsorted_offsets[order[nops - 1]] == 0)
9062 return 3; /* stmda */
9064 if (unsorted_offsets[order[nops - 1]] == -4)
9065 return 4; /* stmdb */
9071 emit_stm_seq (rtx *operands, int nops)
9075 HOST_WIDE_INT offset;
9079 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9082 strcpy (buf, "stm%(ia%)\t");
9086 strcpy (buf, "stm%(ib%)\t");
9090 strcpy (buf, "stm%(da%)\t");
9094 strcpy (buf, "stm%(db%)\t");
9101 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9102 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9104 for (i = 1; i < nops; i++)
9105 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9106 reg_names[regs[i]]);
9108 strcat (buf, "}\t%@ phole stm");
9110 output_asm_insn (buf, operands);
9114 /* Routines for use in generating RTL. */
9117 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9118 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9120 HOST_WIDE_INT offset = *offsetp;
9123 int sign = up ? 1 : -1;
9126 /* XScale has load-store double instructions, but they have stricter
9127 alignment requirements than load-store multiple, so we cannot
9130 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9131 the pipeline until completion.
9139 An ldr instruction takes 1-3 cycles, but does not block the
9148 Best case ldr will always win. However, the more ldr instructions
9149 we issue, the less likely we are to be able to schedule them well.
9150 Using ldr instructions also increases code size.
9152 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9153 for counts of 3 or 4 regs. */
9154 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9160 for (i = 0; i < count; i++)
9162 addr = plus_constant (from, i * 4 * sign);
9163 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9164 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9170 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9180 result = gen_rtx_PARALLEL (VOIDmode,
9181 rtvec_alloc (count + (write_back ? 1 : 0)));
9184 XVECEXP (result, 0, 0)
9185 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9190 for (j = 0; i < count; i++, j++)
9192 addr = plus_constant (from, j * 4 * sign);
9193 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9194 XVECEXP (result, 0, i)
9195 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9206 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9207 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9209 HOST_WIDE_INT offset = *offsetp;
9212 int sign = up ? 1 : -1;
9215 /* See arm_gen_load_multiple for discussion of
9216 the pros/cons of ldm/stm usage for XScale. */
9217 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9223 for (i = 0; i < count; i++)
9225 addr = plus_constant (to, i * 4 * sign);
9226 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9227 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9233 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9243 result = gen_rtx_PARALLEL (VOIDmode,
9244 rtvec_alloc (count + (write_back ? 1 : 0)));
9247 XVECEXP (result, 0, 0)
9248 = gen_rtx_SET (VOIDmode, to,
9249 plus_constant (to, count * 4 * sign));
9254 for (j = 0; i < count; i++, j++)
9256 addr = plus_constant (to, j * 4 * sign);
9257 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9258 XVECEXP (result, 0, i)
9259 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9270 arm_gen_movmemqi (rtx *operands)
9272 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9273 HOST_WIDE_INT srcoffset, dstoffset;
9275 rtx src, dst, srcbase, dstbase;
9276 rtx part_bytes_reg = NULL;
9279 if (GET_CODE (operands[2]) != CONST_INT
9280 || GET_CODE (operands[3]) != CONST_INT
9281 || INTVAL (operands[2]) > 64
9282 || INTVAL (operands[3]) & 3)
9285 dstbase = operands[0];
9286 srcbase = operands[1];
9288 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9289 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9291 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9292 out_words_to_go = INTVAL (operands[2]) / 4;
9293 last_bytes = INTVAL (operands[2]) & 3;
9294 dstoffset = srcoffset = 0;
9296 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9297 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9299 for (i = 0; in_words_to_go >= 2; i+=4)
9301 if (in_words_to_go > 4)
9302 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9303 srcbase, &srcoffset));
9305 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9306 FALSE, srcbase, &srcoffset));
9308 if (out_words_to_go)
9310 if (out_words_to_go > 4)
9311 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9312 dstbase, &dstoffset));
9313 else if (out_words_to_go != 1)
9314 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9318 dstbase, &dstoffset));
9321 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9322 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9323 if (last_bytes != 0)
9325 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9331 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9332 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9335 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9336 if (out_words_to_go)
9340 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9341 sreg = copy_to_reg (mem);
9343 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9344 emit_move_insn (mem, sreg);
9347 gcc_assert (!in_words_to_go); /* Sanity check */
9352 gcc_assert (in_words_to_go > 0);
9354 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9355 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9358 gcc_assert (!last_bytes || part_bytes_reg);
9360 if (BYTES_BIG_ENDIAN && last_bytes)
9362 rtx tmp = gen_reg_rtx (SImode);
9364 /* The bytes we want are in the top end of the word. */
9365 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9366 GEN_INT (8 * (4 - last_bytes))));
9367 part_bytes_reg = tmp;
9371 mem = adjust_automodify_address (dstbase, QImode,
9372 plus_constant (dst, last_bytes - 1),
9373 dstoffset + last_bytes - 1);
9374 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9378 tmp = gen_reg_rtx (SImode);
9379 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9380 part_bytes_reg = tmp;
9389 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9390 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9394 rtx tmp = gen_reg_rtx (SImode);
9395 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9396 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9397 part_bytes_reg = tmp;
9404 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9405 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9412 /* Select a dominance comparison mode if possible for a test of the general
9413 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9414 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9415 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9416 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9417 In all cases OP will be either EQ or NE, but we don't need to know which
9418 here. If we are unable to support a dominance comparison we return
9419 CC mode. This will then fail to match for the RTL expressions that
9420 generate this call. */
9422 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9424 enum rtx_code cond1, cond2;
9427 /* Currently we will probably get the wrong result if the individual
9428 comparisons are not simple. This also ensures that it is safe to
9429 reverse a comparison if necessary. */
9430 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9432 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9436 /* The if_then_else variant of this tests the second condition if the
9437 first passes, but is true if the first fails. Reverse the first
9438 condition to get a true "inclusive-or" expression. */
9439 if (cond_or == DOM_CC_NX_OR_Y)
9440 cond1 = reverse_condition (cond1);
9442 /* If the comparisons are not equal, and one doesn't dominate the other,
9443 then we can't do this. */
9445 && !comparison_dominates_p (cond1, cond2)
9446 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9451 enum rtx_code temp = cond1;
9459 if (cond_or == DOM_CC_X_AND_Y)
9464 case EQ: return CC_DEQmode;
9465 case LE: return CC_DLEmode;
9466 case LEU: return CC_DLEUmode;
9467 case GE: return CC_DGEmode;
9468 case GEU: return CC_DGEUmode;
9469 default: gcc_unreachable ();
9473 if (cond_or == DOM_CC_X_AND_Y)
9489 if (cond_or == DOM_CC_X_AND_Y)
9505 if (cond_or == DOM_CC_X_AND_Y)
9521 if (cond_or == DOM_CC_X_AND_Y)
9536 /* The remaining cases only occur when both comparisons are the
9539 gcc_assert (cond1 == cond2);
9543 gcc_assert (cond1 == cond2);
9547 gcc_assert (cond1 == cond2);
9551 gcc_assert (cond1 == cond2);
9555 gcc_assert (cond1 == cond2);
9564 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9566 /* All floating point compares return CCFP if it is an equality
9567 comparison, and CCFPE otherwise. */
9568 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9588 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9597 /* A compare with a shifted operand. Because of canonicalization, the
9598 comparison will have to be swapped when we emit the assembler. */
9599 if (GET_MODE (y) == SImode
9600 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9601 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9602 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9603 || GET_CODE (x) == ROTATERT))
9606 /* This operation is performed swapped, but since we only rely on the Z
9607 flag we don't need an additional mode. */
9608 if (GET_MODE (y) == SImode
9609 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9610 && GET_CODE (x) == NEG
9611 && (op == EQ || op == NE))
9614 /* This is a special case that is used by combine to allow a
9615 comparison of a shifted byte load to be split into a zero-extend
9616 followed by a comparison of the shifted integer (only valid for
9617 equalities and unsigned inequalities). */
9618 if (GET_MODE (x) == SImode
9619 && GET_CODE (x) == ASHIFT
9620 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9621 && GET_CODE (XEXP (x, 0)) == SUBREG
9622 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9623 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9624 && (op == EQ || op == NE
9625 || op == GEU || op == GTU || op == LTU || op == LEU)
9626 && GET_CODE (y) == CONST_INT)
9629 /* A construct for a conditional compare, if the false arm contains
9630 0, then both conditions must be true, otherwise either condition
9631 must be true. Not all conditions are possible, so CCmode is
9632 returned if it can't be done. */
9633 if (GET_CODE (x) == IF_THEN_ELSE
9634 && (XEXP (x, 2) == const0_rtx
9635 || XEXP (x, 2) == const1_rtx)
9636 && COMPARISON_P (XEXP (x, 0))
9637 && COMPARISON_P (XEXP (x, 1)))
9638 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9639 INTVAL (XEXP (x, 2)));
9641 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9642 if (GET_CODE (x) == AND
9643 && COMPARISON_P (XEXP (x, 0))
9644 && COMPARISON_P (XEXP (x, 1)))
9645 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9648 if (GET_CODE (x) == IOR
9649 && COMPARISON_P (XEXP (x, 0))
9650 && COMPARISON_P (XEXP (x, 1)))
9651 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9654 /* An operation (on Thumb) where we want to test for a single bit.
9655 This is done by shifting that bit up into the top bit of a
9656 scratch register; we can then branch on the sign bit. */
9658 && GET_MODE (x) == SImode
9659 && (op == EQ || op == NE)
9660 && GET_CODE (x) == ZERO_EXTRACT
9661 && XEXP (x, 1) == const1_rtx)
9664 /* An operation that sets the condition codes as a side-effect, the
9665 V flag is not set correctly, so we can only use comparisons where
9666 this doesn't matter. (For LT and GE we can use "mi" and "pl"
9668 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
9669 if (GET_MODE (x) == SImode
9671 && (op == EQ || op == NE || op == LT || op == GE)
9672 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
9673 || GET_CODE (x) == AND || GET_CODE (x) == IOR
9674 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
9675 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
9676 || GET_CODE (x) == LSHIFTRT
9677 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9678 || GET_CODE (x) == ROTATERT
9679 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
9682 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
9685 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
9686 && GET_CODE (x) == PLUS
9687 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
9693 /* X and Y are two things to compare using CODE. Emit the compare insn and
9694 return the rtx for register 0 in the proper mode. FP means this is a
9695 floating point compare: I don't think that it is needed on the arm. */
9697 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
9699 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
9700 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
9702 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
9707 /* Generate a sequence of insns that will generate the correct return
9708 address mask depending on the physical architecture that the program
9711 arm_gen_return_addr_mask (void)
9713 rtx reg = gen_reg_rtx (Pmode);
9715 emit_insn (gen_return_addr_mask (reg));
9720 arm_reload_in_hi (rtx *operands)
9722 rtx ref = operands[1];
9724 HOST_WIDE_INT offset = 0;
9726 if (GET_CODE (ref) == SUBREG)
9728 offset = SUBREG_BYTE (ref);
9729 ref = SUBREG_REG (ref);
9732 if (GET_CODE (ref) == REG)
9734 /* We have a pseudo which has been spilt onto the stack; there
9735 are two cases here: the first where there is a simple
9736 stack-slot replacement and a second where the stack-slot is
9737 out of range, or is used as a subreg. */
9738 if (reg_equiv_mem[REGNO (ref)])
9740 ref = reg_equiv_mem[REGNO (ref)];
9741 base = find_replacement (&XEXP (ref, 0));
9744 /* The slot is out of range, or was dressed up in a SUBREG. */
9745 base = reg_equiv_address[REGNO (ref)];
9748 base = find_replacement (&XEXP (ref, 0));
9750 /* Handle the case where the address is too complex to be offset by 1. */
9751 if (GET_CODE (base) == MINUS
9752 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9754 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9756 emit_set_insn (base_plus, base);
9759 else if (GET_CODE (base) == PLUS)
9761 /* The addend must be CONST_INT, or we would have dealt with it above. */
9762 HOST_WIDE_INT hi, lo;
9764 offset += INTVAL (XEXP (base, 1));
9765 base = XEXP (base, 0);
9767 /* Rework the address into a legal sequence of insns. */
9768 /* Valid range for lo is -4095 -> 4095 */
9771 : -((-offset) & 0xfff));
9773 /* Corner case, if lo is the max offset then we would be out of range
9774 once we have added the additional 1 below, so bump the msb into the
9775 pre-loading insn(s). */
9779 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9780 ^ (HOST_WIDE_INT) 0x80000000)
9781 - (HOST_WIDE_INT) 0x80000000);
9783 gcc_assert (hi + lo == offset);
9787 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9789 /* Get the base address; addsi3 knows how to handle constants
9790 that require more than one insn. */
9791 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9797 /* Operands[2] may overlap operands[0] (though it won't overlap
9798 operands[1]), that's why we asked for a DImode reg -- so we can
9799 use the bit that does not overlap. */
9800 if (REGNO (operands[2]) == REGNO (operands[0]))
9801 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9803 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
9805 emit_insn (gen_zero_extendqisi2 (scratch,
9806 gen_rtx_MEM (QImode,
9807 plus_constant (base,
9809 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
9810 gen_rtx_MEM (QImode,
9811 plus_constant (base,
9813 if (!BYTES_BIG_ENDIAN)
9814 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
9815 gen_rtx_IOR (SImode,
9818 gen_rtx_SUBREG (SImode, operands[0], 0),
9822 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
9823 gen_rtx_IOR (SImode,
9824 gen_rtx_ASHIFT (SImode, scratch,
9826 gen_rtx_SUBREG (SImode, operands[0], 0)));
9829 /* Handle storing a half-word to memory during reload by synthesizing as two
9830 byte stores. Take care not to clobber the input values until after we
9831 have moved them somewhere safe. This code assumes that if the DImode
9832 scratch in operands[2] overlaps either the input value or output address
9833 in some way, then that value must die in this insn (we absolutely need
9834 two scratch registers for some corner cases). */
9836 arm_reload_out_hi (rtx *operands)
9838 rtx ref = operands[0];
9839 rtx outval = operands[1];
9841 HOST_WIDE_INT offset = 0;
9843 if (GET_CODE (ref) == SUBREG)
9845 offset = SUBREG_BYTE (ref);
9846 ref = SUBREG_REG (ref);
9849 if (GET_CODE (ref) == REG)
9851 /* We have a pseudo which has been spilt onto the stack; there
9852 are two cases here: the first where there is a simple
9853 stack-slot replacement and a second where the stack-slot is
9854 out of range, or is used as a subreg. */
9855 if (reg_equiv_mem[REGNO (ref)])
9857 ref = reg_equiv_mem[REGNO (ref)];
9858 base = find_replacement (&XEXP (ref, 0));
9861 /* The slot is out of range, or was dressed up in a SUBREG. */
9862 base = reg_equiv_address[REGNO (ref)];
9865 base = find_replacement (&XEXP (ref, 0));
9867 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
9869 /* Handle the case where the address is too complex to be offset by 1. */
9870 if (GET_CODE (base) == MINUS
9871 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9873 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9875 /* Be careful not to destroy OUTVAL. */
9876 if (reg_overlap_mentioned_p (base_plus, outval))
9878 /* Updating base_plus might destroy outval, see if we can
9879 swap the scratch and base_plus. */
9880 if (!reg_overlap_mentioned_p (scratch, outval))
9883 scratch = base_plus;
9888 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
9890 /* Be conservative and copy OUTVAL into the scratch now,
9891 this should only be necessary if outval is a subreg
9892 of something larger than a word. */
9893 /* XXX Might this clobber base? I can't see how it can,
9894 since scratch is known to overlap with OUTVAL, and
9895 must be wider than a word. */
9896 emit_insn (gen_movhi (scratch_hi, outval));
9897 outval = scratch_hi;
9901 emit_set_insn (base_plus, base);
9904 else if (GET_CODE (base) == PLUS)
9906 /* The addend must be CONST_INT, or we would have dealt with it above. */
9907 HOST_WIDE_INT hi, lo;
9909 offset += INTVAL (XEXP (base, 1));
9910 base = XEXP (base, 0);
9912 /* Rework the address into a legal sequence of insns. */
9913 /* Valid range for lo is -4095 -> 4095 */
9916 : -((-offset) & 0xfff));
9918 /* Corner case, if lo is the max offset then we would be out of range
9919 once we have added the additional 1 below, so bump the msb into the
9920 pre-loading insn(s). */
9924 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9925 ^ (HOST_WIDE_INT) 0x80000000)
9926 - (HOST_WIDE_INT) 0x80000000);
9928 gcc_assert (hi + lo == offset);
9932 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9934 /* Be careful not to destroy OUTVAL. */
9935 if (reg_overlap_mentioned_p (base_plus, outval))
9937 /* Updating base_plus might destroy outval, see if we
9938 can swap the scratch and base_plus. */
9939 if (!reg_overlap_mentioned_p (scratch, outval))
9942 scratch = base_plus;
9947 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
9949 /* Be conservative and copy outval into scratch now,
9950 this should only be necessary if outval is a
9951 subreg of something larger than a word. */
9952 /* XXX Might this clobber base? I can't see how it
9953 can, since scratch is known to overlap with
9955 emit_insn (gen_movhi (scratch_hi, outval));
9956 outval = scratch_hi;
9960 /* Get the base address; addsi3 knows how to handle constants
9961 that require more than one insn. */
9962 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9968 if (BYTES_BIG_ENDIAN)
9970 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
9971 plus_constant (base, offset + 1)),
9972 gen_lowpart (QImode, outval)));
9973 emit_insn (gen_lshrsi3 (scratch,
9974 gen_rtx_SUBREG (SImode, outval, 0),
9976 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
9977 gen_lowpart (QImode, scratch)));
9981 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
9982 gen_lowpart (QImode, outval)));
9983 emit_insn (gen_lshrsi3 (scratch,
9984 gen_rtx_SUBREG (SImode, outval, 0),
9986 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
9987 plus_constant (base, offset + 1)),
9988 gen_lowpart (QImode, scratch)));
9992 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
9993 (padded to the size of a word) should be passed in a register. */
9996 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
9998 if (TARGET_AAPCS_BASED)
9999 return must_pass_in_stack_var_size (mode, type);
10001 return must_pass_in_stack_var_size_or_pad (mode, type);
10005 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10006 Return true if an argument passed on the stack should be padded upwards,
10007 i.e. if the least-significant byte has useful data.
10008 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10009 aggregate types are placed in the lowest memory address. */
10012 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10014 if (!TARGET_AAPCS_BASED)
10015 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10017 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10024 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10025 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10026 byte of the register has useful data, and return the opposite if the
10027 most significant byte does.
10028 For AAPCS, small aggregates and small complex types are always padded
10032 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10033 tree type, int first ATTRIBUTE_UNUSED)
10035 if (TARGET_AAPCS_BASED
10036 && BYTES_BIG_ENDIAN
10037 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10038 && int_size_in_bytes (type) <= 4)
10041 /* Otherwise, use default padding. */
10042 return !BYTES_BIG_ENDIAN;
10046 /* Print a symbolic form of X to the debug file, F. */
10048 arm_print_value (FILE *f, rtx x)
10050 switch (GET_CODE (x))
10053 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10057 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10065 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10067 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10068 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10076 fprintf (f, "\"%s\"", XSTR (x, 0));
10080 fprintf (f, "`%s'", XSTR (x, 0));
10084 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10088 arm_print_value (f, XEXP (x, 0));
10092 arm_print_value (f, XEXP (x, 0));
10094 arm_print_value (f, XEXP (x, 1));
10102 fprintf (f, "????");
10107 /* Routines for manipulation of the constant pool. */
10109 /* Arm instructions cannot load a large constant directly into a
10110 register; they have to come from a pc relative load. The constant
10111 must therefore be placed in the addressable range of the pc
10112 relative load. Depending on the precise pc relative load
10113 instruction the range is somewhere between 256 bytes and 4k. This
10114 means that we often have to dump a constant inside a function, and
10115 generate code to branch around it.
10117 It is important to minimize this, since the branches will slow
10118 things down and make the code larger.
10120 Normally we can hide the table after an existing unconditional
10121 branch so that there is no interruption of the flow, but in the
10122 worst case the code looks like this:
10140 We fix this by performing a scan after scheduling, which notices
10141 which instructions need to have their operands fetched from the
10142 constant table and builds the table.
10144 The algorithm starts by building a table of all the constants that
10145 need fixing up and all the natural barriers in the function (places
10146 where a constant table can be dropped without breaking the flow).
10147 For each fixup we note how far the pc-relative replacement will be
10148 able to reach and the offset of the instruction into the function.
10150 Having built the table we then group the fixes together to form
10151 tables that are as large as possible (subject to addressing
10152 constraints) and emit each table of constants after the last
10153 barrier that is within range of all the instructions in the group.
10154 If a group does not contain a barrier, then we forcibly create one
10155 by inserting a jump instruction into the flow. Once the table has
10156 been inserted, the insns are then modified to reference the
10157 relevant entry in the pool.
10159 Possible enhancements to the algorithm (not implemented) are:
10161 1) For some processors and object formats, there may be benefit in
10162 aligning the pools to the start of cache lines; this alignment
10163 would need to be taken into account when calculating addressability
10166 /* These typedefs are located at the start of this file, so that
10167 they can be used in the prototypes there. This comment is to
10168 remind readers of that fact so that the following structures
10169 can be understood more easily.
10171 typedef struct minipool_node Mnode;
10172 typedef struct minipool_fixup Mfix; */
10174 struct minipool_node
10176 /* Doubly linked chain of entries. */
10179 /* The maximum offset into the code that this entry can be placed. While
10180 pushing fixes for forward references, all entries are sorted in order
10181 of increasing max_address. */
10182 HOST_WIDE_INT max_address;
10183 /* Similarly for an entry inserted for a backwards ref. */
10184 HOST_WIDE_INT min_address;
10185 /* The number of fixes referencing this entry. This can become zero
10186 if we "unpush" an entry. In this case we ignore the entry when we
10187 come to emit the code. */
10189 /* The offset from the start of the minipool. */
10190 HOST_WIDE_INT offset;
10191 /* The value in table. */
10193 /* The mode of value. */
10194 enum machine_mode mode;
10195 /* The size of the value. With iWMMXt enabled
10196 sizes > 4 also imply an alignment of 8-bytes. */
10200 struct minipool_fixup
10204 HOST_WIDE_INT address;
10206 enum machine_mode mode;
10210 HOST_WIDE_INT forwards;
10211 HOST_WIDE_INT backwards;
10214 /* Fixes less than a word need padding out to a word boundary. */
10215 #define MINIPOOL_FIX_SIZE(mode) \
10216 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
10218 static Mnode * minipool_vector_head;
10219 static Mnode * minipool_vector_tail;
10220 static rtx minipool_vector_label;
10221 static int minipool_pad;
10223 /* The linked list of all minipool fixes required for this function. */
10224 Mfix * minipool_fix_head;
10225 Mfix * minipool_fix_tail;
10226 /* The fix entry for the current minipool, once it has been placed. */
10227 Mfix * minipool_barrier;
10229 /* Determines if INSN is the start of a jump table. Returns the end
10230 of the TABLE or NULL_RTX. */
10232 is_jump_table (rtx insn)
10236 if (GET_CODE (insn) == JUMP_INSN
10237 && JUMP_LABEL (insn) != NULL
10238 && ((table = next_real_insn (JUMP_LABEL (insn)))
10239 == next_real_insn (insn))
10241 && GET_CODE (table) == JUMP_INSN
10242 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10243 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10249 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10250 #define JUMP_TABLES_IN_TEXT_SECTION 0
10253 static HOST_WIDE_INT
10254 get_jump_table_size (rtx insn)
10256 /* ADDR_VECs only take room if read-only data does into the text
10258 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10260 rtx body = PATTERN (insn);
10261 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10262 HOST_WIDE_INT size;
10263 HOST_WIDE_INT modesize;
10265 modesize = GET_MODE_SIZE (GET_MODE (body));
10266 size = modesize * XVECLEN (body, elt);
10270 /* Round up size of TBB table to a halfword boundary. */
10271 size = (size + 1) & ~(HOST_WIDE_INT)1;
10274 /* No padding necessary for TBH. */
10277 /* Add two bytes for alignment on Thumb. */
10282 gcc_unreachable ();
10290 /* Move a minipool fix MP from its current location to before MAX_MP.
10291 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10292 constraints may need updating. */
10294 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10295 HOST_WIDE_INT max_address)
10297 /* The code below assumes these are different. */
10298 gcc_assert (mp != max_mp);
10300 if (max_mp == NULL)
10302 if (max_address < mp->max_address)
10303 mp->max_address = max_address;
10307 if (max_address > max_mp->max_address - mp->fix_size)
10308 mp->max_address = max_mp->max_address - mp->fix_size;
10310 mp->max_address = max_address;
10312 /* Unlink MP from its current position. Since max_mp is non-null,
10313 mp->prev must be non-null. */
10314 mp->prev->next = mp->next;
10315 if (mp->next != NULL)
10316 mp->next->prev = mp->prev;
10318 minipool_vector_tail = mp->prev;
10320 /* Re-insert it before MAX_MP. */
10322 mp->prev = max_mp->prev;
10325 if (mp->prev != NULL)
10326 mp->prev->next = mp;
10328 minipool_vector_head = mp;
10331 /* Save the new entry. */
10334 /* Scan over the preceding entries and adjust their addresses as
10336 while (mp->prev != NULL
10337 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10339 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10346 /* Add a constant to the minipool for a forward reference. Returns the
10347 node added or NULL if the constant will not fit in this pool. */
10349 add_minipool_forward_ref (Mfix *fix)
10351 /* If set, max_mp is the first pool_entry that has a lower
10352 constraint than the one we are trying to add. */
10353 Mnode * max_mp = NULL;
10354 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10357 /* If the minipool starts before the end of FIX->INSN then this FIX
10358 can not be placed into the current pool. Furthermore, adding the
10359 new constant pool entry may cause the pool to start FIX_SIZE bytes
10361 if (minipool_vector_head &&
10362 (fix->address + get_attr_length (fix->insn)
10363 >= minipool_vector_head->max_address - fix->fix_size))
10366 /* Scan the pool to see if a constant with the same value has
10367 already been added. While we are doing this, also note the
10368 location where we must insert the constant if it doesn't already
10370 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10372 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10373 && fix->mode == mp->mode
10374 && (GET_CODE (fix->value) != CODE_LABEL
10375 || (CODE_LABEL_NUMBER (fix->value)
10376 == CODE_LABEL_NUMBER (mp->value)))
10377 && rtx_equal_p (fix->value, mp->value))
10379 /* More than one fix references this entry. */
10381 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10384 /* Note the insertion point if necessary. */
10386 && mp->max_address > max_address)
10389 /* If we are inserting an 8-bytes aligned quantity and
10390 we have not already found an insertion point, then
10391 make sure that all such 8-byte aligned quantities are
10392 placed at the start of the pool. */
10393 if (ARM_DOUBLEWORD_ALIGN
10395 && fix->fix_size >= 8
10396 && mp->fix_size < 8)
10399 max_address = mp->max_address;
10403 /* The value is not currently in the minipool, so we need to create
10404 a new entry for it. If MAX_MP is NULL, the entry will be put on
10405 the end of the list since the placement is less constrained than
10406 any existing entry. Otherwise, we insert the new fix before
10407 MAX_MP and, if necessary, adjust the constraints on the other
10410 mp->fix_size = fix->fix_size;
10411 mp->mode = fix->mode;
10412 mp->value = fix->value;
10414 /* Not yet required for a backwards ref. */
10415 mp->min_address = -65536;
10417 if (max_mp == NULL)
10419 mp->max_address = max_address;
10421 mp->prev = minipool_vector_tail;
10423 if (mp->prev == NULL)
10425 minipool_vector_head = mp;
10426 minipool_vector_label = gen_label_rtx ();
10429 mp->prev->next = mp;
10431 minipool_vector_tail = mp;
10435 if (max_address > max_mp->max_address - mp->fix_size)
10436 mp->max_address = max_mp->max_address - mp->fix_size;
10438 mp->max_address = max_address;
10441 mp->prev = max_mp->prev;
10443 if (mp->prev != NULL)
10444 mp->prev->next = mp;
10446 minipool_vector_head = mp;
10449 /* Save the new entry. */
10452 /* Scan over the preceding entries and adjust their addresses as
10454 while (mp->prev != NULL
10455 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10457 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10465 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10466 HOST_WIDE_INT min_address)
10468 HOST_WIDE_INT offset;
10470 /* The code below assumes these are different. */
10471 gcc_assert (mp != min_mp);
10473 if (min_mp == NULL)
10475 if (min_address > mp->min_address)
10476 mp->min_address = min_address;
10480 /* We will adjust this below if it is too loose. */
10481 mp->min_address = min_address;
10483 /* Unlink MP from its current position. Since min_mp is non-null,
10484 mp->next must be non-null. */
10485 mp->next->prev = mp->prev;
10486 if (mp->prev != NULL)
10487 mp->prev->next = mp->next;
10489 minipool_vector_head = mp->next;
10491 /* Reinsert it after MIN_MP. */
10493 mp->next = min_mp->next;
10495 if (mp->next != NULL)
10496 mp->next->prev = mp;
10498 minipool_vector_tail = mp;
10504 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10506 mp->offset = offset;
10507 if (mp->refcount > 0)
10508 offset += mp->fix_size;
10510 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10511 mp->next->min_address = mp->min_address + mp->fix_size;
10517 /* Add a constant to the minipool for a backward reference. Returns the
10518 node added or NULL if the constant will not fit in this pool.
10520 Note that the code for insertion for a backwards reference can be
10521 somewhat confusing because the calculated offsets for each fix do
10522 not take into account the size of the pool (which is still under
10525 add_minipool_backward_ref (Mfix *fix)
10527 /* If set, min_mp is the last pool_entry that has a lower constraint
10528 than the one we are trying to add. */
10529 Mnode *min_mp = NULL;
10530 /* This can be negative, since it is only a constraint. */
10531 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10534 /* If we can't reach the current pool from this insn, or if we can't
10535 insert this entry at the end of the pool without pushing other
10536 fixes out of range, then we don't try. This ensures that we
10537 can't fail later on. */
10538 if (min_address >= minipool_barrier->address
10539 || (minipool_vector_tail->min_address + fix->fix_size
10540 >= minipool_barrier->address))
10543 /* Scan the pool to see if a constant with the same value has
10544 already been added. While we are doing this, also note the
10545 location where we must insert the constant if it doesn't already
10547 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10549 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10550 && fix->mode == mp->mode
10551 && (GET_CODE (fix->value) != CODE_LABEL
10552 || (CODE_LABEL_NUMBER (fix->value)
10553 == CODE_LABEL_NUMBER (mp->value)))
10554 && rtx_equal_p (fix->value, mp->value)
10555 /* Check that there is enough slack to move this entry to the
10556 end of the table (this is conservative). */
10557 && (mp->max_address
10558 > (minipool_barrier->address
10559 + minipool_vector_tail->offset
10560 + minipool_vector_tail->fix_size)))
10563 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10566 if (min_mp != NULL)
10567 mp->min_address += fix->fix_size;
10570 /* Note the insertion point if necessary. */
10571 if (mp->min_address < min_address)
10573 /* For now, we do not allow the insertion of 8-byte alignment
10574 requiring nodes anywhere but at the start of the pool. */
10575 if (ARM_DOUBLEWORD_ALIGN
10576 && fix->fix_size >= 8 && mp->fix_size < 8)
10581 else if (mp->max_address
10582 < minipool_barrier->address + mp->offset + fix->fix_size)
10584 /* Inserting before this entry would push the fix beyond
10585 its maximum address (which can happen if we have
10586 re-located a forwards fix); force the new fix to come
10588 if (ARM_DOUBLEWORD_ALIGN
10589 && fix->fix_size >= 8 && mp->fix_size < 8)
10594 min_address = mp->min_address + fix->fix_size;
10597 /* Do not insert a non-8-byte aligned quantity before 8-byte
10598 aligned quantities. */
10599 else if (ARM_DOUBLEWORD_ALIGN
10600 && fix->fix_size < 8
10601 && mp->fix_size >= 8)
10604 min_address = mp->min_address + fix->fix_size;
10609 /* We need to create a new entry. */
10611 mp->fix_size = fix->fix_size;
10612 mp->mode = fix->mode;
10613 mp->value = fix->value;
10615 mp->max_address = minipool_barrier->address + 65536;
10617 mp->min_address = min_address;
10619 if (min_mp == NULL)
10622 mp->next = minipool_vector_head;
10624 if (mp->next == NULL)
10626 minipool_vector_tail = mp;
10627 minipool_vector_label = gen_label_rtx ();
10630 mp->next->prev = mp;
10632 minipool_vector_head = mp;
10636 mp->next = min_mp->next;
10640 if (mp->next != NULL)
10641 mp->next->prev = mp;
10643 minipool_vector_tail = mp;
10646 /* Save the new entry. */
10654 /* Scan over the following entries and adjust their offsets. */
10655 while (mp->next != NULL)
10657 if (mp->next->min_address < mp->min_address + mp->fix_size)
10658 mp->next->min_address = mp->min_address + mp->fix_size;
10661 mp->next->offset = mp->offset + mp->fix_size;
10663 mp->next->offset = mp->offset;
10672 assign_minipool_offsets (Mfix *barrier)
10674 HOST_WIDE_INT offset = 0;
10677 minipool_barrier = barrier;
10679 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10681 mp->offset = offset;
10683 if (mp->refcount > 0)
10684 offset += mp->fix_size;
10688 /* Output the literal table */
10690 dump_minipool (rtx scan)
10696 if (ARM_DOUBLEWORD_ALIGN)
10697 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10698 if (mp->refcount > 0 && mp->fix_size >= 8)
10705 fprintf (dump_file,
10706 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
10707 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
10709 scan = emit_label_after (gen_label_rtx (), scan);
10710 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
10711 scan = emit_label_after (minipool_vector_label, scan);
10713 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
10715 if (mp->refcount > 0)
10719 fprintf (dump_file,
10720 ";; Offset %u, min %ld, max %ld ",
10721 (unsigned) mp->offset, (unsigned long) mp->min_address,
10722 (unsigned long) mp->max_address);
10723 arm_print_value (dump_file, mp->value);
10724 fputc ('\n', dump_file);
10727 switch (mp->fix_size)
10729 #ifdef HAVE_consttable_1
10731 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
10735 #ifdef HAVE_consttable_2
10737 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
10741 #ifdef HAVE_consttable_4
10743 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
10747 #ifdef HAVE_consttable_8
10749 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
10753 #ifdef HAVE_consttable_16
10755 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
10760 gcc_unreachable ();
10768 minipool_vector_head = minipool_vector_tail = NULL;
10769 scan = emit_insn_after (gen_consttable_end (), scan);
10770 scan = emit_barrier_after (scan);
10773 /* Return the cost of forcibly inserting a barrier after INSN. */
10775 arm_barrier_cost (rtx insn)
10777 /* Basing the location of the pool on the loop depth is preferable,
10778 but at the moment, the basic block information seems to be
10779 corrupt by this stage of the compilation. */
10780 int base_cost = 50;
10781 rtx next = next_nonnote_insn (insn);
10783 if (next != NULL && GET_CODE (next) == CODE_LABEL)
10786 switch (GET_CODE (insn))
10789 /* It will always be better to place the table before the label, rather
10798 return base_cost - 10;
10801 return base_cost + 10;
10805 /* Find the best place in the insn stream in the range
10806 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
10807 Create the barrier by inserting a jump and add a new fix entry for
10810 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
10812 HOST_WIDE_INT count = 0;
10814 rtx from = fix->insn;
10815 /* The instruction after which we will insert the jump. */
10816 rtx selected = NULL;
10818 /* The address at which the jump instruction will be placed. */
10819 HOST_WIDE_INT selected_address;
10821 HOST_WIDE_INT max_count = max_address - fix->address;
10822 rtx label = gen_label_rtx ();
10824 selected_cost = arm_barrier_cost (from);
10825 selected_address = fix->address;
10827 while (from && count < max_count)
10832 /* This code shouldn't have been called if there was a natural barrier
10834 gcc_assert (GET_CODE (from) != BARRIER);
10836 /* Count the length of this insn. */
10837 count += get_attr_length (from);
10839 /* If there is a jump table, add its length. */
10840 tmp = is_jump_table (from);
10843 count += get_jump_table_size (tmp);
10845 /* Jump tables aren't in a basic block, so base the cost on
10846 the dispatch insn. If we select this location, we will
10847 still put the pool after the table. */
10848 new_cost = arm_barrier_cost (from);
10850 if (count < max_count
10851 && (!selected || new_cost <= selected_cost))
10854 selected_cost = new_cost;
10855 selected_address = fix->address + count;
10858 /* Continue after the dispatch table. */
10859 from = NEXT_INSN (tmp);
10863 new_cost = arm_barrier_cost (from);
10865 if (count < max_count
10866 && (!selected || new_cost <= selected_cost))
10869 selected_cost = new_cost;
10870 selected_address = fix->address + count;
10873 from = NEXT_INSN (from);
10876 /* Make sure that we found a place to insert the jump. */
10877 gcc_assert (selected);
10879 /* Create a new JUMP_INSN that branches around a barrier. */
10880 from = emit_jump_insn_after (gen_jump (label), selected);
10881 JUMP_LABEL (from) = label;
10882 barrier = emit_barrier_after (from);
10883 emit_label_after (label, barrier);
10885 /* Create a minipool barrier entry for the new barrier. */
10886 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
10887 new_fix->insn = barrier;
10888 new_fix->address = selected_address;
10889 new_fix->next = fix->next;
10890 fix->next = new_fix;
10895 /* Record that there is a natural barrier in the insn stream at
10898 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
10900 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
10903 fix->address = address;
10906 if (minipool_fix_head != NULL)
10907 minipool_fix_tail->next = fix;
10909 minipool_fix_head = fix;
10911 minipool_fix_tail = fix;
10914 /* Record INSN, which will need fixing up to load a value from the
10915 minipool. ADDRESS is the offset of the insn since the start of the
10916 function; LOC is a pointer to the part of the insn which requires
10917 fixing; VALUE is the constant that must be loaded, which is of type
10920 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
10921 enum machine_mode mode, rtx value)
10923 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
10926 fix->address = address;
10929 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
10930 fix->value = value;
10931 fix->forwards = get_attr_pool_range (insn);
10932 fix->backwards = get_attr_neg_pool_range (insn);
10933 fix->minipool = NULL;
10935 /* If an insn doesn't have a range defined for it, then it isn't
10936 expecting to be reworked by this code. Better to stop now than
10937 to generate duff assembly code. */
10938 gcc_assert (fix->forwards || fix->backwards);
10940 /* If an entry requires 8-byte alignment then assume all constant pools
10941 require 4 bytes of padding. Trying to do this later on a per-pool
10942 basis is awkward because existing pool entries have to be modified. */
10943 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
10948 fprintf (dump_file,
10949 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
10950 GET_MODE_NAME (mode),
10951 INSN_UID (insn), (unsigned long) address,
10952 -1 * (long)fix->backwards, (long)fix->forwards);
10953 arm_print_value (dump_file, fix->value);
10954 fprintf (dump_file, "\n");
10957 /* Add it to the chain of fixes. */
10960 if (minipool_fix_head != NULL)
10961 minipool_fix_tail->next = fix;
10963 minipool_fix_head = fix;
10965 minipool_fix_tail = fix;
10968 /* Return the cost of synthesizing a 64-bit constant VAL inline.
10969 Returns the number of insns needed, or 99 if we don't know how to
10972 arm_const_double_inline_cost (rtx val)
10974 rtx lowpart, highpart;
10975 enum machine_mode mode;
10977 mode = GET_MODE (val);
10979 if (mode == VOIDmode)
10982 gcc_assert (GET_MODE_SIZE (mode) == 8);
10984 lowpart = gen_lowpart (SImode, val);
10985 highpart = gen_highpart_mode (SImode, mode, val);
10987 gcc_assert (GET_CODE (lowpart) == CONST_INT);
10988 gcc_assert (GET_CODE (highpart) == CONST_INT);
10990 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
10991 NULL_RTX, NULL_RTX, 0, 0)
10992 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
10993 NULL_RTX, NULL_RTX, 0, 0));
10996 /* Return true if it is worthwhile to split a 64-bit constant into two
10997 32-bit operations. This is the case if optimizing for size, or
10998 if we have load delay slots, or if one 32-bit part can be done with
10999 a single data operation. */
11001 arm_const_double_by_parts (rtx val)
11003 enum machine_mode mode = GET_MODE (val);
11006 if (optimize_size || arm_ld_sched)
11009 if (mode == VOIDmode)
11012 part = gen_highpart_mode (SImode, mode, val);
11014 gcc_assert (GET_CODE (part) == CONST_INT);
11016 if (const_ok_for_arm (INTVAL (part))
11017 || const_ok_for_arm (~INTVAL (part)))
11020 part = gen_lowpart (SImode, val);
11022 gcc_assert (GET_CODE (part) == CONST_INT);
11024 if (const_ok_for_arm (INTVAL (part))
11025 || const_ok_for_arm (~INTVAL (part)))
11031 /* Scan INSN and note any of its operands that need fixing.
11032 If DO_PUSHES is false we do not actually push any of the fixups
11033 needed. The function returns TRUE if any fixups were needed/pushed.
11034 This is used by arm_memory_load_p() which needs to know about loads
11035 of constants that will be converted into minipool loads. */
11037 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11039 bool result = false;
11042 extract_insn (insn);
11044 if (!constrain_operands (1))
11045 fatal_insn_not_found (insn);
11047 if (recog_data.n_alternatives == 0)
11050 /* Fill in recog_op_alt with information about the constraints of
11052 preprocess_constraints ();
11054 for (opno = 0; opno < recog_data.n_operands; opno++)
11056 /* Things we need to fix can only occur in inputs. */
11057 if (recog_data.operand_type[opno] != OP_IN)
11060 /* If this alternative is a memory reference, then any mention
11061 of constants in this alternative is really to fool reload
11062 into allowing us to accept one there. We need to fix them up
11063 now so that we output the right code. */
11064 if (recog_op_alt[opno][which_alternative].memory_ok)
11066 rtx op = recog_data.operand[opno];
11068 if (CONSTANT_P (op))
11071 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11072 recog_data.operand_mode[opno], op);
11075 else if (GET_CODE (op) == MEM
11076 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11077 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11081 rtx cop = avoid_constant_pool_reference (op);
11083 /* Casting the address of something to a mode narrower
11084 than a word can cause avoid_constant_pool_reference()
11085 to return the pool reference itself. That's no good to
11086 us here. Lets just hope that we can use the
11087 constant pool value directly. */
11089 cop = get_pool_constant (XEXP (op, 0));
11091 push_minipool_fix (insn, address,
11092 recog_data.operand_loc[opno],
11093 recog_data.operand_mode[opno], cop);
11104 /* Gcc puts the pool in the wrong place for ARM, since we can only
11105 load addresses a limited distance around the pc. We do some
11106 special munging to move the constant pool values to the correct
11107 point in the code. */
11112 HOST_WIDE_INT address = 0;
11115 minipool_fix_head = minipool_fix_tail = NULL;
11117 /* The first insn must always be a note, or the code below won't
11118 scan it properly. */
11119 insn = get_insns ();
11120 gcc_assert (GET_CODE (insn) == NOTE);
11123 /* Scan all the insns and record the operands that will need fixing. */
11124 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11126 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11127 && (arm_cirrus_insn_p (insn)
11128 || GET_CODE (insn) == JUMP_INSN
11129 || arm_memory_load_p (insn)))
11130 cirrus_reorg (insn);
11132 if (GET_CODE (insn) == BARRIER)
11133 push_minipool_barrier (insn, address);
11134 else if (INSN_P (insn))
11138 note_invalid_constants (insn, address, true);
11139 address += get_attr_length (insn);
11141 /* If the insn is a vector jump, add the size of the table
11142 and skip the table. */
11143 if ((table = is_jump_table (insn)) != NULL)
11145 address += get_jump_table_size (table);
11151 fix = minipool_fix_head;
11153 /* Now scan the fixups and perform the required changes. */
11158 Mfix * last_added_fix;
11159 Mfix * last_barrier = NULL;
11162 /* Skip any further barriers before the next fix. */
11163 while (fix && GET_CODE (fix->insn) == BARRIER)
11166 /* No more fixes. */
11170 last_added_fix = NULL;
11172 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11174 if (GET_CODE (ftmp->insn) == BARRIER)
11176 if (ftmp->address >= minipool_vector_head->max_address)
11179 last_barrier = ftmp;
11181 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11184 last_added_fix = ftmp; /* Keep track of the last fix added. */
11187 /* If we found a barrier, drop back to that; any fixes that we
11188 could have reached but come after the barrier will now go in
11189 the next mini-pool. */
11190 if (last_barrier != NULL)
11192 /* Reduce the refcount for those fixes that won't go into this
11194 for (fdel = last_barrier->next;
11195 fdel && fdel != ftmp;
11198 fdel->minipool->refcount--;
11199 fdel->minipool = NULL;
11202 ftmp = last_barrier;
11206 /* ftmp is first fix that we can't fit into this pool and
11207 there no natural barriers that we could use. Insert a
11208 new barrier in the code somewhere between the previous
11209 fix and this one, and arrange to jump around it. */
11210 HOST_WIDE_INT max_address;
11212 /* The last item on the list of fixes must be a barrier, so
11213 we can never run off the end of the list of fixes without
11214 last_barrier being set. */
11217 max_address = minipool_vector_head->max_address;
11218 /* Check that there isn't another fix that is in range that
11219 we couldn't fit into this pool because the pool was
11220 already too large: we need to put the pool before such an
11221 instruction. The pool itself may come just after the
11222 fix because create_fix_barrier also allows space for a
11223 jump instruction. */
11224 if (ftmp->address < max_address)
11225 max_address = ftmp->address + 1;
11227 last_barrier = create_fix_barrier (last_added_fix, max_address);
11230 assign_minipool_offsets (last_barrier);
11234 if (GET_CODE (ftmp->insn) != BARRIER
11235 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11242 /* Scan over the fixes we have identified for this pool, fixing them
11243 up and adding the constants to the pool itself. */
11244 for (this_fix = fix; this_fix && ftmp != this_fix;
11245 this_fix = this_fix->next)
11246 if (GET_CODE (this_fix->insn) != BARRIER)
11249 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11250 minipool_vector_label),
11251 this_fix->minipool->offset);
11252 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11255 dump_minipool (last_barrier->insn);
11259 /* From now on we must synthesize any constants that we can't handle
11260 directly. This can happen if the RTL gets split during final
11261 instruction generation. */
11262 after_arm_reorg = 1;
11264 /* Free the minipool memory. */
11265 obstack_free (&minipool_obstack, minipool_startobj);
11268 /* Routines to output assembly language. */
11270 /* If the rtx is the correct value then return the string of the number.
11271 In this way we can ensure that valid double constants are generated even
11272 when cross compiling. */
11274 fp_immediate_constant (rtx x)
11279 if (!fp_consts_inited)
11282 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11283 for (i = 0; i < 8; i++)
11284 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11285 return strings_fp[i];
11287 gcc_unreachable ();
11290 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11291 static const char *
11292 fp_const_from_val (REAL_VALUE_TYPE *r)
11296 if (!fp_consts_inited)
11299 for (i = 0; i < 8; i++)
11300 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11301 return strings_fp[i];
11303 gcc_unreachable ();
11306 /* Output the operands of a LDM/STM instruction to STREAM.
11307 MASK is the ARM register set mask of which only bits 0-15 are important.
11308 REG is the base register, either the frame pointer or the stack pointer,
11309 INSTR is the possibly suffixed load or store instruction.
11310 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11313 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11314 unsigned long mask, int rfe)
11317 bool not_first = FALSE;
11319 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11320 fputc ('\t', stream);
11321 asm_fprintf (stream, instr, reg);
11322 fputc ('{', stream);
11324 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11325 if (mask & (1 << i))
11328 fprintf (stream, ", ");
11330 asm_fprintf (stream, "%r", i);
11335 fprintf (stream, "}^\n");
11337 fprintf (stream, "}\n");
11341 /* Output a FLDMD instruction to STREAM.
11342 BASE if the register containing the address.
11343 REG and COUNT specify the register range.
11344 Extra registers may be added to avoid hardware bugs.
11346 We output FLDMD even for ARMv5 VFP implementations. Although
11347 FLDMD is technically not supported until ARMv6, it is believed
11348 that all VFP implementations support its use in this context. */
11351 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11355 /* Workaround ARM10 VFPr1 bug. */
11356 if (count == 2 && !arm_arch6)
11363 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11364 load into multiple parts if we have to handle more than 16 registers. */
11367 vfp_output_fldmd (stream, base, reg, 16);
11368 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11372 fputc ('\t', stream);
11373 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11375 for (i = reg; i < reg + count; i++)
11378 fputs (", ", stream);
11379 asm_fprintf (stream, "d%d", i);
11381 fputs ("}\n", stream);
11386 /* Output the assembly for a store multiple. */
11389 vfp_output_fstmd (rtx * operands)
11396 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11397 p = strlen (pattern);
11399 gcc_assert (GET_CODE (operands[1]) == REG);
11401 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11402 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11404 p += sprintf (&pattern[p], ", d%d", base + i);
11406 strcpy (&pattern[p], "}");
11408 output_asm_insn (pattern, operands);
11413 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11414 number of bytes pushed. */
11417 vfp_emit_fstmd (int base_reg, int count)
11424 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11425 register pairs are stored by a store multiple insn. We avoid this
11426 by pushing an extra pair. */
11427 if (count == 2 && !arm_arch6)
11429 if (base_reg == LAST_VFP_REGNUM - 3)
11434 /* FSTMD may not store more than 16 doubleword registers at once. Split
11435 larger stores into multiple parts (up to a maximum of two, in
11440 /* NOTE: base_reg is an internal register number, so each D register
11442 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11443 saved += vfp_emit_fstmd (base_reg, 16);
11447 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11448 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11450 reg = gen_rtx_REG (DFmode, base_reg);
11453 XVECEXP (par, 0, 0)
11454 = gen_rtx_SET (VOIDmode,
11455 gen_frame_mem (BLKmode,
11456 gen_rtx_PRE_DEC (BLKmode,
11457 stack_pointer_rtx)),
11458 gen_rtx_UNSPEC (BLKmode,
11459 gen_rtvec (1, reg),
11460 UNSPEC_PUSH_MULT));
11462 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11463 plus_constant (stack_pointer_rtx, -(count * 8)));
11464 RTX_FRAME_RELATED_P (tmp) = 1;
11465 XVECEXP (dwarf, 0, 0) = tmp;
11467 tmp = gen_rtx_SET (VOIDmode,
11468 gen_frame_mem (DFmode, stack_pointer_rtx),
11470 RTX_FRAME_RELATED_P (tmp) = 1;
11471 XVECEXP (dwarf, 0, 1) = tmp;
11473 for (i = 1; i < count; i++)
11475 reg = gen_rtx_REG (DFmode, base_reg);
11477 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11479 tmp = gen_rtx_SET (VOIDmode,
11480 gen_frame_mem (DFmode,
11481 plus_constant (stack_pointer_rtx,
11484 RTX_FRAME_RELATED_P (tmp) = 1;
11485 XVECEXP (dwarf, 0, i + 1) = tmp;
11488 par = emit_insn (par);
11489 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11490 RTX_FRAME_RELATED_P (par) = 1;
11495 /* Emit a call instruction with pattern PAT. ADDR is the address of
11496 the call target. */
11499 arm_emit_call_insn (rtx pat, rtx addr)
11503 insn = emit_call_insn (pat);
11505 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11506 If the call might use such an entry, add a use of the PIC register
11507 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11508 if (TARGET_VXWORKS_RTP
11510 && GET_CODE (addr) == SYMBOL_REF
11511 && (SYMBOL_REF_DECL (addr)
11512 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11513 : !SYMBOL_REF_LOCAL_P (addr)))
11515 require_pic_register ();
11516 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11520 /* Output a 'call' insn. */
11522 output_call (rtx *operands)
11524 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11526 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11527 if (REGNO (operands[0]) == LR_REGNUM)
11529 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11530 output_asm_insn ("mov%?\t%0, %|lr", operands);
11533 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11535 if (TARGET_INTERWORK || arm_arch4t)
11536 output_asm_insn ("bx%?\t%0", operands);
11538 output_asm_insn ("mov%?\t%|pc, %0", operands);
11543 /* Output a 'call' insn that is a reference in memory. */
11545 output_call_mem (rtx *operands)
11547 if (TARGET_INTERWORK && !arm_arch5)
11549 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11550 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11551 output_asm_insn ("bx%?\t%|ip", operands);
11553 else if (regno_use_in (LR_REGNUM, operands[0]))
11555 /* LR is used in the memory address. We load the address in the
11556 first instruction. It's safe to use IP as the target of the
11557 load since the call will kill it anyway. */
11558 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11560 output_asm_insn ("blx%?\t%|ip", operands);
11563 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11565 output_asm_insn ("bx%?\t%|ip", operands);
11567 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11572 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11573 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11580 /* Output a move from arm registers to an fpa registers.
11581 OPERANDS[0] is an fpa register.
11582 OPERANDS[1] is the first registers of an arm register pair. */
11584 output_mov_long_double_fpa_from_arm (rtx *operands)
11586 int arm_reg0 = REGNO (operands[1]);
11589 gcc_assert (arm_reg0 != IP_REGNUM);
11591 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11592 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11593 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11595 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11596 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11601 /* Output a move from an fpa register to arm registers.
11602 OPERANDS[0] is the first registers of an arm register pair.
11603 OPERANDS[1] is an fpa register. */
11605 output_mov_long_double_arm_from_fpa (rtx *operands)
11607 int arm_reg0 = REGNO (operands[0]);
11610 gcc_assert (arm_reg0 != IP_REGNUM);
11612 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11613 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11614 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11616 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11617 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11621 /* Output a move from arm registers to arm registers of a long double
11622 OPERANDS[0] is the destination.
11623 OPERANDS[1] is the source. */
11625 output_mov_long_double_arm_from_arm (rtx *operands)
11627 /* We have to be careful here because the two might overlap. */
11628 int dest_start = REGNO (operands[0]);
11629 int src_start = REGNO (operands[1]);
11633 if (dest_start < src_start)
11635 for (i = 0; i < 3; i++)
11637 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11638 ops[1] = gen_rtx_REG (SImode, src_start + i);
11639 output_asm_insn ("mov%?\t%0, %1", ops);
11644 for (i = 2; i >= 0; i--)
11646 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11647 ops[1] = gen_rtx_REG (SImode, src_start + i);
11648 output_asm_insn ("mov%?\t%0, %1", ops);
11656 arm_emit_movpair (rtx dest, rtx src)
11658 /* If the src is an immediate, simplify it. */
11659 if (CONST_INT_P (src))
11661 HOST_WIDE_INT val = INTVAL (src);
11662 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
11663 if ((val >> 16) & 0x0000ffff)
11664 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
11666 GEN_INT ((val >> 16) & 0x0000ffff));
11669 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
11670 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
11673 /* Output a move from arm registers to an fpa registers.
11674 OPERANDS[0] is an fpa register.
11675 OPERANDS[1] is the first registers of an arm register pair. */
11677 output_mov_double_fpa_from_arm (rtx *operands)
11679 int arm_reg0 = REGNO (operands[1]);
11682 gcc_assert (arm_reg0 != IP_REGNUM);
11684 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11685 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11686 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
11687 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
11691 /* Output a move from an fpa register to arm registers.
11692 OPERANDS[0] is the first registers of an arm register pair.
11693 OPERANDS[1] is an fpa register. */
11695 output_mov_double_arm_from_fpa (rtx *operands)
11697 int arm_reg0 = REGNO (operands[0]);
11700 gcc_assert (arm_reg0 != IP_REGNUM);
11702 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11703 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11704 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
11705 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
11709 /* Output a move between double words.
11710 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
11711 or MEM<-REG and all MEMs must be offsettable addresses. */
11713 output_move_double (rtx *operands)
11715 enum rtx_code code0 = GET_CODE (operands[0]);
11716 enum rtx_code code1 = GET_CODE (operands[1]);
11721 unsigned int reg0 = REGNO (operands[0]);
11723 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
11725 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
11727 switch (GET_CODE (XEXP (operands[1], 0)))
11731 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
11732 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
11734 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11738 gcc_assert (TARGET_LDRD);
11739 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
11744 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
11746 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
11751 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
11753 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
11757 gcc_assert (TARGET_LDRD);
11758 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
11763 /* Autoicrement addressing modes should never have overlapping
11764 base and destination registers, and overlapping index registers
11765 are already prohibited, so this doesn't need to worry about
11767 otherops[0] = operands[0];
11768 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
11769 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
11771 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
11773 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
11775 /* Registers overlap so split out the increment. */
11776 output_asm_insn ("add%?\t%1, %1, %2", otherops);
11777 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
11781 /* Use a single insn if we can.
11782 FIXME: IWMMXT allows offsets larger than ldrd can
11783 handle, fix these up with a pair of ldr. */
11785 || GET_CODE (otherops[2]) != CONST_INT
11786 || (INTVAL (otherops[2]) > -256
11787 && INTVAL (otherops[2]) < 256))
11788 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
11791 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11792 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11798 /* Use a single insn if we can.
11799 FIXME: IWMMXT allows offsets larger than ldrd can handle,
11800 fix these up with a pair of ldr. */
11802 || GET_CODE (otherops[2]) != CONST_INT
11803 || (INTVAL (otherops[2]) > -256
11804 && INTVAL (otherops[2]) < 256))
11805 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
11808 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11809 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
11816 /* We might be able to use ldrd %0, %1 here. However the range is
11817 different to ldr/adr, and it is broken on some ARMv7-M
11818 implementations. */
11819 /* Use the second register of the pair to avoid problematic
11821 otherops[1] = operands[1];
11822 output_asm_insn ("adr%?\t%0, %1", otherops);
11823 operands[1] = otherops[0];
11825 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
11827 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
11830 /* ??? This needs checking for thumb2. */
11832 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
11833 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
11835 otherops[0] = operands[0];
11836 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
11837 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
11839 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
11841 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
11843 switch ((int) INTVAL (otherops[2]))
11846 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
11851 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
11856 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
11860 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
11861 operands[1] = otherops[0];
11863 && (GET_CODE (otherops[2]) == REG
11865 || (GET_CODE (otherops[2]) == CONST_INT
11866 && INTVAL (otherops[2]) > -256
11867 && INTVAL (otherops[2]) < 256)))
11869 if (reg_overlap_mentioned_p (operands[0],
11873 /* Swap base and index registers over to
11874 avoid a conflict. */
11876 otherops[1] = otherops[2];
11879 /* If both registers conflict, it will usually
11880 have been fixed by a splitter. */
11881 if (reg_overlap_mentioned_p (operands[0], otherops[2])
11882 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
11884 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11885 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
11889 otherops[0] = operands[0];
11890 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
11895 if (GET_CODE (otherops[2]) == CONST_INT)
11897 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
11898 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
11900 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11903 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11906 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
11909 return "ldr%(d%)\t%0, [%1]";
11911 return "ldm%(ia%)\t%1, %M0";
11915 otherops[1] = adjust_address (operands[1], SImode, 4);
11916 /* Take care of overlapping base/data reg. */
11917 if (reg_mentioned_p (operands[0], operands[1]))
11919 output_asm_insn ("ldr%?\t%0, %1", otherops);
11920 output_asm_insn ("ldr%?\t%0, %1", operands);
11924 output_asm_insn ("ldr%?\t%0, %1", operands);
11925 output_asm_insn ("ldr%?\t%0, %1", otherops);
11932 /* Constraints should ensure this. */
11933 gcc_assert (code0 == MEM && code1 == REG);
11934 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
11936 switch (GET_CODE (XEXP (operands[0], 0)))
11940 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
11942 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
11946 gcc_assert (TARGET_LDRD);
11947 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
11952 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
11954 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
11959 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
11961 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
11965 gcc_assert (TARGET_LDRD);
11966 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
11971 otherops[0] = operands[1];
11972 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
11973 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
11975 /* IWMMXT allows offsets larger than ldrd can handle,
11976 fix these up with a pair of ldr. */
11978 && GET_CODE (otherops[2]) == CONST_INT
11979 && (INTVAL(otherops[2]) <= -256
11980 || INTVAL(otherops[2]) >= 256))
11982 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
11984 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11985 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11989 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11990 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
11993 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
11994 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
11996 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12000 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12001 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12003 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12006 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12012 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12018 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12023 && (GET_CODE (otherops[2]) == REG
12025 || (GET_CODE (otherops[2]) == CONST_INT
12026 && INTVAL (otherops[2]) > -256
12027 && INTVAL (otherops[2]) < 256)))
12029 otherops[0] = operands[1];
12030 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12031 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12037 otherops[0] = adjust_address (operands[0], SImode, 4);
12038 otherops[1] = operands[1];
12039 output_asm_insn ("str%?\t%1, %0", operands);
12040 output_asm_insn ("str%?\t%H1, %0", otherops);
12047 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12048 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12051 output_move_quad (rtx *operands)
12053 if (REG_P (operands[0]))
12055 /* Load, or reg->reg move. */
12057 if (MEM_P (operands[1]))
12059 switch (GET_CODE (XEXP (operands[1], 0)))
12062 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12067 output_asm_insn ("adr%?\t%0, %1", operands);
12068 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12072 gcc_unreachable ();
12080 gcc_assert (REG_P (operands[1]));
12082 dest = REGNO (operands[0]);
12083 src = REGNO (operands[1]);
12085 /* This seems pretty dumb, but hopefully GCC won't try to do it
12088 for (i = 0; i < 4; i++)
12090 ops[0] = gen_rtx_REG (SImode, dest + i);
12091 ops[1] = gen_rtx_REG (SImode, src + i);
12092 output_asm_insn ("mov%?\t%0, %1", ops);
12095 for (i = 3; i >= 0; i--)
12097 ops[0] = gen_rtx_REG (SImode, dest + i);
12098 ops[1] = gen_rtx_REG (SImode, src + i);
12099 output_asm_insn ("mov%?\t%0, %1", ops);
12105 gcc_assert (MEM_P (operands[0]));
12106 gcc_assert (REG_P (operands[1]));
12107 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12109 switch (GET_CODE (XEXP (operands[0], 0)))
12112 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12116 gcc_unreachable ();
12123 /* Output a VFP load or store instruction. */
12126 output_move_vfp (rtx *operands)
12128 rtx reg, mem, addr, ops[2];
12129 int load = REG_P (operands[0]);
12130 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12131 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12134 enum machine_mode mode;
12136 reg = operands[!load];
12137 mem = operands[load];
12139 mode = GET_MODE (reg);
12141 gcc_assert (REG_P (reg));
12142 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12143 gcc_assert (mode == SFmode
12147 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12148 gcc_assert (MEM_P (mem));
12150 addr = XEXP (mem, 0);
12152 switch (GET_CODE (addr))
12155 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12156 ops[0] = XEXP (addr, 0);
12161 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12162 ops[0] = XEXP (addr, 0);
12167 templ = "f%s%c%%?\t%%%s0, %%1%s";
12173 sprintf (buff, templ,
12174 load ? "ld" : "st",
12177 integer_p ? "\t%@ int" : "");
12178 output_asm_insn (buff, ops);
12183 /* Output a Neon quad-word load or store, or a load or store for
12184 larger structure modes.
12186 WARNING: The ordering of elements is weird in big-endian mode,
12187 because we use VSTM, as required by the EABI. GCC RTL defines
12188 element ordering based on in-memory order. This can be differ
12189 from the architectural ordering of elements within a NEON register.
12190 The intrinsics defined in arm_neon.h use the NEON register element
12191 ordering, not the GCC RTL element ordering.
12193 For example, the in-memory ordering of a big-endian a quadword
12194 vector with 16-bit elements when stored from register pair {d0,d1}
12195 will be (lowest address first, d0[N] is NEON register element N):
12197 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12199 When necessary, quadword registers (dN, dN+1) are moved to ARM
12200 registers from rN in the order:
12202 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12204 So that STM/LDM can be used on vectors in ARM registers, and the
12205 same memory layout will result as if VSTM/VLDM were used. */
12208 output_move_neon (rtx *operands)
12210 rtx reg, mem, addr, ops[2];
12211 int regno, load = REG_P (operands[0]);
12214 enum machine_mode mode;
12216 reg = operands[!load];
12217 mem = operands[load];
12219 mode = GET_MODE (reg);
12221 gcc_assert (REG_P (reg));
12222 regno = REGNO (reg);
12223 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12224 || NEON_REGNO_OK_FOR_QUAD (regno));
12225 gcc_assert (VALID_NEON_DREG_MODE (mode)
12226 || VALID_NEON_QREG_MODE (mode)
12227 || VALID_NEON_STRUCT_MODE (mode));
12228 gcc_assert (MEM_P (mem));
12230 addr = XEXP (mem, 0);
12232 /* Strip off const from addresses like (const (plus (...))). */
12233 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12234 addr = XEXP (addr, 0);
12236 switch (GET_CODE (addr))
12239 templ = "v%smia%%?\t%%0!, %%h1";
12240 ops[0] = XEXP (addr, 0);
12245 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12246 templ = "v%smdb%%?\t%%0!, %%h1";
12247 ops[0] = XEXP (addr, 0);
12252 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12253 gcc_unreachable ();
12258 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12261 for (i = 0; i < nregs; i++)
12263 /* We're only using DImode here because it's a convenient size. */
12264 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12265 ops[1] = adjust_address (mem, SImode, 8 * i);
12266 if (reg_overlap_mentioned_p (ops[0], mem))
12268 gcc_assert (overlap == -1);
12273 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12274 output_asm_insn (buff, ops);
12279 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12280 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12281 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12282 output_asm_insn (buff, ops);
12289 templ = "v%smia%%?\t%%m0, %%h1";
12294 sprintf (buff, templ, load ? "ld" : "st");
12295 output_asm_insn (buff, ops);
12300 /* Output an ADD r, s, #n where n may be too big for one instruction.
12301 If adding zero to one register, output nothing. */
12303 output_add_immediate (rtx *operands)
12305 HOST_WIDE_INT n = INTVAL (operands[2]);
12307 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12310 output_multi_immediate (operands,
12311 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12314 output_multi_immediate (operands,
12315 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12322 /* Output a multiple immediate operation.
12323 OPERANDS is the vector of operands referred to in the output patterns.
12324 INSTR1 is the output pattern to use for the first constant.
12325 INSTR2 is the output pattern to use for subsequent constants.
12326 IMMED_OP is the index of the constant slot in OPERANDS.
12327 N is the constant value. */
12328 static const char *
12329 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12330 int immed_op, HOST_WIDE_INT n)
12332 #if HOST_BITS_PER_WIDE_INT > 32
12338 /* Quick and easy output. */
12339 operands[immed_op] = const0_rtx;
12340 output_asm_insn (instr1, operands);
12345 const char * instr = instr1;
12347 /* Note that n is never zero here (which would give no output). */
12348 for (i = 0; i < 32; i += 2)
12352 operands[immed_op] = GEN_INT (n & (255 << i));
12353 output_asm_insn (instr, operands);
12363 /* Return the name of a shifter operation. */
12364 static const char *
12365 arm_shift_nmem(enum rtx_code code)
12370 return ARM_LSL_NAME;
12386 /* Return the appropriate ARM instruction for the operation code.
12387 The returned result should not be overwritten. OP is the rtx of the
12388 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12391 arithmetic_instr (rtx op, int shift_first_arg)
12393 switch (GET_CODE (op))
12399 return shift_first_arg ? "rsb" : "sub";
12414 return arm_shift_nmem(GET_CODE(op));
12417 gcc_unreachable ();
12421 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12422 for the operation code. The returned result should not be overwritten.
12423 OP is the rtx code of the shift.
12424 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
12426 static const char *
12427 shift_op (rtx op, HOST_WIDE_INT *amountp)
12430 enum rtx_code code = GET_CODE (op);
12432 switch (GET_CODE (XEXP (op, 1)))
12440 *amountp = INTVAL (XEXP (op, 1));
12444 gcc_unreachable ();
12450 gcc_assert (*amountp != -1);
12451 *amountp = 32 - *amountp;
12454 /* Fall through. */
12460 mnem = arm_shift_nmem(code);
12464 /* We never have to worry about the amount being other than a
12465 power of 2, since this case can never be reloaded from a reg. */
12466 gcc_assert (*amountp != -1);
12467 *amountp = int_log2 (*amountp);
12468 return ARM_LSL_NAME;
12471 gcc_unreachable ();
12474 if (*amountp != -1)
12476 /* This is not 100% correct, but follows from the desire to merge
12477 multiplication by a power of 2 with the recognizer for a
12478 shift. >=32 is not a valid shift for "lsl", so we must try and
12479 output a shift that produces the correct arithmetical result.
12480 Using lsr #32 is identical except for the fact that the carry bit
12481 is not set correctly if we set the flags; but we never use the
12482 carry bit from such an operation, so we can ignore that. */
12483 if (code == ROTATERT)
12484 /* Rotate is just modulo 32. */
12486 else if (*amountp != (*amountp & 31))
12488 if (code == ASHIFT)
12493 /* Shifts of 0 are no-ops. */
12501 /* Obtain the shift from the POWER of two. */
12503 static HOST_WIDE_INT
12504 int_log2 (HOST_WIDE_INT power)
12506 HOST_WIDE_INT shift = 0;
12508 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12510 gcc_assert (shift <= 31);
12517 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12518 because /bin/as is horribly restrictive. The judgement about
12519 whether or not each character is 'printable' (and can be output as
12520 is) or not (and must be printed with an octal escape) must be made
12521 with reference to the *host* character set -- the situation is
12522 similar to that discussed in the comments above pp_c_char in
12523 c-pretty-print.c. */
12525 #define MAX_ASCII_LEN 51
12528 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12531 int len_so_far = 0;
12533 fputs ("\t.ascii\t\"", stream);
12535 for (i = 0; i < len; i++)
12539 if (len_so_far >= MAX_ASCII_LEN)
12541 fputs ("\"\n\t.ascii\t\"", stream);
12547 if (c == '\\' || c == '\"')
12549 putc ('\\', stream);
12557 fprintf (stream, "\\%03o", c);
12562 fputs ("\"\n", stream);
12565 /* Compute the register save mask for registers 0 through 12
12566 inclusive. This code is used by arm_compute_save_reg_mask. */
12568 static unsigned long
12569 arm_compute_save_reg0_reg12_mask (void)
12571 unsigned long func_type = arm_current_func_type ();
12572 unsigned long save_reg_mask = 0;
12575 if (IS_INTERRUPT (func_type))
12577 unsigned int max_reg;
12578 /* Interrupt functions must not corrupt any registers,
12579 even call clobbered ones. If this is a leaf function
12580 we can just examine the registers used by the RTL, but
12581 otherwise we have to assume that whatever function is
12582 called might clobber anything, and so we have to save
12583 all the call-clobbered registers as well. */
12584 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12585 /* FIQ handlers have registers r8 - r12 banked, so
12586 we only need to check r0 - r7, Normal ISRs only
12587 bank r14 and r15, so we must check up to r12.
12588 r13 is the stack pointer which is always preserved,
12589 so we do not need to consider it here. */
12594 for (reg = 0; reg <= max_reg; reg++)
12595 if (df_regs_ever_live_p (reg)
12596 || (! current_function_is_leaf && call_used_regs[reg]))
12597 save_reg_mask |= (1 << reg);
12599 /* Also save the pic base register if necessary. */
12601 && !TARGET_SINGLE_PIC_BASE
12602 && arm_pic_register != INVALID_REGNUM
12603 && crtl->uses_pic_offset_table)
12604 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12608 /* In the normal case we only need to save those registers
12609 which are call saved and which are used by this function. */
12610 for (reg = 0; reg <= 11; reg++)
12611 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12612 save_reg_mask |= (1 << reg);
12614 /* Handle the frame pointer as a special case. */
12615 if (frame_pointer_needed)
12616 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12618 /* If we aren't loading the PIC register,
12619 don't stack it even though it may be live. */
12621 && !TARGET_SINGLE_PIC_BASE
12622 && arm_pic_register != INVALID_REGNUM
12623 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12624 || crtl->uses_pic_offset_table))
12625 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12627 /* The prologue will copy SP into R0, so save it. */
12628 if (IS_STACKALIGN (func_type))
12629 save_reg_mask |= 1;
12632 /* Save registers so the exception handler can modify them. */
12633 if (crtl->calls_eh_return)
12639 reg = EH_RETURN_DATA_REGNO (i);
12640 if (reg == INVALID_REGNUM)
12642 save_reg_mask |= 1 << reg;
12646 return save_reg_mask;
12650 /* Compute the number of bytes used to store the static chain register on the
12651 stack, above the stack frame. We need to know this accurately to get the
12652 alignment of the rest of the stack frame correct. */
12654 static int arm_compute_static_chain_stack_bytes (void)
12656 unsigned long func_type = arm_current_func_type ();
12657 int static_chain_stack_bytes = 0;
12659 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
12660 IS_NESTED (func_type) &&
12661 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
12662 static_chain_stack_bytes = 4;
12664 return static_chain_stack_bytes;
12668 /* Compute a bit mask of which registers need to be
12669 saved on the stack for the current function.
12670 This is used by arm_get_frame_offsets, which may add extra registers. */
12672 static unsigned long
12673 arm_compute_save_reg_mask (void)
12675 unsigned int save_reg_mask = 0;
12676 unsigned long func_type = arm_current_func_type ();
12679 if (IS_NAKED (func_type))
12680 /* This should never really happen. */
12683 /* If we are creating a stack frame, then we must save the frame pointer,
12684 IP (which will hold the old stack pointer), LR and the PC. */
12685 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12687 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
12690 | (1 << PC_REGNUM);
12692 /* Volatile functions do not return, so there
12693 is no need to save any other registers. */
12694 if (IS_VOLATILE (func_type))
12695 return save_reg_mask;
12697 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
12699 /* Decide if we need to save the link register.
12700 Interrupt routines have their own banked link register,
12701 so they never need to save it.
12702 Otherwise if we do not use the link register we do not need to save
12703 it. If we are pushing other registers onto the stack however, we
12704 can save an instruction in the epilogue by pushing the link register
12705 now and then popping it back into the PC. This incurs extra memory
12706 accesses though, so we only do it when optimizing for size, and only
12707 if we know that we will not need a fancy return sequence. */
12708 if (df_regs_ever_live_p (LR_REGNUM)
12711 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12712 && !crtl->calls_eh_return))
12713 save_reg_mask |= 1 << LR_REGNUM;
12715 if (cfun->machine->lr_save_eliminated)
12716 save_reg_mask &= ~ (1 << LR_REGNUM);
12718 if (TARGET_REALLY_IWMMXT
12719 && ((bit_count (save_reg_mask)
12720 + ARM_NUM_INTS (crtl->args.pretend_args_size +
12721 arm_compute_static_chain_stack_bytes())
12724 /* The total number of registers that are going to be pushed
12725 onto the stack is odd. We need to ensure that the stack
12726 is 64-bit aligned before we start to save iWMMXt registers,
12727 and also before we start to create locals. (A local variable
12728 might be a double or long long which we will load/store using
12729 an iWMMXt instruction). Therefore we need to push another
12730 ARM register, so that the stack will be 64-bit aligned. We
12731 try to avoid using the arg registers (r0 -r3) as they might be
12732 used to pass values in a tail call. */
12733 for (reg = 4; reg <= 12; reg++)
12734 if ((save_reg_mask & (1 << reg)) == 0)
12738 save_reg_mask |= (1 << reg);
12741 cfun->machine->sibcall_blocked = 1;
12742 save_reg_mask |= (1 << 3);
12746 /* We may need to push an additional register for use initializing the
12747 PIC base register. */
12748 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
12749 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
12751 reg = thumb_find_work_register (1 << 4);
12752 if (!call_used_regs[reg])
12753 save_reg_mask |= (1 << reg);
12756 return save_reg_mask;
12760 /* Compute a bit mask of which registers need to be
12761 saved on the stack for the current function. */
12762 static unsigned long
12763 thumb1_compute_save_reg_mask (void)
12765 unsigned long mask;
12769 for (reg = 0; reg < 12; reg ++)
12770 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12774 && !TARGET_SINGLE_PIC_BASE
12775 && arm_pic_register != INVALID_REGNUM
12776 && crtl->uses_pic_offset_table)
12777 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12779 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
12780 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
12781 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12783 /* LR will also be pushed if any lo regs are pushed. */
12784 if (mask & 0xff || thumb_force_lr_save ())
12785 mask |= (1 << LR_REGNUM);
12787 /* Make sure we have a low work register if we need one.
12788 We will need one if we are going to push a high register,
12789 but we are not currently intending to push a low register. */
12790 if ((mask & 0xff) == 0
12791 && ((mask & 0x0f00) || TARGET_BACKTRACE))
12793 /* Use thumb_find_work_register to choose which register
12794 we will use. If the register is live then we will
12795 have to push it. Use LAST_LO_REGNUM as our fallback
12796 choice for the register to select. */
12797 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
12798 /* Make sure the register returned by thumb_find_work_register is
12799 not part of the return value. */
12800 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
12801 reg = LAST_LO_REGNUM;
12803 if (! call_used_regs[reg])
12807 /* The 504 below is 8 bytes less than 512 because there are two possible
12808 alignment words. We can't tell here if they will be present or not so we
12809 have to play it safe and assume that they are. */
12810 if ((CALLER_INTERWORKING_SLOT_SIZE +
12811 ROUND_UP_WORD (get_frame_size ()) +
12812 crtl->outgoing_args_size) >= 504)
12814 /* This is the same as the code in thumb1_expand_prologue() which
12815 determines which register to use for stack decrement. */
12816 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
12817 if (mask & (1 << reg))
12820 if (reg > LAST_LO_REGNUM)
12822 /* Make sure we have a register available for stack decrement. */
12823 mask |= 1 << LAST_LO_REGNUM;
12831 /* Return the number of bytes required to save VFP registers. */
12833 arm_get_vfp_saved_size (void)
12835 unsigned int regno;
12840 /* Space for saved VFP registers. */
12841 if (TARGET_HARD_FLOAT && TARGET_VFP)
12844 for (regno = FIRST_VFP_REGNUM;
12845 regno < LAST_VFP_REGNUM;
12848 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
12849 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
12853 /* Workaround ARM10 VFPr1 bug. */
12854 if (count == 2 && !arm_arch6)
12856 saved += count * 8;
12865 if (count == 2 && !arm_arch6)
12867 saved += count * 8;
12874 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
12875 everything bar the final return instruction. */
12877 output_return_instruction (rtx operand, int really_return, int reverse)
12879 char conditional[10];
12882 unsigned long live_regs_mask;
12883 unsigned long func_type;
12884 arm_stack_offsets *offsets;
12886 func_type = arm_current_func_type ();
12888 if (IS_NAKED (func_type))
12891 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
12893 /* If this function was declared non-returning, and we have
12894 found a tail call, then we have to trust that the called
12895 function won't return. */
12900 /* Otherwise, trap an attempted return by aborting. */
12902 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
12904 assemble_external_libcall (ops[1]);
12905 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
12911 gcc_assert (!cfun->calls_alloca || really_return);
12913 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
12915 cfun->machine->return_used_this_function = 1;
12917 offsets = arm_get_frame_offsets ();
12918 live_regs_mask = offsets->saved_regs_mask;
12920 if (live_regs_mask)
12922 const char * return_reg;
12924 /* If we do not have any special requirements for function exit
12925 (e.g. interworking) then we can load the return address
12926 directly into the PC. Otherwise we must load it into LR. */
12928 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
12929 return_reg = reg_names[PC_REGNUM];
12931 return_reg = reg_names[LR_REGNUM];
12933 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
12935 /* There are three possible reasons for the IP register
12936 being saved. 1) a stack frame was created, in which case
12937 IP contains the old stack pointer, or 2) an ISR routine
12938 corrupted it, or 3) it was saved to align the stack on
12939 iWMMXt. In case 1, restore IP into SP, otherwise just
12941 if (frame_pointer_needed)
12943 live_regs_mask &= ~ (1 << IP_REGNUM);
12944 live_regs_mask |= (1 << SP_REGNUM);
12947 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
12950 /* On some ARM architectures it is faster to use LDR rather than
12951 LDM to load a single register. On other architectures, the
12952 cost is the same. In 26 bit mode, or for exception handlers,
12953 we have to use LDM to load the PC so that the CPSR is also
12955 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
12956 if (live_regs_mask == (1U << reg))
12959 if (reg <= LAST_ARM_REGNUM
12960 && (reg != LR_REGNUM
12962 || ! IS_INTERRUPT (func_type)))
12964 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
12965 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
12972 /* Generate the load multiple instruction to restore the
12973 registers. Note we can get here, even if
12974 frame_pointer_needed is true, but only if sp already
12975 points to the base of the saved core registers. */
12976 if (live_regs_mask & (1 << SP_REGNUM))
12978 unsigned HOST_WIDE_INT stack_adjust;
12980 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
12981 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
12983 if (stack_adjust && arm_arch5 && TARGET_ARM)
12984 if (TARGET_UNIFIED_ASM)
12985 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
12987 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
12990 /* If we can't use ldmib (SA110 bug),
12991 then try to pop r3 instead. */
12993 live_regs_mask |= 1 << 3;
12995 if (TARGET_UNIFIED_ASM)
12996 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
12998 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13002 if (TARGET_UNIFIED_ASM)
13003 sprintf (instr, "pop%s\t{", conditional);
13005 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13007 p = instr + strlen (instr);
13009 for (reg = 0; reg <= SP_REGNUM; reg++)
13010 if (live_regs_mask & (1 << reg))
13012 int l = strlen (reg_names[reg]);
13018 memcpy (p, ", ", 2);
13022 memcpy (p, "%|", 2);
13023 memcpy (p + 2, reg_names[reg], l);
13027 if (live_regs_mask & (1 << LR_REGNUM))
13029 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13030 /* If returning from an interrupt, restore the CPSR. */
13031 if (IS_INTERRUPT (func_type))
13038 output_asm_insn (instr, & operand);
13040 /* See if we need to generate an extra instruction to
13041 perform the actual function return. */
13043 && func_type != ARM_FT_INTERWORKED
13044 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13046 /* The return has already been handled
13047 by loading the LR into the PC. */
13054 switch ((int) ARM_FUNC_TYPE (func_type))
13058 /* ??? This is wrong for unified assembly syntax. */
13059 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13062 case ARM_FT_INTERWORKED:
13063 sprintf (instr, "bx%s\t%%|lr", conditional);
13066 case ARM_FT_EXCEPTION:
13067 /* ??? This is wrong for unified assembly syntax. */
13068 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13072 /* Use bx if it's available. */
13073 if (arm_arch5 || arm_arch4t)
13074 sprintf (instr, "bx%s\t%%|lr", conditional);
13076 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13080 output_asm_insn (instr, & operand);
13086 /* Write the function name into the code section, directly preceding
13087 the function prologue.
13089 Code will be output similar to this:
13091 .ascii "arm_poke_function_name", 0
13094 .word 0xff000000 + (t1 - t0)
13095 arm_poke_function_name
13097 stmfd sp!, {fp, ip, lr, pc}
13100 When performing a stack backtrace, code can inspect the value
13101 of 'pc' stored at 'fp' + 0. If the trace function then looks
13102 at location pc - 12 and the top 8 bits are set, then we know
13103 that there is a function name embedded immediately preceding this
13104 location and has length ((pc[-3]) & 0xff000000).
13106 We assume that pc is declared as a pointer to an unsigned long.
13108 It is of no benefit to output the function name if we are assembling
13109 a leaf function. These function types will not contain a stack
13110 backtrace structure, therefore it is not possible to determine the
13113 arm_poke_function_name (FILE *stream, const char *name)
13115 unsigned long alignlength;
13116 unsigned long length;
13119 length = strlen (name) + 1;
13120 alignlength = ROUND_UP_WORD (length);
13122 ASM_OUTPUT_ASCII (stream, name, length);
13123 ASM_OUTPUT_ALIGN (stream, 2);
13124 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13125 assemble_aligned_integer (UNITS_PER_WORD, x);
13128 /* Place some comments into the assembler stream
13129 describing the current function. */
13131 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13133 unsigned long func_type;
13137 thumb1_output_function_prologue (f, frame_size);
13141 /* Sanity check. */
13142 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13144 func_type = arm_current_func_type ();
13146 switch ((int) ARM_FUNC_TYPE (func_type))
13149 case ARM_FT_NORMAL:
13151 case ARM_FT_INTERWORKED:
13152 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13155 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13158 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13160 case ARM_FT_EXCEPTION:
13161 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13165 if (IS_NAKED (func_type))
13166 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13168 if (IS_VOLATILE (func_type))
13169 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13171 if (IS_NESTED (func_type))
13172 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13173 if (IS_STACKALIGN (func_type))
13174 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13176 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13178 crtl->args.pretend_args_size, frame_size);
13180 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13181 frame_pointer_needed,
13182 cfun->machine->uses_anonymous_args);
13184 if (cfun->machine->lr_save_eliminated)
13185 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13187 if (crtl->calls_eh_return)
13188 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13193 arm_output_epilogue (rtx sibling)
13196 unsigned long saved_regs_mask;
13197 unsigned long func_type;
13198 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13199 frame that is $fp + 4 for a non-variadic function. */
13200 int floats_offset = 0;
13202 FILE * f = asm_out_file;
13203 unsigned int lrm_count = 0;
13204 int really_return = (sibling == NULL);
13206 arm_stack_offsets *offsets;
13208 /* If we have already generated the return instruction
13209 then it is futile to generate anything else. */
13210 if (use_return_insn (FALSE, sibling) &&
13211 (cfun->machine->return_used_this_function != 0))
13214 func_type = arm_current_func_type ();
13216 if (IS_NAKED (func_type))
13217 /* Naked functions don't have epilogues. */
13220 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13224 /* A volatile function should never return. Call abort. */
13225 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13226 assemble_external_libcall (op);
13227 output_asm_insn ("bl\t%a0", &op);
13232 /* If we are throwing an exception, then we really must be doing a
13233 return, so we can't tail-call. */
13234 gcc_assert (!crtl->calls_eh_return || really_return);
13236 offsets = arm_get_frame_offsets ();
13237 saved_regs_mask = offsets->saved_regs_mask;
13240 lrm_count = bit_count (saved_regs_mask);
13242 floats_offset = offsets->saved_args;
13243 /* Compute how far away the floats will be. */
13244 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13245 if (saved_regs_mask & (1 << reg))
13246 floats_offset += 4;
13248 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13250 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13251 int vfp_offset = offsets->frame;
13253 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13255 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13256 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13258 floats_offset += 12;
13259 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13260 reg, FP_REGNUM, floats_offset - vfp_offset);
13265 start_reg = LAST_FPA_REGNUM;
13267 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13269 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13271 floats_offset += 12;
13273 /* We can't unstack more than four registers at once. */
13274 if (start_reg - reg == 3)
13276 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13277 reg, FP_REGNUM, floats_offset - vfp_offset);
13278 start_reg = reg - 1;
13283 if (reg != start_reg)
13284 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13285 reg + 1, start_reg - reg,
13286 FP_REGNUM, floats_offset - vfp_offset);
13287 start_reg = reg - 1;
13291 /* Just in case the last register checked also needs unstacking. */
13292 if (reg != start_reg)
13293 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13294 reg + 1, start_reg - reg,
13295 FP_REGNUM, floats_offset - vfp_offset);
13298 if (TARGET_HARD_FLOAT && TARGET_VFP)
13302 /* The fldmd insns do not have base+offset addressing
13303 modes, so we use IP to hold the address. */
13304 saved_size = arm_get_vfp_saved_size ();
13306 if (saved_size > 0)
13308 floats_offset += saved_size;
13309 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13310 FP_REGNUM, floats_offset - vfp_offset);
13312 start_reg = FIRST_VFP_REGNUM;
13313 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13315 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13316 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13318 if (start_reg != reg)
13319 vfp_output_fldmd (f, IP_REGNUM,
13320 (start_reg - FIRST_VFP_REGNUM) / 2,
13321 (reg - start_reg) / 2);
13322 start_reg = reg + 2;
13325 if (start_reg != reg)
13326 vfp_output_fldmd (f, IP_REGNUM,
13327 (start_reg - FIRST_VFP_REGNUM) / 2,
13328 (reg - start_reg) / 2);
13333 /* The frame pointer is guaranteed to be non-double-word aligned.
13334 This is because it is set to (old_stack_pointer - 4) and the
13335 old_stack_pointer was double word aligned. Thus the offset to
13336 the iWMMXt registers to be loaded must also be non-double-word
13337 sized, so that the resultant address *is* double-word aligned.
13338 We can ignore floats_offset since that was already included in
13339 the live_regs_mask. */
13340 lrm_count += (lrm_count % 2 ? 2 : 1);
13342 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13343 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13345 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13346 reg, FP_REGNUM, lrm_count * 4);
13351 /* saved_regs_mask should contain the IP, which at the time of stack
13352 frame generation actually contains the old stack pointer. So a
13353 quick way to unwind the stack is just pop the IP register directly
13354 into the stack pointer. */
13355 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13356 saved_regs_mask &= ~ (1 << IP_REGNUM);
13357 saved_regs_mask |= (1 << SP_REGNUM);
13359 /* There are two registers left in saved_regs_mask - LR and PC. We
13360 only need to restore the LR register (the return address), but to
13361 save time we can load it directly into the PC, unless we need a
13362 special function exit sequence, or we are not really returning. */
13364 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13365 && !crtl->calls_eh_return)
13366 /* Delete the LR from the register mask, so that the LR on
13367 the stack is loaded into the PC in the register mask. */
13368 saved_regs_mask &= ~ (1 << LR_REGNUM);
13370 saved_regs_mask &= ~ (1 << PC_REGNUM);
13372 /* We must use SP as the base register, because SP is one of the
13373 registers being restored. If an interrupt or page fault
13374 happens in the ldm instruction, the SP might or might not
13375 have been restored. That would be bad, as then SP will no
13376 longer indicate the safe area of stack, and we can get stack
13377 corruption. Using SP as the base register means that it will
13378 be reset correctly to the original value, should an interrupt
13379 occur. If the stack pointer already points at the right
13380 place, then omit the subtraction. */
13381 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13382 || cfun->calls_alloca)
13383 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13384 4 * bit_count (saved_regs_mask));
13385 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13387 if (IS_INTERRUPT (func_type))
13388 /* Interrupt handlers will have pushed the
13389 IP onto the stack, so restore it now. */
13390 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13394 /* This branch is executed for ARM mode (non-apcs frames) and
13395 Thumb-2 mode. Frame layout is essentially the same for those
13396 cases, except that in ARM mode frame pointer points to the
13397 first saved register, while in Thumb-2 mode the frame pointer points
13398 to the last saved register.
13400 It is possible to make frame pointer point to last saved
13401 register in both cases, and remove some conditionals below.
13402 That means that fp setup in prologue would be just "mov fp, sp"
13403 and sp restore in epilogue would be just "mov sp, fp", whereas
13404 now we have to use add/sub in those cases. However, the value
13405 of that would be marginal, as both mov and add/sub are 32-bit
13406 in ARM mode, and it would require extra conditionals
13407 in arm_expand_prologue to distingish ARM-apcs-frame case
13408 (where frame pointer is required to point at first register)
13409 and ARM-non-apcs-frame. Therefore, such change is postponed
13410 until real need arise. */
13411 unsigned HOST_WIDE_INT amount;
13413 /* Restore stack pointer if necessary. */
13414 if (TARGET_ARM && frame_pointer_needed)
13416 operands[0] = stack_pointer_rtx;
13417 operands[1] = hard_frame_pointer_rtx;
13419 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13420 output_add_immediate (operands);
13424 if (frame_pointer_needed)
13426 /* For Thumb-2 restore sp from the frame pointer.
13427 Operand restrictions mean we have to incrememnt FP, then copy
13429 amount = offsets->locals_base - offsets->saved_regs;
13430 operands[0] = hard_frame_pointer_rtx;
13434 unsigned long count;
13435 operands[0] = stack_pointer_rtx;
13436 amount = offsets->outgoing_args - offsets->saved_regs;
13437 /* pop call clobbered registers if it avoids a
13438 separate stack adjustment. */
13439 count = offsets->saved_regs - offsets->saved_args;
13442 && !crtl->calls_eh_return
13443 && bit_count(saved_regs_mask) * 4 == count
13444 && !IS_INTERRUPT (func_type)
13445 && !crtl->tail_call_emit)
13447 unsigned long mask;
13448 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13450 mask &= ~saved_regs_mask;
13452 while (bit_count (mask) * 4 > amount)
13454 while ((mask & (1 << reg)) == 0)
13456 mask &= ~(1 << reg);
13458 if (bit_count (mask) * 4 == amount) {
13460 saved_regs_mask |= mask;
13467 operands[1] = operands[0];
13468 operands[2] = GEN_INT (amount);
13469 output_add_immediate (operands);
13471 if (frame_pointer_needed)
13472 asm_fprintf (f, "\tmov\t%r, %r\n",
13473 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13476 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13478 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13479 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13480 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13485 start_reg = FIRST_FPA_REGNUM;
13487 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13489 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13491 if (reg - start_reg == 3)
13493 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13494 start_reg, SP_REGNUM);
13495 start_reg = reg + 1;
13500 if (reg != start_reg)
13501 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13502 start_reg, reg - start_reg,
13505 start_reg = reg + 1;
13509 /* Just in case the last register checked also needs unstacking. */
13510 if (reg != start_reg)
13511 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13512 start_reg, reg - start_reg, SP_REGNUM);
13515 if (TARGET_HARD_FLOAT && TARGET_VFP)
13517 start_reg = FIRST_VFP_REGNUM;
13518 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13520 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13521 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13523 if (start_reg != reg)
13524 vfp_output_fldmd (f, SP_REGNUM,
13525 (start_reg - FIRST_VFP_REGNUM) / 2,
13526 (reg - start_reg) / 2);
13527 start_reg = reg + 2;
13530 if (start_reg != reg)
13531 vfp_output_fldmd (f, SP_REGNUM,
13532 (start_reg - FIRST_VFP_REGNUM) / 2,
13533 (reg - start_reg) / 2);
13536 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13537 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13538 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13540 /* If we can, restore the LR into the PC. */
13541 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13542 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13543 && !IS_STACKALIGN (func_type)
13545 && crtl->args.pretend_args_size == 0
13546 && saved_regs_mask & (1 << LR_REGNUM)
13547 && !crtl->calls_eh_return)
13549 saved_regs_mask &= ~ (1 << LR_REGNUM);
13550 saved_regs_mask |= (1 << PC_REGNUM);
13551 rfe = IS_INTERRUPT (func_type);
13556 /* Load the registers off the stack. If we only have one register
13557 to load use the LDR instruction - it is faster. For Thumb-2
13558 always use pop and the assembler will pick the best instruction.*/
13559 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13560 && !IS_INTERRUPT(func_type))
13562 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13564 else if (saved_regs_mask)
13566 if (saved_regs_mask & (1 << SP_REGNUM))
13567 /* Note - write back to the stack register is not enabled
13568 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13569 in the list of registers and if we add writeback the
13570 instruction becomes UNPREDICTABLE. */
13571 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13573 else if (TARGET_ARM)
13574 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13577 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13580 if (crtl->args.pretend_args_size)
13582 /* Unwind the pre-pushed regs. */
13583 operands[0] = operands[1] = stack_pointer_rtx;
13584 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13585 output_add_immediate (operands);
13589 /* We may have already restored PC directly from the stack. */
13590 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13593 /* Stack adjustment for exception handler. */
13594 if (crtl->calls_eh_return)
13595 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13596 ARM_EH_STACKADJ_REGNUM);
13598 /* Generate the return instruction. */
13599 switch ((int) ARM_FUNC_TYPE (func_type))
13603 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13606 case ARM_FT_EXCEPTION:
13607 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13610 case ARM_FT_INTERWORKED:
13611 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13615 if (IS_STACKALIGN (func_type))
13617 /* See comment in arm_expand_prologue. */
13618 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13620 if (arm_arch5 || arm_arch4t)
13621 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13623 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13631 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13632 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13634 arm_stack_offsets *offsets;
13640 /* Emit any call-via-reg trampolines that are needed for v4t support
13641 of call_reg and call_value_reg type insns. */
13642 for (regno = 0; regno < LR_REGNUM; regno++)
13644 rtx label = cfun->machine->call_via[regno];
13648 switch_to_section (function_section (current_function_decl));
13649 targetm.asm_out.internal_label (asm_out_file, "L",
13650 CODE_LABEL_NUMBER (label));
13651 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13655 /* ??? Probably not safe to set this here, since it assumes that a
13656 function will be emitted as assembly immediately after we generate
13657 RTL for it. This does not happen for inline functions. */
13658 cfun->machine->return_used_this_function = 0;
13660 else /* TARGET_32BIT */
13662 /* We need to take into account any stack-frame rounding. */
13663 offsets = arm_get_frame_offsets ();
13665 gcc_assert (!use_return_insn (FALSE, NULL)
13666 || (cfun->machine->return_used_this_function != 0)
13667 || offsets->saved_regs == offsets->outgoing_args
13668 || frame_pointer_needed);
13670 /* Reset the ARM-specific per-function variables. */
13671 after_arm_reorg = 0;
13675 /* Generate and emit an insn that we will recognize as a push_multi.
13676 Unfortunately, since this insn does not reflect very well the actual
13677 semantics of the operation, we need to annotate the insn for the benefit
13678 of DWARF2 frame unwind information. */
13680 emit_multi_reg_push (unsigned long mask)
13683 int num_dwarf_regs;
13687 int dwarf_par_index;
13690 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13691 if (mask & (1 << i))
13694 gcc_assert (num_regs && num_regs <= 16);
13696 /* We don't record the PC in the dwarf frame information. */
13697 num_dwarf_regs = num_regs;
13698 if (mask & (1 << PC_REGNUM))
13701 /* For the body of the insn we are going to generate an UNSPEC in
13702 parallel with several USEs. This allows the insn to be recognized
13703 by the push_multi pattern in the arm.md file. The insn looks
13704 something like this:
13707 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
13708 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
13709 (use (reg:SI 11 fp))
13710 (use (reg:SI 12 ip))
13711 (use (reg:SI 14 lr))
13712 (use (reg:SI 15 pc))
13715 For the frame note however, we try to be more explicit and actually
13716 show each register being stored into the stack frame, plus a (single)
13717 decrement of the stack pointer. We do it this way in order to be
13718 friendly to the stack unwinding code, which only wants to see a single
13719 stack decrement per instruction. The RTL we generate for the note looks
13720 something like this:
13723 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
13724 (set (mem:SI (reg:SI sp)) (reg:SI r4))
13725 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
13726 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
13727 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
13730 This sequence is used both by the code to support stack unwinding for
13731 exceptions handlers and the code to generate dwarf2 frame debugging. */
13733 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
13734 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
13735 dwarf_par_index = 1;
13737 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13739 if (mask & (1 << i))
13741 reg = gen_rtx_REG (SImode, i);
13743 XVECEXP (par, 0, 0)
13744 = gen_rtx_SET (VOIDmode,
13745 gen_frame_mem (BLKmode,
13746 gen_rtx_PRE_DEC (BLKmode,
13747 stack_pointer_rtx)),
13748 gen_rtx_UNSPEC (BLKmode,
13749 gen_rtvec (1, reg),
13750 UNSPEC_PUSH_MULT));
13752 if (i != PC_REGNUM)
13754 tmp = gen_rtx_SET (VOIDmode,
13755 gen_frame_mem (SImode, stack_pointer_rtx),
13757 RTX_FRAME_RELATED_P (tmp) = 1;
13758 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
13766 for (j = 1, i++; j < num_regs; i++)
13768 if (mask & (1 << i))
13770 reg = gen_rtx_REG (SImode, i);
13772 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
13774 if (i != PC_REGNUM)
13777 = gen_rtx_SET (VOIDmode,
13778 gen_frame_mem (SImode,
13779 plus_constant (stack_pointer_rtx,
13782 RTX_FRAME_RELATED_P (tmp) = 1;
13783 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
13790 par = emit_insn (par);
13792 tmp = gen_rtx_SET (VOIDmode,
13794 plus_constant (stack_pointer_rtx, -4 * num_regs));
13795 RTX_FRAME_RELATED_P (tmp) = 1;
13796 XVECEXP (dwarf, 0, 0) = tmp;
13798 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13803 /* Calculate the size of the return value that is passed in registers. */
13805 arm_size_return_regs (void)
13807 enum machine_mode mode;
13809 if (crtl->return_rtx != 0)
13810 mode = GET_MODE (crtl->return_rtx);
13812 mode = DECL_MODE (DECL_RESULT (current_function_decl));
13814 return GET_MODE_SIZE (mode);
13818 emit_sfm (int base_reg, int count)
13825 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13826 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13828 reg = gen_rtx_REG (XFmode, base_reg++);
13830 XVECEXP (par, 0, 0)
13831 = gen_rtx_SET (VOIDmode,
13832 gen_frame_mem (BLKmode,
13833 gen_rtx_PRE_DEC (BLKmode,
13834 stack_pointer_rtx)),
13835 gen_rtx_UNSPEC (BLKmode,
13836 gen_rtvec (1, reg),
13837 UNSPEC_PUSH_MULT));
13838 tmp = gen_rtx_SET (VOIDmode,
13839 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
13840 RTX_FRAME_RELATED_P (tmp) = 1;
13841 XVECEXP (dwarf, 0, 1) = tmp;
13843 for (i = 1; i < count; i++)
13845 reg = gen_rtx_REG (XFmode, base_reg++);
13846 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13848 tmp = gen_rtx_SET (VOIDmode,
13849 gen_frame_mem (XFmode,
13850 plus_constant (stack_pointer_rtx,
13853 RTX_FRAME_RELATED_P (tmp) = 1;
13854 XVECEXP (dwarf, 0, i + 1) = tmp;
13857 tmp = gen_rtx_SET (VOIDmode,
13859 plus_constant (stack_pointer_rtx, -12 * count));
13861 RTX_FRAME_RELATED_P (tmp) = 1;
13862 XVECEXP (dwarf, 0, 0) = tmp;
13864 par = emit_insn (par);
13865 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13871 /* Return true if the current function needs to save/restore LR. */
13874 thumb_force_lr_save (void)
13876 return !cfun->machine->lr_save_eliminated
13877 && (!leaf_function_p ()
13878 || thumb_far_jump_used_p ()
13879 || df_regs_ever_live_p (LR_REGNUM));
13883 /* Compute the distance from register FROM to register TO.
13884 These can be the arg pointer (26), the soft frame pointer (25),
13885 the stack pointer (13) or the hard frame pointer (11).
13886 In thumb mode r7 is used as the soft frame pointer, if needed.
13887 Typical stack layout looks like this:
13889 old stack pointer -> | |
13892 | | saved arguments for
13893 | | vararg functions
13896 hard FP & arg pointer -> | | \
13904 soft frame pointer -> | | /
13909 locals base pointer -> | | /
13914 current stack pointer -> | | /
13917 For a given function some or all of these stack components
13918 may not be needed, giving rise to the possibility of
13919 eliminating some of the registers.
13921 The values returned by this function must reflect the behavior
13922 of arm_expand_prologue() and arm_compute_save_reg_mask().
13924 The sign of the number returned reflects the direction of stack
13925 growth, so the values are positive for all eliminations except
13926 from the soft frame pointer to the hard frame pointer.
13928 SFP may point just inside the local variables block to ensure correct
13932 /* Calculate stack offsets. These are used to calculate register elimination
13933 offsets and in prologue/epilogue code. Also calculates which registers
13934 should be saved. */
13936 static arm_stack_offsets *
13937 arm_get_frame_offsets (void)
13939 struct arm_stack_offsets *offsets;
13940 unsigned long func_type;
13944 HOST_WIDE_INT frame_size;
13947 offsets = &cfun->machine->stack_offsets;
13949 /* We need to know if we are a leaf function. Unfortunately, it
13950 is possible to be called after start_sequence has been called,
13951 which causes get_insns to return the insns for the sequence,
13952 not the function, which will cause leaf_function_p to return
13953 the incorrect result.
13955 to know about leaf functions once reload has completed, and the
13956 frame size cannot be changed after that time, so we can safely
13957 use the cached value. */
13959 if (reload_completed)
13962 /* Initially this is the size of the local variables. It will translated
13963 into an offset once we have determined the size of preceding data. */
13964 frame_size = ROUND_UP_WORD (get_frame_size ());
13966 leaf = leaf_function_p ();
13968 /* Space for variadic functions. */
13969 offsets->saved_args = crtl->args.pretend_args_size;
13971 /* In Thumb mode this is incorrect, but never used. */
13972 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
13973 arm_compute_static_chain_stack_bytes();
13977 unsigned int regno;
13979 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
13980 core_saved = bit_count (offsets->saved_regs_mask) * 4;
13981 saved = core_saved;
13983 /* We know that SP will be doubleword aligned on entry, and we must
13984 preserve that condition at any subroutine call. We also require the
13985 soft frame pointer to be doubleword aligned. */
13987 if (TARGET_REALLY_IWMMXT)
13989 /* Check for the call-saved iWMMXt registers. */
13990 for (regno = FIRST_IWMMXT_REGNUM;
13991 regno <= LAST_IWMMXT_REGNUM;
13993 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
13997 func_type = arm_current_func_type ();
13998 if (! IS_VOLATILE (func_type))
14000 /* Space for saved FPA registers. */
14001 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14002 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14005 /* Space for saved VFP registers. */
14006 if (TARGET_HARD_FLOAT && TARGET_VFP)
14007 saved += arm_get_vfp_saved_size ();
14010 else /* TARGET_THUMB1 */
14012 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14013 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14014 saved = core_saved;
14015 if (TARGET_BACKTRACE)
14019 /* Saved registers include the stack frame. */
14020 offsets->saved_regs = offsets->saved_args + saved +
14021 arm_compute_static_chain_stack_bytes();
14022 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14023 /* A leaf function does not need any stack alignment if it has nothing
14025 if (leaf && frame_size == 0)
14027 offsets->outgoing_args = offsets->soft_frame;
14028 offsets->locals_base = offsets->soft_frame;
14032 /* Ensure SFP has the correct alignment. */
14033 if (ARM_DOUBLEWORD_ALIGN
14034 && (offsets->soft_frame & 7))
14036 offsets->soft_frame += 4;
14037 /* Try to align stack by pushing an extra reg. Don't bother doing this
14038 when there is a stack frame as the alignment will be rolled into
14039 the normal stack adjustment. */
14040 if (frame_size + crtl->outgoing_args_size == 0)
14044 /* If it is safe to use r3, then do so. This sometimes
14045 generates better code on Thumb-2 by avoiding the need to
14046 use 32-bit push/pop instructions. */
14047 if (!crtl->tail_call_emit
14048 && arm_size_return_regs () <= 12)
14053 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14055 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14064 offsets->saved_regs += 4;
14065 offsets->saved_regs_mask |= (1 << reg);
14070 offsets->locals_base = offsets->soft_frame + frame_size;
14071 offsets->outgoing_args = (offsets->locals_base
14072 + crtl->outgoing_args_size);
14074 if (ARM_DOUBLEWORD_ALIGN)
14076 /* Ensure SP remains doubleword aligned. */
14077 if (offsets->outgoing_args & 7)
14078 offsets->outgoing_args += 4;
14079 gcc_assert (!(offsets->outgoing_args & 7));
14086 /* Calculate the relative offsets for the different stack pointers. Positive
14087 offsets are in the direction of stack growth. */
14090 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14092 arm_stack_offsets *offsets;
14094 offsets = arm_get_frame_offsets ();
14096 /* OK, now we have enough information to compute the distances.
14097 There must be an entry in these switch tables for each pair
14098 of registers in ELIMINABLE_REGS, even if some of the entries
14099 seem to be redundant or useless. */
14102 case ARG_POINTER_REGNUM:
14105 case THUMB_HARD_FRAME_POINTER_REGNUM:
14108 case FRAME_POINTER_REGNUM:
14109 /* This is the reverse of the soft frame pointer
14110 to hard frame pointer elimination below. */
14111 return offsets->soft_frame - offsets->saved_args;
14113 case ARM_HARD_FRAME_POINTER_REGNUM:
14114 /* This is only non-zero in the case where the static chain register
14115 is stored above the frame. */
14116 return offsets->frame - offsets->saved_args - 4;
14118 case STACK_POINTER_REGNUM:
14119 /* If nothing has been pushed on the stack at all
14120 then this will return -4. This *is* correct! */
14121 return offsets->outgoing_args - (offsets->saved_args + 4);
14124 gcc_unreachable ();
14126 gcc_unreachable ();
14128 case FRAME_POINTER_REGNUM:
14131 case THUMB_HARD_FRAME_POINTER_REGNUM:
14134 case ARM_HARD_FRAME_POINTER_REGNUM:
14135 /* The hard frame pointer points to the top entry in the
14136 stack frame. The soft frame pointer to the bottom entry
14137 in the stack frame. If there is no stack frame at all,
14138 then they are identical. */
14140 return offsets->frame - offsets->soft_frame;
14142 case STACK_POINTER_REGNUM:
14143 return offsets->outgoing_args - offsets->soft_frame;
14146 gcc_unreachable ();
14148 gcc_unreachable ();
14151 /* You cannot eliminate from the stack pointer.
14152 In theory you could eliminate from the hard frame
14153 pointer to the stack pointer, but this will never
14154 happen, since if a stack frame is not needed the
14155 hard frame pointer will never be used. */
14156 gcc_unreachable ();
14160 /* Given FROM and TO register numbers, say whether this elimination is
14161 allowed. Frame pointer elimination is automatically handled.
14163 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14164 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14165 pointer, we must eliminate FRAME_POINTER_REGNUM into
14166 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14167 ARG_POINTER_REGNUM. */
14170 arm_can_eliminate (const int from, const int to)
14172 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14173 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14174 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14175 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14179 /* Emit RTL to save coprocessor registers on function entry. Returns the
14180 number of bytes pushed. */
14183 arm_save_coproc_regs(void)
14185 int saved_size = 0;
14187 unsigned start_reg;
14190 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14191 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14193 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
14194 insn = gen_rtx_MEM (V2SImode, insn);
14195 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14196 RTX_FRAME_RELATED_P (insn) = 1;
14200 /* Save any floating point call-saved registers used by this
14202 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
14204 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14205 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14207 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
14208 insn = gen_rtx_MEM (XFmode, insn);
14209 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14210 RTX_FRAME_RELATED_P (insn) = 1;
14216 start_reg = LAST_FPA_REGNUM;
14218 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14220 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14222 if (start_reg - reg == 3)
14224 insn = emit_sfm (reg, 4);
14225 RTX_FRAME_RELATED_P (insn) = 1;
14227 start_reg = reg - 1;
14232 if (start_reg != reg)
14234 insn = emit_sfm (reg + 1, start_reg - reg);
14235 RTX_FRAME_RELATED_P (insn) = 1;
14236 saved_size += (start_reg - reg) * 12;
14238 start_reg = reg - 1;
14242 if (start_reg != reg)
14244 insn = emit_sfm (reg + 1, start_reg - reg);
14245 saved_size += (start_reg - reg) * 12;
14246 RTX_FRAME_RELATED_P (insn) = 1;
14249 if (TARGET_HARD_FLOAT && TARGET_VFP)
14251 start_reg = FIRST_VFP_REGNUM;
14253 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14255 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14256 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14258 if (start_reg != reg)
14259 saved_size += vfp_emit_fstmd (start_reg,
14260 (reg - start_reg) / 2);
14261 start_reg = reg + 2;
14264 if (start_reg != reg)
14265 saved_size += vfp_emit_fstmd (start_reg,
14266 (reg - start_reg) / 2);
14272 /* Set the Thumb frame pointer from the stack pointer. */
14275 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14277 HOST_WIDE_INT amount;
14280 amount = offsets->outgoing_args - offsets->locals_base;
14282 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14283 stack_pointer_rtx, GEN_INT (amount)));
14286 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14287 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14288 expects the first two operands to be the same. */
14291 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14293 hard_frame_pointer_rtx));
14297 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14298 hard_frame_pointer_rtx,
14299 stack_pointer_rtx));
14301 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14302 plus_constant (stack_pointer_rtx, amount));
14303 RTX_FRAME_RELATED_P (dwarf) = 1;
14304 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14307 RTX_FRAME_RELATED_P (insn) = 1;
14310 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14313 arm_expand_prologue (void)
14318 unsigned long live_regs_mask;
14319 unsigned long func_type;
14321 int saved_pretend_args = 0;
14322 int saved_regs = 0;
14323 unsigned HOST_WIDE_INT args_to_push;
14324 arm_stack_offsets *offsets;
14326 func_type = arm_current_func_type ();
14328 /* Naked functions don't have prologues. */
14329 if (IS_NAKED (func_type))
14332 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14333 args_to_push = crtl->args.pretend_args_size;
14335 /* Compute which register we will have to save onto the stack. */
14336 offsets = arm_get_frame_offsets ();
14337 live_regs_mask = offsets->saved_regs_mask;
14339 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14341 if (IS_STACKALIGN (func_type))
14346 /* Handle a word-aligned stack pointer. We generate the following:
14351 <save and restore r0 in normal prologue/epilogue>
14355 The unwinder doesn't need to know about the stack realignment.
14356 Just tell it we saved SP in r0. */
14357 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14359 r0 = gen_rtx_REG (SImode, 0);
14360 r1 = gen_rtx_REG (SImode, 1);
14361 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14362 compiler won't choke. */
14363 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14364 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14365 insn = gen_movsi (r0, stack_pointer_rtx);
14366 RTX_FRAME_RELATED_P (insn) = 1;
14367 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14369 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14370 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14373 /* For APCS frames, if IP register is clobbered
14374 when creating frame, save that register in a special
14376 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14378 if (IS_INTERRUPT (func_type))
14380 /* Interrupt functions must not corrupt any registers.
14381 Creating a frame pointer however, corrupts the IP
14382 register, so we must push it first. */
14383 insn = emit_multi_reg_push (1 << IP_REGNUM);
14385 /* Do not set RTX_FRAME_RELATED_P on this insn.
14386 The dwarf stack unwinding code only wants to see one
14387 stack decrement per function, and this is not it. If
14388 this instruction is labeled as being part of the frame
14389 creation sequence then dwarf2out_frame_debug_expr will
14390 die when it encounters the assignment of IP to FP
14391 later on, since the use of SP here establishes SP as
14392 the CFA register and not IP.
14394 Anyway this instruction is not really part of the stack
14395 frame creation although it is part of the prologue. */
14397 else if (IS_NESTED (func_type))
14399 /* The Static chain register is the same as the IP register
14400 used as a scratch register during stack frame creation.
14401 To get around this need to find somewhere to store IP
14402 whilst the frame is being created. We try the following
14405 1. The last argument register.
14406 2. A slot on the stack above the frame. (This only
14407 works if the function is not a varargs function).
14408 3. Register r3, after pushing the argument registers
14411 Note - we only need to tell the dwarf2 backend about the SP
14412 adjustment in the second variant; the static chain register
14413 doesn't need to be unwound, as it doesn't contain a value
14414 inherited from the caller. */
14416 if (df_regs_ever_live_p (3) == false)
14417 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14418 else if (args_to_push == 0)
14422 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14425 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14426 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14429 /* Just tell the dwarf backend that we adjusted SP. */
14430 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14431 plus_constant (stack_pointer_rtx,
14433 RTX_FRAME_RELATED_P (insn) = 1;
14434 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14438 /* Store the args on the stack. */
14439 if (cfun->machine->uses_anonymous_args)
14440 insn = emit_multi_reg_push
14441 ((0xf0 >> (args_to_push / 4)) & 0xf);
14444 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14445 GEN_INT (- args_to_push)));
14447 RTX_FRAME_RELATED_P (insn) = 1;
14449 saved_pretend_args = 1;
14450 fp_offset = args_to_push;
14453 /* Now reuse r3 to preserve IP. */
14454 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14458 insn = emit_set_insn (ip_rtx,
14459 plus_constant (stack_pointer_rtx, fp_offset));
14460 RTX_FRAME_RELATED_P (insn) = 1;
14465 /* Push the argument registers, or reserve space for them. */
14466 if (cfun->machine->uses_anonymous_args)
14467 insn = emit_multi_reg_push
14468 ((0xf0 >> (args_to_push / 4)) & 0xf);
14471 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14472 GEN_INT (- args_to_push)));
14473 RTX_FRAME_RELATED_P (insn) = 1;
14476 /* If this is an interrupt service routine, and the link register
14477 is going to be pushed, and we're not generating extra
14478 push of IP (needed when frame is needed and frame layout if apcs),
14479 subtracting four from LR now will mean that the function return
14480 can be done with a single instruction. */
14481 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14482 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14483 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14486 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14488 emit_set_insn (lr, plus_constant (lr, -4));
14491 if (live_regs_mask)
14493 saved_regs += bit_count (live_regs_mask) * 4;
14494 if (optimize_size && !frame_pointer_needed
14495 && saved_regs == offsets->saved_regs - offsets->saved_args)
14497 /* If no coprocessor registers are being pushed and we don't have
14498 to worry about a frame pointer then push extra registers to
14499 create the stack frame. This is done is a way that does not
14500 alter the frame layout, so is independent of the epilogue. */
14504 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14506 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14507 if (frame && n * 4 >= frame)
14510 live_regs_mask |= (1 << n) - 1;
14511 saved_regs += frame;
14514 insn = emit_multi_reg_push (live_regs_mask);
14515 RTX_FRAME_RELATED_P (insn) = 1;
14518 if (! IS_VOLATILE (func_type))
14519 saved_regs += arm_save_coproc_regs ();
14521 if (frame_pointer_needed && TARGET_ARM)
14523 /* Create the new frame pointer. */
14524 if (TARGET_APCS_FRAME)
14526 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14527 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14528 RTX_FRAME_RELATED_P (insn) = 1;
14530 if (IS_NESTED (func_type))
14532 /* Recover the static chain register. */
14533 if (!df_regs_ever_live_p (3)
14534 || saved_pretend_args)
14535 insn = gen_rtx_REG (SImode, 3);
14536 else /* if (crtl->args.pretend_args_size == 0) */
14538 insn = plus_constant (hard_frame_pointer_rtx, 4);
14539 insn = gen_frame_mem (SImode, insn);
14541 emit_set_insn (ip_rtx, insn);
14542 /* Add a USE to stop propagate_one_insn() from barfing. */
14543 emit_insn (gen_prologue_use (ip_rtx));
14548 insn = GEN_INT (saved_regs - 4);
14549 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14550 stack_pointer_rtx, insn));
14551 RTX_FRAME_RELATED_P (insn) = 1;
14555 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14557 /* This add can produce multiple insns for a large constant, so we
14558 need to get tricky. */
14559 rtx last = get_last_insn ();
14561 amount = GEN_INT (offsets->saved_args + saved_regs
14562 - offsets->outgoing_args);
14564 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14568 last = last ? NEXT_INSN (last) : get_insns ();
14569 RTX_FRAME_RELATED_P (last) = 1;
14571 while (last != insn);
14573 /* If the frame pointer is needed, emit a special barrier that
14574 will prevent the scheduler from moving stores to the frame
14575 before the stack adjustment. */
14576 if (frame_pointer_needed)
14577 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14578 hard_frame_pointer_rtx));
14582 if (frame_pointer_needed && TARGET_THUMB2)
14583 thumb_set_frame_pointer (offsets);
14585 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14587 unsigned long mask;
14589 mask = live_regs_mask;
14590 mask &= THUMB2_WORK_REGS;
14591 if (!IS_NESTED (func_type))
14592 mask |= (1 << IP_REGNUM);
14593 arm_load_pic_register (mask);
14596 /* If we are profiling, make sure no instructions are scheduled before
14597 the call to mcount. Similarly if the user has requested no
14598 scheduling in the prolog. Similarly if we want non-call exceptions
14599 using the EABI unwinder, to prevent faulting instructions from being
14600 swapped with a stack adjustment. */
14601 if (crtl->profile || !TARGET_SCHED_PROLOG
14602 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
14603 emit_insn (gen_blockage ());
14605 /* If the link register is being kept alive, with the return address in it,
14606 then make sure that it does not get reused by the ce2 pass. */
14607 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
14608 cfun->machine->lr_save_eliminated = 1;
14611 /* Print condition code to STREAM. Helper function for arm_print_operand. */
14613 arm_print_condition (FILE *stream)
14615 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
14617 /* Branch conversion is not implemented for Thumb-2. */
14620 output_operand_lossage ("predicated Thumb instruction");
14623 if (current_insn_predicate != NULL)
14625 output_operand_lossage
14626 ("predicated instruction in conditional sequence");
14630 fputs (arm_condition_codes[arm_current_cc], stream);
14632 else if (current_insn_predicate)
14634 enum arm_cond_code code;
14638 output_operand_lossage ("predicated Thumb instruction");
14642 code = get_arm_condition_code (current_insn_predicate);
14643 fputs (arm_condition_codes[code], stream);
14648 /* If CODE is 'd', then the X is a condition operand and the instruction
14649 should only be executed if the condition is true.
14650 if CODE is 'D', then the X is a condition operand and the instruction
14651 should only be executed if the condition is false: however, if the mode
14652 of the comparison is CCFPEmode, then always execute the instruction -- we
14653 do this because in these circumstances !GE does not necessarily imply LT;
14654 in these cases the instruction pattern will take care to make sure that
14655 an instruction containing %d will follow, thereby undoing the effects of
14656 doing this instruction unconditionally.
14657 If CODE is 'N' then X is a floating point operand that must be negated
14659 If CODE is 'B' then output a bitwise inverted value of X (a const int).
14660 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
14662 arm_print_operand (FILE *stream, rtx x, int code)
14667 fputs (ASM_COMMENT_START, stream);
14671 fputs (user_label_prefix, stream);
14675 fputs (REGISTER_PREFIX, stream);
14679 arm_print_condition (stream);
14683 /* Nothing in unified syntax, otherwise the current condition code. */
14684 if (!TARGET_UNIFIED_ASM)
14685 arm_print_condition (stream);
14689 /* The current condition code in unified syntax, otherwise nothing. */
14690 if (TARGET_UNIFIED_ASM)
14691 arm_print_condition (stream);
14695 /* The current condition code for a condition code setting instruction.
14696 Preceded by 's' in unified syntax, otherwise followed by 's'. */
14697 if (TARGET_UNIFIED_ASM)
14699 fputc('s', stream);
14700 arm_print_condition (stream);
14704 arm_print_condition (stream);
14705 fputc('s', stream);
14710 /* If the instruction is conditionally executed then print
14711 the current condition code, otherwise print 's'. */
14712 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
14713 if (current_insn_predicate)
14714 arm_print_condition (stream);
14716 fputc('s', stream);
14719 /* %# is a "break" sequence. It doesn't output anything, but is used to
14720 separate e.g. operand numbers from following text, if that text consists
14721 of further digits which we don't want to be part of the operand
14729 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14730 r = REAL_VALUE_NEGATE (r);
14731 fprintf (stream, "%s", fp_const_from_val (&r));
14735 /* An integer or symbol address without a preceding # sign. */
14737 switch (GET_CODE (x))
14740 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14744 output_addr_const (stream, x);
14748 gcc_unreachable ();
14753 if (GET_CODE (x) == CONST_INT)
14756 val = ARM_SIGN_EXTEND (~INTVAL (x));
14757 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
14761 putc ('~', stream);
14762 output_addr_const (stream, x);
14767 /* The low 16 bits of an immediate constant. */
14768 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
14772 fprintf (stream, "%s", arithmetic_instr (x, 1));
14775 /* Truncate Cirrus shift counts. */
14777 if (GET_CODE (x) == CONST_INT)
14779 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
14782 arm_print_operand (stream, x, 0);
14786 fprintf (stream, "%s", arithmetic_instr (x, 0));
14794 if (!shift_operator (x, SImode))
14796 output_operand_lossage ("invalid shift operand");
14800 shift = shift_op (x, &val);
14804 fprintf (stream, ", %s ", shift);
14806 arm_print_operand (stream, XEXP (x, 1), 0);
14808 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
14813 /* An explanation of the 'Q', 'R' and 'H' register operands:
14815 In a pair of registers containing a DI or DF value the 'Q'
14816 operand returns the register number of the register containing
14817 the least significant part of the value. The 'R' operand returns
14818 the register number of the register containing the most
14819 significant part of the value.
14821 The 'H' operand returns the higher of the two register numbers.
14822 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
14823 same as the 'Q' operand, since the most significant part of the
14824 value is held in the lower number register. The reverse is true
14825 on systems where WORDS_BIG_ENDIAN is false.
14827 The purpose of these operands is to distinguish between cases
14828 where the endian-ness of the values is important (for example
14829 when they are added together), and cases where the endian-ness
14830 is irrelevant, but the order of register operations is important.
14831 For example when loading a value from memory into a register
14832 pair, the endian-ness does not matter. Provided that the value
14833 from the lower memory address is put into the lower numbered
14834 register, and the value from the higher address is put into the
14835 higher numbered register, the load will work regardless of whether
14836 the value being loaded is big-wordian or little-wordian. The
14837 order of the two register loads can matter however, if the address
14838 of the memory location is actually held in one of the registers
14839 being overwritten by the load. */
14841 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14843 output_operand_lossage ("invalid operand for code '%c'", code);
14847 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
14851 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14853 output_operand_lossage ("invalid operand for code '%c'", code);
14857 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
14861 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14863 output_operand_lossage ("invalid operand for code '%c'", code);
14867 asm_fprintf (stream, "%r", REGNO (x) + 1);
14871 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14873 output_operand_lossage ("invalid operand for code '%c'", code);
14877 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
14881 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14883 output_operand_lossage ("invalid operand for code '%c'", code);
14887 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
14891 asm_fprintf (stream, "%r",
14892 GET_CODE (XEXP (x, 0)) == REG
14893 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
14897 asm_fprintf (stream, "{%r-%r}",
14899 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
14902 /* Like 'M', but writing doubleword vector registers, for use by Neon
14906 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
14907 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
14909 asm_fprintf (stream, "{d%d}", regno);
14911 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
14916 /* CONST_TRUE_RTX means always -- that's the default. */
14917 if (x == const_true_rtx)
14920 if (!COMPARISON_P (x))
14922 output_operand_lossage ("invalid operand for code '%c'", code);
14926 fputs (arm_condition_codes[get_arm_condition_code (x)],
14931 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
14932 want to do that. */
14933 if (x == const_true_rtx)
14935 output_operand_lossage ("instruction never executed");
14938 if (!COMPARISON_P (x))
14940 output_operand_lossage ("invalid operand for code '%c'", code);
14944 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
14945 (get_arm_condition_code (x))],
14949 /* Cirrus registers can be accessed in a variety of ways:
14950 single floating point (f)
14951 double floating point (d)
14953 64bit integer (dx). */
14954 case 'W': /* Cirrus register in F mode. */
14955 case 'X': /* Cirrus register in D mode. */
14956 case 'Y': /* Cirrus register in FX mode. */
14957 case 'Z': /* Cirrus register in DX mode. */
14958 gcc_assert (GET_CODE (x) == REG
14959 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
14961 fprintf (stream, "mv%s%s",
14963 : code == 'X' ? "d"
14964 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
14968 /* Print cirrus register in the mode specified by the register's mode. */
14971 int mode = GET_MODE (x);
14973 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
14975 output_operand_lossage ("invalid operand for code '%c'", code);
14979 fprintf (stream, "mv%s%s",
14980 mode == DFmode ? "d"
14981 : mode == SImode ? "fx"
14982 : mode == DImode ? "dx"
14983 : "f", reg_names[REGNO (x)] + 2);
14989 if (GET_CODE (x) != REG
14990 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
14991 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
14992 /* Bad value for wCG register number. */
14994 output_operand_lossage ("invalid operand for code '%c'", code);
14999 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15002 /* Print an iWMMXt control register name. */
15004 if (GET_CODE (x) != CONST_INT
15006 || INTVAL (x) >= 16)
15007 /* Bad value for wC register number. */
15009 output_operand_lossage ("invalid operand for code '%c'", code);
15015 static const char * wc_reg_names [16] =
15017 "wCID", "wCon", "wCSSF", "wCASF",
15018 "wC4", "wC5", "wC6", "wC7",
15019 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15020 "wC12", "wC13", "wC14", "wC15"
15023 fprintf (stream, wc_reg_names [INTVAL (x)]);
15027 /* Print a VFP/Neon double precision or quad precision register name. */
15031 int mode = GET_MODE (x);
15032 int is_quad = (code == 'q');
15035 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15037 output_operand_lossage ("invalid operand for code '%c'", code);
15041 if (GET_CODE (x) != REG
15042 || !IS_VFP_REGNUM (REGNO (x)))
15044 output_operand_lossage ("invalid operand for code '%c'", code);
15049 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15050 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15052 output_operand_lossage ("invalid operand for code '%c'", code);
15056 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15057 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15061 /* These two codes print the low/high doubleword register of a Neon quad
15062 register, respectively. For pair-structure types, can also print
15063 low/high quadword registers. */
15067 int mode = GET_MODE (x);
15070 if ((GET_MODE_SIZE (mode) != 16
15071 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15073 output_operand_lossage ("invalid operand for code '%c'", code);
15078 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15080 output_operand_lossage ("invalid operand for code '%c'", code);
15084 if (GET_MODE_SIZE (mode) == 16)
15085 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15086 + (code == 'f' ? 1 : 0));
15088 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15089 + (code == 'f' ? 1 : 0));
15093 /* Print a VFPv3 floating-point constant, represented as an integer
15097 int index = vfp3_const_double_index (x);
15098 gcc_assert (index != -1);
15099 fprintf (stream, "%d", index);
15103 /* Print bits representing opcode features for Neon.
15105 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15106 and polynomials as unsigned.
15108 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15110 Bit 2 is 1 for rounding functions, 0 otherwise. */
15112 /* Identify the type as 's', 'u', 'p' or 'f'. */
15115 HOST_WIDE_INT bits = INTVAL (x);
15116 fputc ("uspf"[bits & 3], stream);
15120 /* Likewise, but signed and unsigned integers are both 'i'. */
15123 HOST_WIDE_INT bits = INTVAL (x);
15124 fputc ("iipf"[bits & 3], stream);
15128 /* As for 'T', but emit 'u' instead of 'p'. */
15131 HOST_WIDE_INT bits = INTVAL (x);
15132 fputc ("usuf"[bits & 3], stream);
15136 /* Bit 2: rounding (vs none). */
15139 HOST_WIDE_INT bits = INTVAL (x);
15140 fputs ((bits & 4) != 0 ? "r" : "", stream);
15144 /* Memory operand for vld1/vst1 instruction. */
15148 bool postinc = FALSE;
15149 gcc_assert (GET_CODE (x) == MEM);
15150 addr = XEXP (x, 0);
15151 if (GET_CODE (addr) == POST_INC)
15154 addr = XEXP (addr, 0);
15156 asm_fprintf (stream, "[%r]", REGNO (addr));
15158 fputs("!", stream);
15162 /* Register specifier for vld1.16/vst1.16. Translate the S register
15163 number into a D register number and element index. */
15166 int mode = GET_MODE (x);
15169 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15171 output_operand_lossage ("invalid operand for code '%c'", code);
15176 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15178 output_operand_lossage ("invalid operand for code '%c'", code);
15182 regno = regno - FIRST_VFP_REGNUM;
15183 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15190 output_operand_lossage ("missing operand");
15194 switch (GET_CODE (x))
15197 asm_fprintf (stream, "%r", REGNO (x));
15201 output_memory_reference_mode = GET_MODE (x);
15202 output_address (XEXP (x, 0));
15209 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15210 sizeof (fpstr), 0, 1);
15211 fprintf (stream, "#%s", fpstr);
15214 fprintf (stream, "#%s", fp_immediate_constant (x));
15218 gcc_assert (GET_CODE (x) != NEG);
15219 fputc ('#', stream);
15220 if (GET_CODE (x) == HIGH)
15222 fputs (":lower16:", stream);
15226 output_addr_const (stream, x);
15232 /* Target hook for assembling integer objects. The ARM version needs to
15233 handle word-sized values specially. */
15235 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15237 enum machine_mode mode;
15239 if (size == UNITS_PER_WORD && aligned_p)
15241 fputs ("\t.word\t", asm_out_file);
15242 output_addr_const (asm_out_file, x);
15244 /* Mark symbols as position independent. We only do this in the
15245 .text segment, not in the .data segment. */
15246 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15247 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15249 /* See legitimize_pic_address for an explanation of the
15250 TARGET_VXWORKS_RTP check. */
15251 if (TARGET_VXWORKS_RTP
15252 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15253 fputs ("(GOT)", asm_out_file);
15255 fputs ("(GOTOFF)", asm_out_file);
15257 fputc ('\n', asm_out_file);
15261 mode = GET_MODE (x);
15263 if (arm_vector_mode_supported_p (mode))
15267 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15269 units = CONST_VECTOR_NUNITS (x);
15270 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15272 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15273 for (i = 0; i < units; i++)
15275 rtx elt = CONST_VECTOR_ELT (x, i);
15277 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15280 for (i = 0; i < units; i++)
15282 rtx elt = CONST_VECTOR_ELT (x, i);
15283 REAL_VALUE_TYPE rval;
15285 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15288 (rval, GET_MODE_INNER (mode),
15289 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15295 return default_assemble_integer (x, size, aligned_p);
15299 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15303 if (!TARGET_AAPCS_BASED)
15306 default_named_section_asm_out_constructor
15307 : default_named_section_asm_out_destructor) (symbol, priority);
15311 /* Put these in the .init_array section, using a special relocation. */
15312 if (priority != DEFAULT_INIT_PRIORITY)
15315 sprintf (buf, "%s.%.5u",
15316 is_ctor ? ".init_array" : ".fini_array",
15318 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15325 switch_to_section (s);
15326 assemble_align (POINTER_SIZE);
15327 fputs ("\t.word\t", asm_out_file);
15328 output_addr_const (asm_out_file, symbol);
15329 fputs ("(target1)\n", asm_out_file);
15332 /* Add a function to the list of static constructors. */
15335 arm_elf_asm_constructor (rtx symbol, int priority)
15337 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15340 /* Add a function to the list of static destructors. */
15343 arm_elf_asm_destructor (rtx symbol, int priority)
15345 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15348 /* A finite state machine takes care of noticing whether or not instructions
15349 can be conditionally executed, and thus decrease execution time and code
15350 size by deleting branch instructions. The fsm is controlled by
15351 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15353 /* The state of the fsm controlling condition codes are:
15354 0: normal, do nothing special
15355 1: make ASM_OUTPUT_OPCODE not output this instruction
15356 2: make ASM_OUTPUT_OPCODE not output this instruction
15357 3: make instructions conditional
15358 4: make instructions conditional
15360 State transitions (state->state by whom under condition):
15361 0 -> 1 final_prescan_insn if the `target' is a label
15362 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15363 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15364 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15365 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15366 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15367 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15368 (the target insn is arm_target_insn).
15370 If the jump clobbers the conditions then we use states 2 and 4.
15372 A similar thing can be done with conditional return insns.
15374 XXX In case the `target' is an unconditional branch, this conditionalising
15375 of the instructions always reduces code size, but not always execution
15376 time. But then, I want to reduce the code size to somewhere near what
15377 /bin/cc produces. */
15379 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15380 instructions. When a COND_EXEC instruction is seen the subsequent
15381 instructions are scanned so that multiple conditional instructions can be
15382 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15383 specify the length and true/false mask for the IT block. These will be
15384 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
15386 /* Returns the index of the ARM condition code string in
15387 `arm_condition_codes'. COMPARISON should be an rtx like
15388 `(eq (...) (...))'. */
15389 static enum arm_cond_code
15390 get_arm_condition_code (rtx comparison)
15392 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15393 enum arm_cond_code code;
15394 enum rtx_code comp_code = GET_CODE (comparison);
15396 if (GET_MODE_CLASS (mode) != MODE_CC)
15397 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15398 XEXP (comparison, 1));
15402 case CC_DNEmode: code = ARM_NE; goto dominance;
15403 case CC_DEQmode: code = ARM_EQ; goto dominance;
15404 case CC_DGEmode: code = ARM_GE; goto dominance;
15405 case CC_DGTmode: code = ARM_GT; goto dominance;
15406 case CC_DLEmode: code = ARM_LE; goto dominance;
15407 case CC_DLTmode: code = ARM_LT; goto dominance;
15408 case CC_DGEUmode: code = ARM_CS; goto dominance;
15409 case CC_DGTUmode: code = ARM_HI; goto dominance;
15410 case CC_DLEUmode: code = ARM_LS; goto dominance;
15411 case CC_DLTUmode: code = ARM_CC;
15414 gcc_assert (comp_code == EQ || comp_code == NE);
15416 if (comp_code == EQ)
15417 return ARM_INVERSE_CONDITION_CODE (code);
15423 case NE: return ARM_NE;
15424 case EQ: return ARM_EQ;
15425 case GE: return ARM_PL;
15426 case LT: return ARM_MI;
15427 default: gcc_unreachable ();
15433 case NE: return ARM_NE;
15434 case EQ: return ARM_EQ;
15435 default: gcc_unreachable ();
15441 case NE: return ARM_MI;
15442 case EQ: return ARM_PL;
15443 default: gcc_unreachable ();
15448 /* These encodings assume that AC=1 in the FPA system control
15449 byte. This allows us to handle all cases except UNEQ and
15453 case GE: return ARM_GE;
15454 case GT: return ARM_GT;
15455 case LE: return ARM_LS;
15456 case LT: return ARM_MI;
15457 case NE: return ARM_NE;
15458 case EQ: return ARM_EQ;
15459 case ORDERED: return ARM_VC;
15460 case UNORDERED: return ARM_VS;
15461 case UNLT: return ARM_LT;
15462 case UNLE: return ARM_LE;
15463 case UNGT: return ARM_HI;
15464 case UNGE: return ARM_PL;
15465 /* UNEQ and LTGT do not have a representation. */
15466 case UNEQ: /* Fall through. */
15467 case LTGT: /* Fall through. */
15468 default: gcc_unreachable ();
15474 case NE: return ARM_NE;
15475 case EQ: return ARM_EQ;
15476 case GE: return ARM_LE;
15477 case GT: return ARM_LT;
15478 case LE: return ARM_GE;
15479 case LT: return ARM_GT;
15480 case GEU: return ARM_LS;
15481 case GTU: return ARM_CC;
15482 case LEU: return ARM_CS;
15483 case LTU: return ARM_HI;
15484 default: gcc_unreachable ();
15490 case LTU: return ARM_CS;
15491 case GEU: return ARM_CC;
15492 default: gcc_unreachable ();
15498 case NE: return ARM_NE;
15499 case EQ: return ARM_EQ;
15500 case GE: return ARM_GE;
15501 case GT: return ARM_GT;
15502 case LE: return ARM_LE;
15503 case LT: return ARM_LT;
15504 case GEU: return ARM_CS;
15505 case GTU: return ARM_HI;
15506 case LEU: return ARM_LS;
15507 case LTU: return ARM_CC;
15508 default: gcc_unreachable ();
15511 default: gcc_unreachable ();
15515 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15518 thumb2_final_prescan_insn (rtx insn)
15520 rtx first_insn = insn;
15521 rtx body = PATTERN (insn);
15523 enum arm_cond_code code;
15527 /* Remove the previous insn from the count of insns to be output. */
15528 if (arm_condexec_count)
15529 arm_condexec_count--;
15531 /* Nothing to do if we are already inside a conditional block. */
15532 if (arm_condexec_count)
15535 if (GET_CODE (body) != COND_EXEC)
15538 /* Conditional jumps are implemented directly. */
15539 if (GET_CODE (insn) == JUMP_INSN)
15542 predicate = COND_EXEC_TEST (body);
15543 arm_current_cc = get_arm_condition_code (predicate);
15545 n = get_attr_ce_count (insn);
15546 arm_condexec_count = 1;
15547 arm_condexec_mask = (1 << n) - 1;
15548 arm_condexec_masklen = n;
15549 /* See if subsequent instructions can be combined into the same block. */
15552 insn = next_nonnote_insn (insn);
15554 /* Jumping into the middle of an IT block is illegal, so a label or
15555 barrier terminates the block. */
15556 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
15559 body = PATTERN (insn);
15560 /* USE and CLOBBER aren't really insns, so just skip them. */
15561 if (GET_CODE (body) == USE
15562 || GET_CODE (body) == CLOBBER)
15565 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
15566 if (GET_CODE (body) != COND_EXEC)
15568 /* Allow up to 4 conditionally executed instructions in a block. */
15569 n = get_attr_ce_count (insn);
15570 if (arm_condexec_masklen + n > 4)
15573 predicate = COND_EXEC_TEST (body);
15574 code = get_arm_condition_code (predicate);
15575 mask = (1 << n) - 1;
15576 if (arm_current_cc == code)
15577 arm_condexec_mask |= (mask << arm_condexec_masklen);
15578 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
15581 arm_condexec_count++;
15582 arm_condexec_masklen += n;
15584 /* A jump must be the last instruction in a conditional block. */
15585 if (GET_CODE(insn) == JUMP_INSN)
15588 /* Restore recog_data (getting the attributes of other insns can
15589 destroy this array, but final.c assumes that it remains intact
15590 across this call). */
15591 extract_constrain_insn_cached (first_insn);
15595 arm_final_prescan_insn (rtx insn)
15597 /* BODY will hold the body of INSN. */
15598 rtx body = PATTERN (insn);
15600 /* This will be 1 if trying to repeat the trick, and things need to be
15601 reversed if it appears to fail. */
15604 /* If we start with a return insn, we only succeed if we find another one. */
15605 int seeking_return = 0;
15607 /* START_INSN will hold the insn from where we start looking. This is the
15608 first insn after the following code_label if REVERSE is true. */
15609 rtx start_insn = insn;
15611 /* If in state 4, check if the target branch is reached, in order to
15612 change back to state 0. */
15613 if (arm_ccfsm_state == 4)
15615 if (insn == arm_target_insn)
15617 arm_target_insn = NULL;
15618 arm_ccfsm_state = 0;
15623 /* If in state 3, it is possible to repeat the trick, if this insn is an
15624 unconditional branch to a label, and immediately following this branch
15625 is the previous target label which is only used once, and the label this
15626 branch jumps to is not too far off. */
15627 if (arm_ccfsm_state == 3)
15629 if (simplejump_p (insn))
15631 start_insn = next_nonnote_insn (start_insn);
15632 if (GET_CODE (start_insn) == BARRIER)
15634 /* XXX Isn't this always a barrier? */
15635 start_insn = next_nonnote_insn (start_insn);
15637 if (GET_CODE (start_insn) == CODE_LABEL
15638 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15639 && LABEL_NUSES (start_insn) == 1)
15644 else if (GET_CODE (body) == RETURN)
15646 start_insn = next_nonnote_insn (start_insn);
15647 if (GET_CODE (start_insn) == BARRIER)
15648 start_insn = next_nonnote_insn (start_insn);
15649 if (GET_CODE (start_insn) == CODE_LABEL
15650 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15651 && LABEL_NUSES (start_insn) == 1)
15654 seeking_return = 1;
15663 gcc_assert (!arm_ccfsm_state || reverse);
15664 if (GET_CODE (insn) != JUMP_INSN)
15667 /* This jump might be paralleled with a clobber of the condition codes
15668 the jump should always come first */
15669 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
15670 body = XVECEXP (body, 0, 0);
15673 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
15674 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
15677 int fail = FALSE, succeed = FALSE;
15678 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
15679 int then_not_else = TRUE;
15680 rtx this_insn = start_insn, label = 0;
15682 /* Register the insn jumped to. */
15685 if (!seeking_return)
15686 label = XEXP (SET_SRC (body), 0);
15688 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
15689 label = XEXP (XEXP (SET_SRC (body), 1), 0);
15690 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
15692 label = XEXP (XEXP (SET_SRC (body), 2), 0);
15693 then_not_else = FALSE;
15695 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
15696 seeking_return = 1;
15697 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
15699 seeking_return = 1;
15700 then_not_else = FALSE;
15703 gcc_unreachable ();
15705 /* See how many insns this branch skips, and what kind of insns. If all
15706 insns are okay, and the label or unconditional branch to the same
15707 label is not too far away, succeed. */
15708 for (insns_skipped = 0;
15709 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
15713 this_insn = next_nonnote_insn (this_insn);
15717 switch (GET_CODE (this_insn))
15720 /* Succeed if it is the target label, otherwise fail since
15721 control falls in from somewhere else. */
15722 if (this_insn == label)
15724 arm_ccfsm_state = 1;
15732 /* Succeed if the following insn is the target label.
15734 If return insns are used then the last insn in a function
15735 will be a barrier. */
15736 this_insn = next_nonnote_insn (this_insn);
15737 if (this_insn && this_insn == label)
15739 arm_ccfsm_state = 1;
15747 /* The AAPCS says that conditional calls should not be
15748 used since they make interworking inefficient (the
15749 linker can't transform BL<cond> into BLX). That's
15750 only a problem if the machine has BLX. */
15757 /* Succeed if the following insn is the target label, or
15758 if the following two insns are a barrier and the
15760 this_insn = next_nonnote_insn (this_insn);
15761 if (this_insn && GET_CODE (this_insn) == BARRIER)
15762 this_insn = next_nonnote_insn (this_insn);
15764 if (this_insn && this_insn == label
15765 && insns_skipped < max_insns_skipped)
15767 arm_ccfsm_state = 1;
15775 /* If this is an unconditional branch to the same label, succeed.
15776 If it is to another label, do nothing. If it is conditional,
15778 /* XXX Probably, the tests for SET and the PC are
15781 scanbody = PATTERN (this_insn);
15782 if (GET_CODE (scanbody) == SET
15783 && GET_CODE (SET_DEST (scanbody)) == PC)
15785 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
15786 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
15788 arm_ccfsm_state = 2;
15791 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
15794 /* Fail if a conditional return is undesirable (e.g. on a
15795 StrongARM), but still allow this if optimizing for size. */
15796 else if (GET_CODE (scanbody) == RETURN
15797 && !use_return_insn (TRUE, NULL)
15800 else if (GET_CODE (scanbody) == RETURN
15803 arm_ccfsm_state = 2;
15806 else if (GET_CODE (scanbody) == PARALLEL)
15808 switch (get_attr_conds (this_insn))
15818 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
15823 /* Instructions using or affecting the condition codes make it
15825 scanbody = PATTERN (this_insn);
15826 if (!(GET_CODE (scanbody) == SET
15827 || GET_CODE (scanbody) == PARALLEL)
15828 || get_attr_conds (this_insn) != CONDS_NOCOND)
15831 /* A conditional cirrus instruction must be followed by
15832 a non Cirrus instruction. However, since we
15833 conditionalize instructions in this function and by
15834 the time we get here we can't add instructions
15835 (nops), because shorten_branches() has already been
15836 called, we will disable conditionalizing Cirrus
15837 instructions to be safe. */
15838 if (GET_CODE (scanbody) != USE
15839 && GET_CODE (scanbody) != CLOBBER
15840 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
15850 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
15851 arm_target_label = CODE_LABEL_NUMBER (label);
15854 gcc_assert (seeking_return || arm_ccfsm_state == 2);
15856 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
15858 this_insn = next_nonnote_insn (this_insn);
15859 gcc_assert (!this_insn
15860 || (GET_CODE (this_insn) != BARRIER
15861 && GET_CODE (this_insn) != CODE_LABEL));
15865 /* Oh, dear! we ran off the end.. give up. */
15866 extract_constrain_insn_cached (insn);
15867 arm_ccfsm_state = 0;
15868 arm_target_insn = NULL;
15871 arm_target_insn = this_insn;
15874 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
15877 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
15879 if (reverse || then_not_else)
15880 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
15883 /* Restore recog_data (getting the attributes of other insns can
15884 destroy this array, but final.c assumes that it remains intact
15885 across this call. */
15886 extract_constrain_insn_cached (insn);
15890 /* Output IT instructions. */
15892 thumb2_asm_output_opcode (FILE * stream)
15897 if (arm_condexec_mask)
15899 for (n = 0; n < arm_condexec_masklen; n++)
15900 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
15902 asm_fprintf(stream, "i%s\t%s\n\t", buff,
15903 arm_condition_codes[arm_current_cc]);
15904 arm_condexec_mask = 0;
15908 /* Returns true if REGNO is a valid register
15909 for holding a quantity of type MODE. */
15911 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
15913 if (GET_MODE_CLASS (mode) == MODE_CC)
15914 return (regno == CC_REGNUM
15915 || (TARGET_HARD_FLOAT && TARGET_VFP
15916 && regno == VFPCC_REGNUM));
15919 /* For the Thumb we only allow values bigger than SImode in
15920 registers 0 - 6, so that there is always a second low
15921 register available to hold the upper part of the value.
15922 We probably we ought to ensure that the register is the
15923 start of an even numbered register pair. */
15924 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
15926 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
15927 && IS_CIRRUS_REGNUM (regno))
15928 /* We have outlawed SI values in Cirrus registers because they
15929 reside in the lower 32 bits, but SF values reside in the
15930 upper 32 bits. This causes gcc all sorts of grief. We can't
15931 even split the registers into pairs because Cirrus SI values
15932 get sign extended to 64bits-- aldyh. */
15933 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
15935 if (TARGET_HARD_FLOAT && TARGET_VFP
15936 && IS_VFP_REGNUM (regno))
15938 if (mode == SFmode || mode == SImode)
15939 return VFP_REGNO_OK_FOR_SINGLE (regno);
15941 if (mode == DFmode)
15942 return VFP_REGNO_OK_FOR_DOUBLE (regno);
15944 /* VFP registers can hold HFmode values, but there is no point in
15945 putting them there unless we have the NEON extensions for
15946 loading/storing them, too. */
15947 if (mode == HFmode)
15948 return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
15951 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
15952 || (VALID_NEON_QREG_MODE (mode)
15953 && NEON_REGNO_OK_FOR_QUAD (regno))
15954 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
15955 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
15956 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
15957 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
15958 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
15963 if (TARGET_REALLY_IWMMXT)
15965 if (IS_IWMMXT_GR_REGNUM (regno))
15966 return mode == SImode;
15968 if (IS_IWMMXT_REGNUM (regno))
15969 return VALID_IWMMXT_REG_MODE (mode);
15972 /* We allow almost any value to be stored in the general registers.
15973 Restrict doubleword quantities to even register pairs so that we can
15974 use ldrd. Do not allow very large Neon structure opaque modes in
15975 general registers; they would use too many. */
15976 if (regno <= LAST_ARM_REGNUM)
15977 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
15978 && ARM_NUM_REGS (mode) <= 4;
15980 if (regno == FRAME_POINTER_REGNUM
15981 || regno == ARG_POINTER_REGNUM)
15982 /* We only allow integers in the fake hard registers. */
15983 return GET_MODE_CLASS (mode) == MODE_INT;
15985 /* The only registers left are the FPA registers
15986 which we only allow to hold FP values. */
15987 return (TARGET_HARD_FLOAT && TARGET_FPA
15988 && GET_MODE_CLASS (mode) == MODE_FLOAT
15989 && regno >= FIRST_FPA_REGNUM
15990 && regno <= LAST_FPA_REGNUM);
15993 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
15994 not used in arm mode. */
15997 arm_regno_class (int regno)
16001 if (regno == STACK_POINTER_REGNUM)
16003 if (regno == CC_REGNUM)
16010 if (TARGET_THUMB2 && regno < 8)
16013 if ( regno <= LAST_ARM_REGNUM
16014 || regno == FRAME_POINTER_REGNUM
16015 || regno == ARG_POINTER_REGNUM)
16016 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16018 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16019 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16021 if (IS_CIRRUS_REGNUM (regno))
16022 return CIRRUS_REGS;
16024 if (IS_VFP_REGNUM (regno))
16026 if (regno <= D7_VFP_REGNUM)
16027 return VFP_D0_D7_REGS;
16028 else if (regno <= LAST_LO_VFP_REGNUM)
16029 return VFP_LO_REGS;
16031 return VFP_HI_REGS;
16034 if (IS_IWMMXT_REGNUM (regno))
16035 return IWMMXT_REGS;
16037 if (IS_IWMMXT_GR_REGNUM (regno))
16038 return IWMMXT_GR_REGS;
16043 /* Handle a special case when computing the offset
16044 of an argument from the frame pointer. */
16046 arm_debugger_arg_offset (int value, rtx addr)
16050 /* We are only interested if dbxout_parms() failed to compute the offset. */
16054 /* We can only cope with the case where the address is held in a register. */
16055 if (GET_CODE (addr) != REG)
16058 /* If we are using the frame pointer to point at the argument, then
16059 an offset of 0 is correct. */
16060 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16063 /* If we are using the stack pointer to point at the
16064 argument, then an offset of 0 is correct. */
16065 /* ??? Check this is consistent with thumb2 frame layout. */
16066 if ((TARGET_THUMB || !frame_pointer_needed)
16067 && REGNO (addr) == SP_REGNUM)
16070 /* Oh dear. The argument is pointed to by a register rather
16071 than being held in a register, or being stored at a known
16072 offset from the frame pointer. Since GDB only understands
16073 those two kinds of argument we must translate the address
16074 held in the register into an offset from the frame pointer.
16075 We do this by searching through the insns for the function
16076 looking to see where this register gets its value. If the
16077 register is initialized from the frame pointer plus an offset
16078 then we are in luck and we can continue, otherwise we give up.
16080 This code is exercised by producing debugging information
16081 for a function with arguments like this:
16083 double func (double a, double b, int c, double d) {return d;}
16085 Without this code the stab for parameter 'd' will be set to
16086 an offset of 0 from the frame pointer, rather than 8. */
16088 /* The if() statement says:
16090 If the insn is a normal instruction
16091 and if the insn is setting the value in a register
16092 and if the register being set is the register holding the address of the argument
16093 and if the address is computing by an addition
16094 that involves adding to a register
16095 which is the frame pointer
16100 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16102 if ( GET_CODE (insn) == INSN
16103 && GET_CODE (PATTERN (insn)) == SET
16104 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16105 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16106 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16107 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16108 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16111 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16120 warning (0, "unable to compute real location of stacked parameter");
16121 value = 8; /* XXX magic hack */
16127 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16130 if ((MASK) & insn_flags) \
16131 add_builtin_function ((NAME), (TYPE), (CODE), \
16132 BUILT_IN_MD, NULL, NULL_TREE); \
16136 struct builtin_description
16138 const unsigned int mask;
16139 const enum insn_code icode;
16140 const char * const name;
16141 const enum arm_builtins code;
16142 const enum rtx_code comparison;
16143 const unsigned int flag;
16146 static const struct builtin_description bdesc_2arg[] =
16148 #define IWMMXT_BUILTIN(code, string, builtin) \
16149 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16150 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16152 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16153 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16154 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16155 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16156 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16157 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16158 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16159 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16160 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16161 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16162 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16163 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16164 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16165 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16166 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16167 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16168 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16169 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16170 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16171 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16172 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16173 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16174 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16175 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16176 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16177 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16178 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16179 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16180 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16181 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16182 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16183 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16184 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16185 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16186 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16187 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16188 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16189 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16190 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16191 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16192 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16193 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16194 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16195 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16196 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16197 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16198 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16199 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16200 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16201 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16202 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16203 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16204 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16205 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16206 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16207 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16208 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16209 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16211 #define IWMMXT_BUILTIN2(code, builtin) \
16212 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16214 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16215 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16216 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16217 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16218 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16219 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16220 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16221 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16222 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16223 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16224 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16225 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16226 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16227 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16228 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16229 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16230 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16231 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16232 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16233 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16234 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16235 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16236 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16237 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16238 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16239 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16240 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16241 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16242 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16243 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16244 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16245 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16248 static const struct builtin_description bdesc_1arg[] =
16250 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16251 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16252 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16253 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16254 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16255 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16256 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16257 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16258 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16259 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16260 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16261 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16262 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16263 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16264 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16265 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16266 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16267 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16270 /* Set up all the iWMMXt builtins. This is
16271 not called if TARGET_IWMMXT is zero. */
16274 arm_init_iwmmxt_builtins (void)
16276 const struct builtin_description * d;
16278 tree endlink = void_list_node;
16280 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16281 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16282 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16285 = build_function_type (integer_type_node,
16286 tree_cons (NULL_TREE, integer_type_node, endlink));
16287 tree v8qi_ftype_v8qi_v8qi_int
16288 = build_function_type (V8QI_type_node,
16289 tree_cons (NULL_TREE, V8QI_type_node,
16290 tree_cons (NULL_TREE, V8QI_type_node,
16291 tree_cons (NULL_TREE,
16294 tree v4hi_ftype_v4hi_int
16295 = build_function_type (V4HI_type_node,
16296 tree_cons (NULL_TREE, V4HI_type_node,
16297 tree_cons (NULL_TREE, integer_type_node,
16299 tree v2si_ftype_v2si_int
16300 = build_function_type (V2SI_type_node,
16301 tree_cons (NULL_TREE, V2SI_type_node,
16302 tree_cons (NULL_TREE, integer_type_node,
16304 tree v2si_ftype_di_di
16305 = build_function_type (V2SI_type_node,
16306 tree_cons (NULL_TREE, long_long_integer_type_node,
16307 tree_cons (NULL_TREE, long_long_integer_type_node,
16309 tree di_ftype_di_int
16310 = build_function_type (long_long_integer_type_node,
16311 tree_cons (NULL_TREE, long_long_integer_type_node,
16312 tree_cons (NULL_TREE, integer_type_node,
16314 tree di_ftype_di_int_int
16315 = build_function_type (long_long_integer_type_node,
16316 tree_cons (NULL_TREE, long_long_integer_type_node,
16317 tree_cons (NULL_TREE, integer_type_node,
16318 tree_cons (NULL_TREE,
16321 tree int_ftype_v8qi
16322 = build_function_type (integer_type_node,
16323 tree_cons (NULL_TREE, V8QI_type_node,
16325 tree int_ftype_v4hi
16326 = build_function_type (integer_type_node,
16327 tree_cons (NULL_TREE, V4HI_type_node,
16329 tree int_ftype_v2si
16330 = build_function_type (integer_type_node,
16331 tree_cons (NULL_TREE, V2SI_type_node,
16333 tree int_ftype_v8qi_int
16334 = build_function_type (integer_type_node,
16335 tree_cons (NULL_TREE, V8QI_type_node,
16336 tree_cons (NULL_TREE, integer_type_node,
16338 tree int_ftype_v4hi_int
16339 = build_function_type (integer_type_node,
16340 tree_cons (NULL_TREE, V4HI_type_node,
16341 tree_cons (NULL_TREE, integer_type_node,
16343 tree int_ftype_v2si_int
16344 = build_function_type (integer_type_node,
16345 tree_cons (NULL_TREE, V2SI_type_node,
16346 tree_cons (NULL_TREE, integer_type_node,
16348 tree v8qi_ftype_v8qi_int_int
16349 = build_function_type (V8QI_type_node,
16350 tree_cons (NULL_TREE, V8QI_type_node,
16351 tree_cons (NULL_TREE, integer_type_node,
16352 tree_cons (NULL_TREE,
16355 tree v4hi_ftype_v4hi_int_int
16356 = build_function_type (V4HI_type_node,
16357 tree_cons (NULL_TREE, V4HI_type_node,
16358 tree_cons (NULL_TREE, integer_type_node,
16359 tree_cons (NULL_TREE,
16362 tree v2si_ftype_v2si_int_int
16363 = build_function_type (V2SI_type_node,
16364 tree_cons (NULL_TREE, V2SI_type_node,
16365 tree_cons (NULL_TREE, integer_type_node,
16366 tree_cons (NULL_TREE,
16369 /* Miscellaneous. */
16370 tree v8qi_ftype_v4hi_v4hi
16371 = build_function_type (V8QI_type_node,
16372 tree_cons (NULL_TREE, V4HI_type_node,
16373 tree_cons (NULL_TREE, V4HI_type_node,
16375 tree v4hi_ftype_v2si_v2si
16376 = build_function_type (V4HI_type_node,
16377 tree_cons (NULL_TREE, V2SI_type_node,
16378 tree_cons (NULL_TREE, V2SI_type_node,
16380 tree v2si_ftype_v4hi_v4hi
16381 = build_function_type (V2SI_type_node,
16382 tree_cons (NULL_TREE, V4HI_type_node,
16383 tree_cons (NULL_TREE, V4HI_type_node,
16385 tree v2si_ftype_v8qi_v8qi
16386 = build_function_type (V2SI_type_node,
16387 tree_cons (NULL_TREE, V8QI_type_node,
16388 tree_cons (NULL_TREE, V8QI_type_node,
16390 tree v4hi_ftype_v4hi_di
16391 = build_function_type (V4HI_type_node,
16392 tree_cons (NULL_TREE, V4HI_type_node,
16393 tree_cons (NULL_TREE,
16394 long_long_integer_type_node,
16396 tree v2si_ftype_v2si_di
16397 = build_function_type (V2SI_type_node,
16398 tree_cons (NULL_TREE, V2SI_type_node,
16399 tree_cons (NULL_TREE,
16400 long_long_integer_type_node,
16402 tree void_ftype_int_int
16403 = build_function_type (void_type_node,
16404 tree_cons (NULL_TREE, integer_type_node,
16405 tree_cons (NULL_TREE, integer_type_node,
16408 = build_function_type (long_long_unsigned_type_node, endlink);
16410 = build_function_type (long_long_integer_type_node,
16411 tree_cons (NULL_TREE, V8QI_type_node,
16414 = build_function_type (long_long_integer_type_node,
16415 tree_cons (NULL_TREE, V4HI_type_node,
16418 = build_function_type (long_long_integer_type_node,
16419 tree_cons (NULL_TREE, V2SI_type_node,
16421 tree v2si_ftype_v4hi
16422 = build_function_type (V2SI_type_node,
16423 tree_cons (NULL_TREE, V4HI_type_node,
16425 tree v4hi_ftype_v8qi
16426 = build_function_type (V4HI_type_node,
16427 tree_cons (NULL_TREE, V8QI_type_node,
16430 tree di_ftype_di_v4hi_v4hi
16431 = build_function_type (long_long_unsigned_type_node,
16432 tree_cons (NULL_TREE,
16433 long_long_unsigned_type_node,
16434 tree_cons (NULL_TREE, V4HI_type_node,
16435 tree_cons (NULL_TREE,
16439 tree di_ftype_v4hi_v4hi
16440 = build_function_type (long_long_unsigned_type_node,
16441 tree_cons (NULL_TREE, V4HI_type_node,
16442 tree_cons (NULL_TREE, V4HI_type_node,
16445 /* Normal vector binops. */
16446 tree v8qi_ftype_v8qi_v8qi
16447 = build_function_type (V8QI_type_node,
16448 tree_cons (NULL_TREE, V8QI_type_node,
16449 tree_cons (NULL_TREE, V8QI_type_node,
16451 tree v4hi_ftype_v4hi_v4hi
16452 = build_function_type (V4HI_type_node,
16453 tree_cons (NULL_TREE, V4HI_type_node,
16454 tree_cons (NULL_TREE, V4HI_type_node,
16456 tree v2si_ftype_v2si_v2si
16457 = build_function_type (V2SI_type_node,
16458 tree_cons (NULL_TREE, V2SI_type_node,
16459 tree_cons (NULL_TREE, V2SI_type_node,
16461 tree di_ftype_di_di
16462 = build_function_type (long_long_unsigned_type_node,
16463 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16464 tree_cons (NULL_TREE,
16465 long_long_unsigned_type_node,
16468 /* Add all builtins that are more or less simple operations on two
16470 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16472 /* Use one of the operands; the target can have a different mode for
16473 mask-generating compares. */
16474 enum machine_mode mode;
16480 mode = insn_data[d->icode].operand[1].mode;
16485 type = v8qi_ftype_v8qi_v8qi;
16488 type = v4hi_ftype_v4hi_v4hi;
16491 type = v2si_ftype_v2si_v2si;
16494 type = di_ftype_di_di;
16498 gcc_unreachable ();
16501 def_mbuiltin (d->mask, d->name, type, d->code);
16504 /* Add the remaining MMX insns with somewhat more complicated types. */
16505 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16506 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16507 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16509 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16510 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16511 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16512 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16513 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16514 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16516 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16517 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16518 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16519 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16520 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16521 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16523 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16524 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16525 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16526 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16527 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16528 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16530 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16531 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16532 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16533 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16534 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16535 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16537 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16539 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16540 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16541 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16542 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16544 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16545 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16546 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16547 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16548 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16549 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16550 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16551 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16552 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16554 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16555 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16556 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16558 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16559 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16560 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16562 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16563 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16564 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16565 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16566 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16567 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16569 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16570 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16571 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16572 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16573 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16574 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16575 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16576 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16577 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16578 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16579 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
16580 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
16582 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
16583 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
16584 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
16585 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
16587 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
16588 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
16589 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
16590 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
16591 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
16592 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
16593 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
16597 arm_init_tls_builtins (void)
16601 ftype = build_function_type (ptr_type_node, void_list_node);
16602 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
16603 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
16605 TREE_NOTHROW (decl) = 1;
16606 TREE_READONLY (decl) = 1;
16609 enum neon_builtin_type_bits {
16625 #define v8qi_UP T_V8QI
16626 #define v4hi_UP T_V4HI
16627 #define v2si_UP T_V2SI
16628 #define v2sf_UP T_V2SF
16630 #define v16qi_UP T_V16QI
16631 #define v8hi_UP T_V8HI
16632 #define v4si_UP T_V4SI
16633 #define v4sf_UP T_V4SF
16634 #define v2di_UP T_V2DI
16639 #define UP(X) X##_UP
16674 NEON_LOADSTRUCTLANE,
16676 NEON_STORESTRUCTLANE,
16685 const neon_itype itype;
16687 const enum insn_code codes[T_MAX];
16688 const unsigned int num_vars;
16689 unsigned int base_fcode;
16690 } neon_builtin_datum;
16692 #define CF(N,X) CODE_FOR_neon_##N##X
16694 #define VAR1(T, N, A) \
16695 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
16696 #define VAR2(T, N, A, B) \
16697 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
16698 #define VAR3(T, N, A, B, C) \
16699 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
16700 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
16701 #define VAR4(T, N, A, B, C, D) \
16702 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
16703 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
16704 #define VAR5(T, N, A, B, C, D, E) \
16705 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
16706 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
16707 #define VAR6(T, N, A, B, C, D, E, F) \
16708 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
16709 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
16710 #define VAR7(T, N, A, B, C, D, E, F, G) \
16711 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
16712 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16714 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
16715 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16717 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16718 CF (N, G), CF (N, H) }, 8, 0
16719 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
16720 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16721 | UP (H) | UP (I), \
16722 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16723 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
16724 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
16725 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16726 | UP (H) | UP (I) | UP (J), \
16727 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16728 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
16730 /* The mode entries in the following table correspond to the "key" type of the
16731 instruction variant, i.e. equivalent to that which would be specified after
16732 the assembler mnemonic, which usually refers to the last vector operand.
16733 (Signed/unsigned/polynomial types are not differentiated between though, and
16734 are all mapped onto the same mode for a given element size.) The modes
16735 listed per instruction should be the same as those defined for that
16736 instruction's pattern in neon.md.
16737 WARNING: Variants should be listed in the same increasing order as
16738 neon_builtin_type_bits. */
16740 static neon_builtin_datum neon_builtin_data[] =
16742 { VAR10 (BINOP, vadd,
16743 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16744 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
16745 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
16746 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16747 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16748 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
16749 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16750 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16751 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
16752 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16753 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
16754 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
16755 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
16756 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
16757 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
16758 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
16759 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
16760 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
16761 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
16762 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
16763 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
16764 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
16765 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16766 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16767 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16768 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
16769 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
16770 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
16771 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16772 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16773 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16774 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
16775 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16776 { VAR10 (BINOP, vsub,
16777 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16778 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
16779 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
16780 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16781 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16782 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
16783 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16784 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16785 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16786 { VAR2 (BINOP, vcage, v2sf, v4sf) },
16787 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
16788 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16789 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16790 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
16791 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16792 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
16793 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16794 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16795 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
16796 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16797 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16798 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
16799 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
16800 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
16801 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
16802 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16803 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16804 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16805 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16806 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16807 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16808 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16809 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16810 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
16811 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
16812 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
16813 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16814 /* FIXME: vget_lane supports more variants than this! */
16815 { VAR10 (GETLANE, vget_lane,
16816 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16817 { VAR10 (SETLANE, vset_lane,
16818 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16819 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
16820 { VAR10 (DUP, vdup_n,
16821 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16822 { VAR10 (DUPLANE, vdup_lane,
16823 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16824 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
16825 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
16826 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
16827 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
16828 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
16829 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
16830 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
16831 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16832 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16833 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
16834 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
16835 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16836 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
16837 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
16838 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16839 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16840 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
16841 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
16842 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16843 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
16844 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
16845 { VAR10 (BINOP, vext,
16846 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16847 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16848 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
16849 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
16850 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
16851 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
16852 { VAR10 (SELECT, vbsl,
16853 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16854 { VAR1 (VTBL, vtbl1, v8qi) },
16855 { VAR1 (VTBL, vtbl2, v8qi) },
16856 { VAR1 (VTBL, vtbl3, v8qi) },
16857 { VAR1 (VTBL, vtbl4, v8qi) },
16858 { VAR1 (VTBX, vtbx1, v8qi) },
16859 { VAR1 (VTBX, vtbx2, v8qi) },
16860 { VAR1 (VTBX, vtbx3, v8qi) },
16861 { VAR1 (VTBX, vtbx4, v8qi) },
16862 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16863 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16864 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16865 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
16866 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
16867 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
16868 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
16869 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
16870 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
16871 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
16872 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
16873 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
16874 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
16875 { VAR10 (LOAD1, vld1,
16876 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16877 { VAR10 (LOAD1LANE, vld1_lane,
16878 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16879 { VAR10 (LOAD1, vld1_dup,
16880 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16881 { VAR10 (STORE1, vst1,
16882 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16883 { VAR10 (STORE1LANE, vst1_lane,
16884 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16885 { VAR9 (LOADSTRUCT,
16886 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16887 { VAR7 (LOADSTRUCTLANE, vld2_lane,
16888 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16889 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
16890 { VAR9 (STORESTRUCT, vst2,
16891 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16892 { VAR7 (STORESTRUCTLANE, vst2_lane,
16893 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16894 { VAR9 (LOADSTRUCT,
16895 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16896 { VAR7 (LOADSTRUCTLANE, vld3_lane,
16897 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16898 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
16899 { VAR9 (STORESTRUCT, vst3,
16900 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16901 { VAR7 (STORESTRUCTLANE, vst3_lane,
16902 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16903 { VAR9 (LOADSTRUCT, vld4,
16904 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16905 { VAR7 (LOADSTRUCTLANE, vld4_lane,
16906 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16907 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
16908 { VAR9 (STORESTRUCT, vst4,
16909 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16910 { VAR7 (STORESTRUCTLANE, vst4_lane,
16911 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16912 { VAR10 (LOGICBINOP, vand,
16913 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16914 { VAR10 (LOGICBINOP, vorr,
16915 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16916 { VAR10 (BINOP, veor,
16917 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16918 { VAR10 (LOGICBINOP, vbic,
16919 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16920 { VAR10 (LOGICBINOP, vorn,
16921 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
16937 arm_init_neon_builtins (void)
16939 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
16941 tree neon_intQI_type_node;
16942 tree neon_intHI_type_node;
16943 tree neon_polyQI_type_node;
16944 tree neon_polyHI_type_node;
16945 tree neon_intSI_type_node;
16946 tree neon_intDI_type_node;
16947 tree neon_float_type_node;
16949 tree intQI_pointer_node;
16950 tree intHI_pointer_node;
16951 tree intSI_pointer_node;
16952 tree intDI_pointer_node;
16953 tree float_pointer_node;
16955 tree const_intQI_node;
16956 tree const_intHI_node;
16957 tree const_intSI_node;
16958 tree const_intDI_node;
16959 tree const_float_node;
16961 tree const_intQI_pointer_node;
16962 tree const_intHI_pointer_node;
16963 tree const_intSI_pointer_node;
16964 tree const_intDI_pointer_node;
16965 tree const_float_pointer_node;
16967 tree V8QI_type_node;
16968 tree V4HI_type_node;
16969 tree V2SI_type_node;
16970 tree V2SF_type_node;
16971 tree V16QI_type_node;
16972 tree V8HI_type_node;
16973 tree V4SI_type_node;
16974 tree V4SF_type_node;
16975 tree V2DI_type_node;
16977 tree intUQI_type_node;
16978 tree intUHI_type_node;
16979 tree intUSI_type_node;
16980 tree intUDI_type_node;
16982 tree intEI_type_node;
16983 tree intOI_type_node;
16984 tree intCI_type_node;
16985 tree intXI_type_node;
16987 tree V8QI_pointer_node;
16988 tree V4HI_pointer_node;
16989 tree V2SI_pointer_node;
16990 tree V2SF_pointer_node;
16991 tree V16QI_pointer_node;
16992 tree V8HI_pointer_node;
16993 tree V4SI_pointer_node;
16994 tree V4SF_pointer_node;
16995 tree V2DI_pointer_node;
16997 tree void_ftype_pv8qi_v8qi_v8qi;
16998 tree void_ftype_pv4hi_v4hi_v4hi;
16999 tree void_ftype_pv2si_v2si_v2si;
17000 tree void_ftype_pv2sf_v2sf_v2sf;
17001 tree void_ftype_pdi_di_di;
17002 tree void_ftype_pv16qi_v16qi_v16qi;
17003 tree void_ftype_pv8hi_v8hi_v8hi;
17004 tree void_ftype_pv4si_v4si_v4si;
17005 tree void_ftype_pv4sf_v4sf_v4sf;
17006 tree void_ftype_pv2di_v2di_v2di;
17008 tree reinterp_ftype_dreg[5][5];
17009 tree reinterp_ftype_qreg[5][5];
17010 tree dreg_types[5], qreg_types[5];
17012 /* Create distinguished type nodes for NEON vector element types,
17013 and pointers to values of such types, so we can detect them later. */
17014 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17015 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17016 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17017 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17018 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17019 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17020 neon_float_type_node = make_node (REAL_TYPE);
17021 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17022 layout_type (neon_float_type_node);
17024 /* Define typedefs which exactly correspond to the modes we are basing vector
17025 types on. If you change these names you'll need to change
17026 the table used by arm_mangle_type too. */
17027 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17028 "__builtin_neon_qi");
17029 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17030 "__builtin_neon_hi");
17031 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17032 "__builtin_neon_si");
17033 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17034 "__builtin_neon_sf");
17035 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17036 "__builtin_neon_di");
17037 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17038 "__builtin_neon_poly8");
17039 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17040 "__builtin_neon_poly16");
17042 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17043 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17044 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17045 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17046 float_pointer_node = build_pointer_type (neon_float_type_node);
17048 /* Next create constant-qualified versions of the above types. */
17049 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17051 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17053 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17055 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17057 const_float_node = build_qualified_type (neon_float_type_node,
17060 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17061 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17062 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17063 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17064 const_float_pointer_node = build_pointer_type (const_float_node);
17066 /* Now create vector types based on our NEON element types. */
17067 /* 64-bit vectors. */
17069 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17071 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17073 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17075 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17076 /* 128-bit vectors. */
17078 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17080 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17082 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17084 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17086 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17088 /* Unsigned integer types for various mode sizes. */
17089 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17090 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17091 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17092 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17094 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17095 "__builtin_neon_uqi");
17096 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17097 "__builtin_neon_uhi");
17098 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17099 "__builtin_neon_usi");
17100 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17101 "__builtin_neon_udi");
17103 /* Opaque integer types for structures of vectors. */
17104 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17105 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17106 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17107 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17109 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17110 "__builtin_neon_ti");
17111 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17112 "__builtin_neon_ei");
17113 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17114 "__builtin_neon_oi");
17115 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17116 "__builtin_neon_ci");
17117 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17118 "__builtin_neon_xi");
17120 /* Pointers to vector types. */
17121 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17122 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17123 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17124 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17125 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17126 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17127 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17128 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17129 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17131 /* Operations which return results as pairs. */
17132 void_ftype_pv8qi_v8qi_v8qi =
17133 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17134 V8QI_type_node, NULL);
17135 void_ftype_pv4hi_v4hi_v4hi =
17136 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17137 V4HI_type_node, NULL);
17138 void_ftype_pv2si_v2si_v2si =
17139 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17140 V2SI_type_node, NULL);
17141 void_ftype_pv2sf_v2sf_v2sf =
17142 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17143 V2SF_type_node, NULL);
17144 void_ftype_pdi_di_di =
17145 build_function_type_list (void_type_node, intDI_pointer_node,
17146 neon_intDI_type_node, neon_intDI_type_node, NULL);
17147 void_ftype_pv16qi_v16qi_v16qi =
17148 build_function_type_list (void_type_node, V16QI_pointer_node,
17149 V16QI_type_node, V16QI_type_node, NULL);
17150 void_ftype_pv8hi_v8hi_v8hi =
17151 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17152 V8HI_type_node, NULL);
17153 void_ftype_pv4si_v4si_v4si =
17154 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17155 V4SI_type_node, NULL);
17156 void_ftype_pv4sf_v4sf_v4sf =
17157 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17158 V4SF_type_node, NULL);
17159 void_ftype_pv2di_v2di_v2di =
17160 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17161 V2DI_type_node, NULL);
17163 dreg_types[0] = V8QI_type_node;
17164 dreg_types[1] = V4HI_type_node;
17165 dreg_types[2] = V2SI_type_node;
17166 dreg_types[3] = V2SF_type_node;
17167 dreg_types[4] = neon_intDI_type_node;
17169 qreg_types[0] = V16QI_type_node;
17170 qreg_types[1] = V8HI_type_node;
17171 qreg_types[2] = V4SI_type_node;
17172 qreg_types[3] = V4SF_type_node;
17173 qreg_types[4] = V2DI_type_node;
17175 for (i = 0; i < 5; i++)
17178 for (j = 0; j < 5; j++)
17180 reinterp_ftype_dreg[i][j]
17181 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17182 reinterp_ftype_qreg[i][j]
17183 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17187 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17189 neon_builtin_datum *d = &neon_builtin_data[i];
17190 unsigned int j, codeidx = 0;
17192 d->base_fcode = fcode;
17194 for (j = 0; j < T_MAX; j++)
17196 const char* const modenames[] = {
17197 "v8qi", "v4hi", "v2si", "v2sf", "di",
17198 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17202 enum insn_code icode;
17203 int is_load = 0, is_store = 0;
17205 if ((d->bits & (1 << j)) == 0)
17208 icode = d->codes[codeidx++];
17213 case NEON_LOAD1LANE:
17214 case NEON_LOADSTRUCT:
17215 case NEON_LOADSTRUCTLANE:
17217 /* Fall through. */
17219 case NEON_STORE1LANE:
17220 case NEON_STORESTRUCT:
17221 case NEON_STORESTRUCTLANE:
17224 /* Fall through. */
17227 case NEON_LOGICBINOP:
17228 case NEON_SHIFTINSERT:
17235 case NEON_SHIFTIMM:
17236 case NEON_SHIFTACC:
17242 case NEON_LANEMULL:
17243 case NEON_LANEMULH:
17245 case NEON_SCALARMUL:
17246 case NEON_SCALARMULL:
17247 case NEON_SCALARMULH:
17248 case NEON_SCALARMAC:
17254 tree return_type = void_type_node, args = void_list_node;
17256 /* Build a function type directly from the insn_data for this
17257 builtin. The build_function_type() function takes care of
17258 removing duplicates for us. */
17259 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17263 if (is_load && k == 1)
17265 /* Neon load patterns always have the memory operand
17266 (a SImode pointer) in the operand 1 position. We
17267 want a const pointer to the element type in that
17269 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17275 eltype = const_intQI_pointer_node;
17280 eltype = const_intHI_pointer_node;
17285 eltype = const_intSI_pointer_node;
17290 eltype = const_float_pointer_node;
17295 eltype = const_intDI_pointer_node;
17298 default: gcc_unreachable ();
17301 else if (is_store && k == 0)
17303 /* Similarly, Neon store patterns use operand 0 as
17304 the memory location to store to (a SImode pointer).
17305 Use a pointer to the element type of the store in
17307 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17313 eltype = intQI_pointer_node;
17318 eltype = intHI_pointer_node;
17323 eltype = intSI_pointer_node;
17328 eltype = float_pointer_node;
17333 eltype = intDI_pointer_node;
17336 default: gcc_unreachable ();
17341 switch (insn_data[icode].operand[k].mode)
17343 case VOIDmode: eltype = void_type_node; break;
17345 case QImode: eltype = neon_intQI_type_node; break;
17346 case HImode: eltype = neon_intHI_type_node; break;
17347 case SImode: eltype = neon_intSI_type_node; break;
17348 case SFmode: eltype = neon_float_type_node; break;
17349 case DImode: eltype = neon_intDI_type_node; break;
17350 case TImode: eltype = intTI_type_node; break;
17351 case EImode: eltype = intEI_type_node; break;
17352 case OImode: eltype = intOI_type_node; break;
17353 case CImode: eltype = intCI_type_node; break;
17354 case XImode: eltype = intXI_type_node; break;
17355 /* 64-bit vectors. */
17356 case V8QImode: eltype = V8QI_type_node; break;
17357 case V4HImode: eltype = V4HI_type_node; break;
17358 case V2SImode: eltype = V2SI_type_node; break;
17359 case V2SFmode: eltype = V2SF_type_node; break;
17360 /* 128-bit vectors. */
17361 case V16QImode: eltype = V16QI_type_node; break;
17362 case V8HImode: eltype = V8HI_type_node; break;
17363 case V4SImode: eltype = V4SI_type_node; break;
17364 case V4SFmode: eltype = V4SF_type_node; break;
17365 case V2DImode: eltype = V2DI_type_node; break;
17366 default: gcc_unreachable ();
17370 if (k == 0 && !is_store)
17371 return_type = eltype;
17373 args = tree_cons (NULL_TREE, eltype, args);
17376 ftype = build_function_type (return_type, args);
17380 case NEON_RESULTPAIR:
17382 switch (insn_data[icode].operand[1].mode)
17384 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17385 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17386 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17387 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17388 case DImode: ftype = void_ftype_pdi_di_di; break;
17389 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17390 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17391 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17392 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17393 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17394 default: gcc_unreachable ();
17399 case NEON_REINTERP:
17401 /* We iterate over 5 doubleword types, then 5 quadword
17404 switch (insn_data[icode].operand[0].mode)
17406 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17407 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17408 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17409 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17410 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17411 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17412 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17413 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17414 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17415 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17416 default: gcc_unreachable ();
17422 gcc_unreachable ();
17425 gcc_assert (ftype != NULL);
17427 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17429 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17436 arm_init_fp16_builtins (void)
17438 tree fp16_type = make_node (REAL_TYPE);
17439 TYPE_PRECISION (fp16_type) = 16;
17440 layout_type (fp16_type);
17441 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
17445 arm_init_builtins (void)
17447 arm_init_tls_builtins ();
17449 if (TARGET_REALLY_IWMMXT)
17450 arm_init_iwmmxt_builtins ();
17453 arm_init_neon_builtins ();
17455 if (arm_fp16_format)
17456 arm_init_fp16_builtins ();
17459 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17461 static const char *
17462 arm_invalid_parameter_type (const_tree t)
17464 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17465 return N_("function parameters cannot have __fp16 type");
17469 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17471 static const char *
17472 arm_invalid_return_type (const_tree t)
17474 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17475 return N_("functions cannot return __fp16 type");
17479 /* Implement TARGET_PROMOTED_TYPE. */
17482 arm_promoted_type (const_tree t)
17484 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17485 return float_type_node;
17489 /* Implement TARGET_CONVERT_TO_TYPE.
17490 Specifically, this hook implements the peculiarity of the ARM
17491 half-precision floating-point C semantics that requires conversions between
17492 __fp16 to or from double to do an intermediate conversion to float. */
17495 arm_convert_to_type (tree type, tree expr)
17497 tree fromtype = TREE_TYPE (expr);
17498 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17500 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17501 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17502 return convert (type, convert (float_type_node, expr));
17506 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17507 This simply adds HFmode as a supported mode; even though we don't
17508 implement arithmetic on this type directly, it's supported by
17509 optabs conversions, much the way the double-word arithmetic is
17510 special-cased in the default hook. */
17513 arm_scalar_mode_supported_p (enum machine_mode mode)
17515 if (mode == HFmode)
17516 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17518 return default_scalar_mode_supported_p (mode);
17521 /* Errors in the source file can cause expand_expr to return const0_rtx
17522 where we expect a vector. To avoid crashing, use one of the vector
17523 clear instructions. */
17526 safe_vector_operand (rtx x, enum machine_mode mode)
17528 if (x != const0_rtx)
17530 x = gen_reg_rtx (mode);
17532 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17533 : gen_rtx_SUBREG (DImode, x, 0)));
17537 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17540 arm_expand_binop_builtin (enum insn_code icode,
17541 tree exp, rtx target)
17544 tree arg0 = CALL_EXPR_ARG (exp, 0);
17545 tree arg1 = CALL_EXPR_ARG (exp, 1);
17546 rtx op0 = expand_normal (arg0);
17547 rtx op1 = expand_normal (arg1);
17548 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17549 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17550 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17552 if (VECTOR_MODE_P (mode0))
17553 op0 = safe_vector_operand (op0, mode0);
17554 if (VECTOR_MODE_P (mode1))
17555 op1 = safe_vector_operand (op1, mode1);
17558 || GET_MODE (target) != tmode
17559 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17560 target = gen_reg_rtx (tmode);
17562 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
17564 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17565 op0 = copy_to_mode_reg (mode0, op0);
17566 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17567 op1 = copy_to_mode_reg (mode1, op1);
17569 pat = GEN_FCN (icode) (target, op0, op1);
17576 /* Subroutine of arm_expand_builtin to take care of unop insns. */
17579 arm_expand_unop_builtin (enum insn_code icode,
17580 tree exp, rtx target, int do_load)
17583 tree arg0 = CALL_EXPR_ARG (exp, 0);
17584 rtx op0 = expand_normal (arg0);
17585 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17586 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17589 || GET_MODE (target) != tmode
17590 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17591 target = gen_reg_rtx (tmode);
17593 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17596 if (VECTOR_MODE_P (mode0))
17597 op0 = safe_vector_operand (op0, mode0);
17599 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17600 op0 = copy_to_mode_reg (mode0, op0);
17603 pat = GEN_FCN (icode) (target, op0);
17611 neon_builtin_compare (const void *a, const void *b)
17613 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
17614 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
17615 unsigned int soughtcode = key->base_fcode;
17617 if (soughtcode >= memb->base_fcode
17618 && soughtcode < memb->base_fcode + memb->num_vars)
17620 else if (soughtcode < memb->base_fcode)
17626 static enum insn_code
17627 locate_neon_builtin_icode (int fcode, neon_itype *itype)
17629 neon_builtin_datum key, *found;
17632 key.base_fcode = fcode;
17633 found = (neon_builtin_datum *)
17634 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
17635 sizeof (neon_builtin_data[0]), neon_builtin_compare);
17636 gcc_assert (found);
17637 idx = fcode - (int) found->base_fcode;
17638 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
17641 *itype = found->itype;
17643 return found->codes[idx];
17647 NEON_ARG_COPY_TO_REG,
17652 #define NEON_MAX_BUILTIN_ARGS 5
17654 /* Expand a Neon builtin. */
17656 arm_expand_neon_args (rtx target, int icode, int have_retval,
17661 tree arg[NEON_MAX_BUILTIN_ARGS];
17662 rtx op[NEON_MAX_BUILTIN_ARGS];
17663 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17664 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
17669 || GET_MODE (target) != tmode
17670 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
17671 target = gen_reg_rtx (tmode);
17673 va_start (ap, exp);
17677 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
17679 if (thisarg == NEON_ARG_STOP)
17683 arg[argc] = CALL_EXPR_ARG (exp, argc);
17684 op[argc] = expand_normal (arg[argc]);
17685 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
17689 case NEON_ARG_COPY_TO_REG:
17690 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
17691 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17692 (op[argc], mode[argc]))
17693 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
17696 case NEON_ARG_CONSTANT:
17697 /* FIXME: This error message is somewhat unhelpful. */
17698 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17699 (op[argc], mode[argc]))
17700 error ("argument must be a constant");
17703 case NEON_ARG_STOP:
17704 gcc_unreachable ();
17717 pat = GEN_FCN (icode) (target, op[0]);
17721 pat = GEN_FCN (icode) (target, op[0], op[1]);
17725 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
17729 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
17733 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
17737 gcc_unreachable ();
17743 pat = GEN_FCN (icode) (op[0]);
17747 pat = GEN_FCN (icode) (op[0], op[1]);
17751 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
17755 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
17759 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
17763 gcc_unreachable ();
17774 /* Expand a Neon builtin. These are "special" because they don't have symbolic
17775 constants defined per-instruction or per instruction-variant. Instead, the
17776 required info is looked up in the table neon_builtin_data. */
17778 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
17781 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
17788 return arm_expand_neon_args (target, icode, 1, exp,
17789 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
17793 case NEON_SCALARMUL:
17794 case NEON_SCALARMULL:
17795 case NEON_SCALARMULH:
17796 case NEON_SHIFTINSERT:
17797 case NEON_LOGICBINOP:
17798 return arm_expand_neon_args (target, icode, 1, exp,
17799 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17803 return arm_expand_neon_args (target, icode, 1, exp,
17804 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17805 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17809 case NEON_SHIFTIMM:
17810 return arm_expand_neon_args (target, icode, 1, exp,
17811 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
17815 return arm_expand_neon_args (target, icode, 1, exp,
17816 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17820 case NEON_REINTERP:
17821 return arm_expand_neon_args (target, icode, 1, exp,
17822 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17826 return arm_expand_neon_args (target, icode, 1, exp,
17827 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17829 case NEON_RESULTPAIR:
17830 return arm_expand_neon_args (target, icode, 0, exp,
17831 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17835 case NEON_LANEMULL:
17836 case NEON_LANEMULH:
17837 return arm_expand_neon_args (target, icode, 1, exp,
17838 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17839 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17842 return arm_expand_neon_args (target, icode, 1, exp,
17843 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17844 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
17846 case NEON_SHIFTACC:
17847 return arm_expand_neon_args (target, icode, 1, exp,
17848 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17849 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17851 case NEON_SCALARMAC:
17852 return arm_expand_neon_args (target, icode, 1, exp,
17853 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17854 NEON_ARG_CONSTANT, NEON_ARG_STOP);
17858 return arm_expand_neon_args (target, icode, 1, exp,
17859 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
17863 case NEON_LOADSTRUCT:
17864 return arm_expand_neon_args (target, icode, 1, exp,
17865 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17867 case NEON_LOAD1LANE:
17868 case NEON_LOADSTRUCTLANE:
17869 return arm_expand_neon_args (target, icode, 1, exp,
17870 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17874 case NEON_STORESTRUCT:
17875 return arm_expand_neon_args (target, icode, 0, exp,
17876 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
17878 case NEON_STORE1LANE:
17879 case NEON_STORESTRUCTLANE:
17880 return arm_expand_neon_args (target, icode, 0, exp,
17881 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
17885 gcc_unreachable ();
17888 /* Emit code to reinterpret one Neon type as another, without altering bits. */
17890 neon_reinterpret (rtx dest, rtx src)
17892 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
17895 /* Emit code to place a Neon pair result in memory locations (with equal
17898 neon_emit_pair_result_insn (enum machine_mode mode,
17899 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
17902 rtx mem = gen_rtx_MEM (mode, destaddr);
17903 rtx tmp1 = gen_reg_rtx (mode);
17904 rtx tmp2 = gen_reg_rtx (mode);
17906 emit_insn (intfn (tmp1, op1, tmp2, op2));
17908 emit_move_insn (mem, tmp1);
17909 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
17910 emit_move_insn (mem, tmp2);
17913 /* Set up operands for a register copy from src to dest, taking care not to
17914 clobber registers in the process.
17915 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
17916 be called with a large N, so that should be OK. */
17919 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
17921 unsigned int copied = 0, opctr = 0;
17922 unsigned int done = (1 << count) - 1;
17925 while (copied != done)
17927 for (i = 0; i < count; i++)
17931 for (j = 0; good && j < count; j++)
17932 if (i != j && (copied & (1 << j)) == 0
17933 && reg_overlap_mentioned_p (src[j], dest[i]))
17938 operands[opctr++] = dest[i];
17939 operands[opctr++] = src[i];
17945 gcc_assert (opctr == count * 2);
17948 /* Expand an expression EXP that calls a built-in function,
17949 with result going to TARGET if that's convenient
17950 (and in mode MODE if that's convenient).
17951 SUBTARGET may be used as the target for computing one of EXP's operands.
17952 IGNORE is nonzero if the value is to be ignored. */
17955 arm_expand_builtin (tree exp,
17957 rtx subtarget ATTRIBUTE_UNUSED,
17958 enum machine_mode mode ATTRIBUTE_UNUSED,
17959 int ignore ATTRIBUTE_UNUSED)
17961 const struct builtin_description * d;
17962 enum insn_code icode;
17963 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17971 int fcode = DECL_FUNCTION_CODE (fndecl);
17973 enum machine_mode tmode;
17974 enum machine_mode mode0;
17975 enum machine_mode mode1;
17976 enum machine_mode mode2;
17978 if (fcode >= ARM_BUILTIN_NEON_BASE)
17979 return arm_expand_neon_builtin (fcode, exp, target);
17983 case ARM_BUILTIN_TEXTRMSB:
17984 case ARM_BUILTIN_TEXTRMUB:
17985 case ARM_BUILTIN_TEXTRMSH:
17986 case ARM_BUILTIN_TEXTRMUH:
17987 case ARM_BUILTIN_TEXTRMSW:
17988 case ARM_BUILTIN_TEXTRMUW:
17989 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
17990 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
17991 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
17992 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
17993 : CODE_FOR_iwmmxt_textrmw);
17995 arg0 = CALL_EXPR_ARG (exp, 0);
17996 arg1 = CALL_EXPR_ARG (exp, 1);
17997 op0 = expand_normal (arg0);
17998 op1 = expand_normal (arg1);
17999 tmode = insn_data[icode].operand[0].mode;
18000 mode0 = insn_data[icode].operand[1].mode;
18001 mode1 = insn_data[icode].operand[2].mode;
18003 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18004 op0 = copy_to_mode_reg (mode0, op0);
18005 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18007 /* @@@ better error message */
18008 error ("selector must be an immediate");
18009 return gen_reg_rtx (tmode);
18012 || GET_MODE (target) != tmode
18013 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18014 target = gen_reg_rtx (tmode);
18015 pat = GEN_FCN (icode) (target, op0, op1);
18021 case ARM_BUILTIN_TINSRB:
18022 case ARM_BUILTIN_TINSRH:
18023 case ARM_BUILTIN_TINSRW:
18024 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18025 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18026 : CODE_FOR_iwmmxt_tinsrw);
18027 arg0 = CALL_EXPR_ARG (exp, 0);
18028 arg1 = CALL_EXPR_ARG (exp, 1);
18029 arg2 = CALL_EXPR_ARG (exp, 2);
18030 op0 = expand_normal (arg0);
18031 op1 = expand_normal (arg1);
18032 op2 = expand_normal (arg2);
18033 tmode = insn_data[icode].operand[0].mode;
18034 mode0 = insn_data[icode].operand[1].mode;
18035 mode1 = insn_data[icode].operand[2].mode;
18036 mode2 = insn_data[icode].operand[3].mode;
18038 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18039 op0 = copy_to_mode_reg (mode0, op0);
18040 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18041 op1 = copy_to_mode_reg (mode1, op1);
18042 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18044 /* @@@ better error message */
18045 error ("selector must be an immediate");
18049 || GET_MODE (target) != tmode
18050 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18051 target = gen_reg_rtx (tmode);
18052 pat = GEN_FCN (icode) (target, op0, op1, op2);
18058 case ARM_BUILTIN_SETWCX:
18059 arg0 = CALL_EXPR_ARG (exp, 0);
18060 arg1 = CALL_EXPR_ARG (exp, 1);
18061 op0 = force_reg (SImode, expand_normal (arg0));
18062 op1 = expand_normal (arg1);
18063 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18066 case ARM_BUILTIN_GETWCX:
18067 arg0 = CALL_EXPR_ARG (exp, 0);
18068 op0 = expand_normal (arg0);
18069 target = gen_reg_rtx (SImode);
18070 emit_insn (gen_iwmmxt_tmrc (target, op0));
18073 case ARM_BUILTIN_WSHUFH:
18074 icode = CODE_FOR_iwmmxt_wshufh;
18075 arg0 = CALL_EXPR_ARG (exp, 0);
18076 arg1 = CALL_EXPR_ARG (exp, 1);
18077 op0 = expand_normal (arg0);
18078 op1 = expand_normal (arg1);
18079 tmode = insn_data[icode].operand[0].mode;
18080 mode1 = insn_data[icode].operand[1].mode;
18081 mode2 = insn_data[icode].operand[2].mode;
18083 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18084 op0 = copy_to_mode_reg (mode1, op0);
18085 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18087 /* @@@ better error message */
18088 error ("mask must be an immediate");
18092 || GET_MODE (target) != tmode
18093 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18094 target = gen_reg_rtx (tmode);
18095 pat = GEN_FCN (icode) (target, op0, op1);
18101 case ARM_BUILTIN_WSADB:
18102 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18103 case ARM_BUILTIN_WSADH:
18104 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18105 case ARM_BUILTIN_WSADBZ:
18106 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18107 case ARM_BUILTIN_WSADHZ:
18108 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18110 /* Several three-argument builtins. */
18111 case ARM_BUILTIN_WMACS:
18112 case ARM_BUILTIN_WMACU:
18113 case ARM_BUILTIN_WALIGN:
18114 case ARM_BUILTIN_TMIA:
18115 case ARM_BUILTIN_TMIAPH:
18116 case ARM_BUILTIN_TMIATT:
18117 case ARM_BUILTIN_TMIATB:
18118 case ARM_BUILTIN_TMIABT:
18119 case ARM_BUILTIN_TMIABB:
18120 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18121 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18122 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18123 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18124 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18125 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18126 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18127 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18128 : CODE_FOR_iwmmxt_walign);
18129 arg0 = CALL_EXPR_ARG (exp, 0);
18130 arg1 = CALL_EXPR_ARG (exp, 1);
18131 arg2 = CALL_EXPR_ARG (exp, 2);
18132 op0 = expand_normal (arg0);
18133 op1 = expand_normal (arg1);
18134 op2 = expand_normal (arg2);
18135 tmode = insn_data[icode].operand[0].mode;
18136 mode0 = insn_data[icode].operand[1].mode;
18137 mode1 = insn_data[icode].operand[2].mode;
18138 mode2 = insn_data[icode].operand[3].mode;
18140 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18141 op0 = copy_to_mode_reg (mode0, op0);
18142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18143 op1 = copy_to_mode_reg (mode1, op1);
18144 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18145 op2 = copy_to_mode_reg (mode2, op2);
18147 || GET_MODE (target) != tmode
18148 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18149 target = gen_reg_rtx (tmode);
18150 pat = GEN_FCN (icode) (target, op0, op1, op2);
18156 case ARM_BUILTIN_WZERO:
18157 target = gen_reg_rtx (DImode);
18158 emit_insn (gen_iwmmxt_clrdi (target));
18161 case ARM_BUILTIN_THREAD_POINTER:
18162 return arm_load_tp (target);
18168 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18169 if (d->code == (const enum arm_builtins) fcode)
18170 return arm_expand_binop_builtin (d->icode, exp, target);
18172 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18173 if (d->code == (const enum arm_builtins) fcode)
18174 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18176 /* @@@ Should really do something sensible here. */
18180 /* Return the number (counting from 0) of
18181 the least significant set bit in MASK. */
18184 number_of_first_bit_set (unsigned mask)
18189 (mask & (1 << bit)) == 0;
18196 /* Emit code to push or pop registers to or from the stack. F is the
18197 assembly file. MASK is the registers to push or pop. PUSH is
18198 nonzero if we should push, and zero if we should pop. For debugging
18199 output, if pushing, adjust CFA_OFFSET by the amount of space added
18200 to the stack. REAL_REGS should have the same number of bits set as
18201 MASK, and will be used instead (in the same order) to describe which
18202 registers were saved - this is used to mark the save slots when we
18203 push high registers after moving them to low registers. */
18205 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18206 unsigned long real_regs)
18209 int lo_mask = mask & 0xFF;
18210 int pushed_words = 0;
18214 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18216 /* Special case. Do not generate a POP PC statement here, do it in
18218 thumb_exit (f, -1);
18222 if (ARM_EABI_UNWIND_TABLES && push)
18224 fprintf (f, "\t.save\t{");
18225 for (regno = 0; regno < 15; regno++)
18227 if (real_regs & (1 << regno))
18229 if (real_regs & ((1 << regno) -1))
18231 asm_fprintf (f, "%r", regno);
18234 fprintf (f, "}\n");
18237 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18239 /* Look at the low registers first. */
18240 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18244 asm_fprintf (f, "%r", regno);
18246 if ((lo_mask & ~1) != 0)
18253 if (push && (mask & (1 << LR_REGNUM)))
18255 /* Catch pushing the LR. */
18259 asm_fprintf (f, "%r", LR_REGNUM);
18263 else if (!push && (mask & (1 << PC_REGNUM)))
18265 /* Catch popping the PC. */
18266 if (TARGET_INTERWORK || TARGET_BACKTRACE
18267 || crtl->calls_eh_return)
18269 /* The PC is never poped directly, instead
18270 it is popped into r3 and then BX is used. */
18271 fprintf (f, "}\n");
18273 thumb_exit (f, -1);
18282 asm_fprintf (f, "%r", PC_REGNUM);
18286 fprintf (f, "}\n");
18288 if (push && pushed_words && dwarf2out_do_frame ())
18290 char *l = dwarf2out_cfi_label (false);
18291 int pushed_mask = real_regs;
18293 *cfa_offset += pushed_words * 4;
18294 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18297 pushed_mask = real_regs;
18298 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18300 if (pushed_mask & 1)
18301 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18306 /* Generate code to return from a thumb function.
18307 If 'reg_containing_return_addr' is -1, then the return address is
18308 actually on the stack, at the stack pointer. */
18310 thumb_exit (FILE *f, int reg_containing_return_addr)
18312 unsigned regs_available_for_popping;
18313 unsigned regs_to_pop;
18315 unsigned available;
18319 int restore_a4 = FALSE;
18321 /* Compute the registers we need to pop. */
18325 if (reg_containing_return_addr == -1)
18327 regs_to_pop |= 1 << LR_REGNUM;
18331 if (TARGET_BACKTRACE)
18333 /* Restore the (ARM) frame pointer and stack pointer. */
18334 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18338 /* If there is nothing to pop then just emit the BX instruction and
18340 if (pops_needed == 0)
18342 if (crtl->calls_eh_return)
18343 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18345 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18348 /* Otherwise if we are not supporting interworking and we have not created
18349 a backtrace structure and the function was not entered in ARM mode then
18350 just pop the return address straight into the PC. */
18351 else if (!TARGET_INTERWORK
18352 && !TARGET_BACKTRACE
18353 && !is_called_in_ARM_mode (current_function_decl)
18354 && !crtl->calls_eh_return)
18356 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18360 /* Find out how many of the (return) argument registers we can corrupt. */
18361 regs_available_for_popping = 0;
18363 /* If returning via __builtin_eh_return, the bottom three registers
18364 all contain information needed for the return. */
18365 if (crtl->calls_eh_return)
18369 /* If we can deduce the registers used from the function's
18370 return value. This is more reliable that examining
18371 df_regs_ever_live_p () because that will be set if the register is
18372 ever used in the function, not just if the register is used
18373 to hold a return value. */
18375 if (crtl->return_rtx != 0)
18376 mode = GET_MODE (crtl->return_rtx);
18378 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18380 size = GET_MODE_SIZE (mode);
18384 /* In a void function we can use any argument register.
18385 In a function that returns a structure on the stack
18386 we can use the second and third argument registers. */
18387 if (mode == VOIDmode)
18388 regs_available_for_popping =
18389 (1 << ARG_REGISTER (1))
18390 | (1 << ARG_REGISTER (2))
18391 | (1 << ARG_REGISTER (3));
18393 regs_available_for_popping =
18394 (1 << ARG_REGISTER (2))
18395 | (1 << ARG_REGISTER (3));
18397 else if (size <= 4)
18398 regs_available_for_popping =
18399 (1 << ARG_REGISTER (2))
18400 | (1 << ARG_REGISTER (3));
18401 else if (size <= 8)
18402 regs_available_for_popping =
18403 (1 << ARG_REGISTER (3));
18406 /* Match registers to be popped with registers into which we pop them. */
18407 for (available = regs_available_for_popping,
18408 required = regs_to_pop;
18409 required != 0 && available != 0;
18410 available &= ~(available & - available),
18411 required &= ~(required & - required))
18414 /* If we have any popping registers left over, remove them. */
18416 regs_available_for_popping &= ~available;
18418 /* Otherwise if we need another popping register we can use
18419 the fourth argument register. */
18420 else if (pops_needed)
18422 /* If we have not found any free argument registers and
18423 reg a4 contains the return address, we must move it. */
18424 if (regs_available_for_popping == 0
18425 && reg_containing_return_addr == LAST_ARG_REGNUM)
18427 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18428 reg_containing_return_addr = LR_REGNUM;
18430 else if (size > 12)
18432 /* Register a4 is being used to hold part of the return value,
18433 but we have dire need of a free, low register. */
18436 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
18439 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18441 /* The fourth argument register is available. */
18442 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18448 /* Pop as many registers as we can. */
18449 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18450 regs_available_for_popping);
18452 /* Process the registers we popped. */
18453 if (reg_containing_return_addr == -1)
18455 /* The return address was popped into the lowest numbered register. */
18456 regs_to_pop &= ~(1 << LR_REGNUM);
18458 reg_containing_return_addr =
18459 number_of_first_bit_set (regs_available_for_popping);
18461 /* Remove this register for the mask of available registers, so that
18462 the return address will not be corrupted by further pops. */
18463 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18466 /* If we popped other registers then handle them here. */
18467 if (regs_available_for_popping)
18471 /* Work out which register currently contains the frame pointer. */
18472 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18474 /* Move it into the correct place. */
18475 asm_fprintf (f, "\tmov\t%r, %r\n",
18476 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18478 /* (Temporarily) remove it from the mask of popped registers. */
18479 regs_available_for_popping &= ~(1 << frame_pointer);
18480 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18482 if (regs_available_for_popping)
18486 /* We popped the stack pointer as well,
18487 find the register that contains it. */
18488 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18490 /* Move it into the stack register. */
18491 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18493 /* At this point we have popped all necessary registers, so
18494 do not worry about restoring regs_available_for_popping
18495 to its correct value:
18497 assert (pops_needed == 0)
18498 assert (regs_available_for_popping == (1 << frame_pointer))
18499 assert (regs_to_pop == (1 << STACK_POINTER)) */
18503 /* Since we have just move the popped value into the frame
18504 pointer, the popping register is available for reuse, and
18505 we know that we still have the stack pointer left to pop. */
18506 regs_available_for_popping |= (1 << frame_pointer);
18510 /* If we still have registers left on the stack, but we no longer have
18511 any registers into which we can pop them, then we must move the return
18512 address into the link register and make available the register that
18514 if (regs_available_for_popping == 0 && pops_needed > 0)
18516 regs_available_for_popping |= 1 << reg_containing_return_addr;
18518 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18519 reg_containing_return_addr);
18521 reg_containing_return_addr = LR_REGNUM;
18524 /* If we have registers left on the stack then pop some more.
18525 We know that at most we will want to pop FP and SP. */
18526 if (pops_needed > 0)
18531 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18532 regs_available_for_popping);
18534 /* We have popped either FP or SP.
18535 Move whichever one it is into the correct register. */
18536 popped_into = number_of_first_bit_set (regs_available_for_popping);
18537 move_to = number_of_first_bit_set (regs_to_pop);
18539 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18541 regs_to_pop &= ~(1 << move_to);
18546 /* If we still have not popped everything then we must have only
18547 had one register available to us and we are now popping the SP. */
18548 if (pops_needed > 0)
18552 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18553 regs_available_for_popping);
18555 popped_into = number_of_first_bit_set (regs_available_for_popping);
18557 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
18559 assert (regs_to_pop == (1 << STACK_POINTER))
18560 assert (pops_needed == 1)
18564 /* If necessary restore the a4 register. */
18567 if (reg_containing_return_addr != LR_REGNUM)
18569 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18570 reg_containing_return_addr = LR_REGNUM;
18573 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
18576 if (crtl->calls_eh_return)
18577 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18579 /* Return to caller. */
18580 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18585 thumb1_final_prescan_insn (rtx insn)
18587 if (flag_print_asm_name)
18588 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
18589 INSN_ADDRESSES (INSN_UID (insn)));
18593 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
18595 unsigned HOST_WIDE_INT mask = 0xff;
18598 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
18599 if (val == 0) /* XXX */
18602 for (i = 0; i < 25; i++)
18603 if ((val & (mask << i)) == val)
18609 /* Returns nonzero if the current function contains,
18610 or might contain a far jump. */
18612 thumb_far_jump_used_p (void)
18616 /* This test is only important for leaf functions. */
18617 /* assert (!leaf_function_p ()); */
18619 /* If we have already decided that far jumps may be used,
18620 do not bother checking again, and always return true even if
18621 it turns out that they are not being used. Once we have made
18622 the decision that far jumps are present (and that hence the link
18623 register will be pushed onto the stack) we cannot go back on it. */
18624 if (cfun->machine->far_jump_used)
18627 /* If this function is not being called from the prologue/epilogue
18628 generation code then it must be being called from the
18629 INITIAL_ELIMINATION_OFFSET macro. */
18630 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
18632 /* In this case we know that we are being asked about the elimination
18633 of the arg pointer register. If that register is not being used,
18634 then there are no arguments on the stack, and we do not have to
18635 worry that a far jump might force the prologue to push the link
18636 register, changing the stack offsets. In this case we can just
18637 return false, since the presence of far jumps in the function will
18638 not affect stack offsets.
18640 If the arg pointer is live (or if it was live, but has now been
18641 eliminated and so set to dead) then we do have to test to see if
18642 the function might contain a far jump. This test can lead to some
18643 false negatives, since before reload is completed, then length of
18644 branch instructions is not known, so gcc defaults to returning their
18645 longest length, which in turn sets the far jump attribute to true.
18647 A false negative will not result in bad code being generated, but it
18648 will result in a needless push and pop of the link register. We
18649 hope that this does not occur too often.
18651 If we need doubleword stack alignment this could affect the other
18652 elimination offsets so we can't risk getting it wrong. */
18653 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
18654 cfun->machine->arg_pointer_live = 1;
18655 else if (!cfun->machine->arg_pointer_live)
18659 /* Check to see if the function contains a branch
18660 insn with the far jump attribute set. */
18661 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18663 if (GET_CODE (insn) == JUMP_INSN
18664 /* Ignore tablejump patterns. */
18665 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18666 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
18667 && get_attr_far_jump (insn) == FAR_JUMP_YES
18670 /* Record the fact that we have decided that
18671 the function does use far jumps. */
18672 cfun->machine->far_jump_used = 1;
18680 /* Return nonzero if FUNC must be entered in ARM mode. */
18682 is_called_in_ARM_mode (tree func)
18684 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
18686 /* Ignore the problem about functions whose address is taken. */
18687 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
18691 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
18697 /* The bits which aren't usefully expanded as rtl. */
18699 thumb_unexpanded_epilogue (void)
18701 arm_stack_offsets *offsets;
18703 unsigned long live_regs_mask = 0;
18704 int high_regs_pushed = 0;
18705 int had_to_push_lr;
18708 if (cfun->machine->return_used_this_function != 0)
18711 if (IS_NAKED (arm_current_func_type ()))
18714 offsets = arm_get_frame_offsets ();
18715 live_regs_mask = offsets->saved_regs_mask;
18716 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
18718 /* If we can deduce the registers used from the function's return value.
18719 This is more reliable that examining df_regs_ever_live_p () because that
18720 will be set if the register is ever used in the function, not just if
18721 the register is used to hold a return value. */
18722 size = arm_size_return_regs ();
18724 /* The prolog may have pushed some high registers to use as
18725 work registers. e.g. the testsuite file:
18726 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
18727 compiles to produce:
18728 push {r4, r5, r6, r7, lr}
18732 as part of the prolog. We have to undo that pushing here. */
18734 if (high_regs_pushed)
18736 unsigned long mask = live_regs_mask & 0xff;
18739 /* The available low registers depend on the size of the value we are
18747 /* Oh dear! We have no low registers into which we can pop
18750 ("no low registers available for popping high registers");
18752 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
18753 if (live_regs_mask & (1 << next_hi_reg))
18756 while (high_regs_pushed)
18758 /* Find lo register(s) into which the high register(s) can
18760 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
18762 if (mask & (1 << regno))
18763 high_regs_pushed--;
18764 if (high_regs_pushed == 0)
18768 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
18770 /* Pop the values into the low register(s). */
18771 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
18773 /* Move the value(s) into the high registers. */
18774 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
18776 if (mask & (1 << regno))
18778 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
18781 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
18782 if (live_regs_mask & (1 << next_hi_reg))
18787 live_regs_mask &= ~0x0f00;
18790 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
18791 live_regs_mask &= 0xff;
18793 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
18795 /* Pop the return address into the PC. */
18796 if (had_to_push_lr)
18797 live_regs_mask |= 1 << PC_REGNUM;
18799 /* Either no argument registers were pushed or a backtrace
18800 structure was created which includes an adjusted stack
18801 pointer, so just pop everything. */
18802 if (live_regs_mask)
18803 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
18806 /* We have either just popped the return address into the
18807 PC or it is was kept in LR for the entire function. */
18808 if (!had_to_push_lr)
18809 thumb_exit (asm_out_file, LR_REGNUM);
18813 /* Pop everything but the return address. */
18814 if (live_regs_mask)
18815 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
18818 if (had_to_push_lr)
18822 /* We have no free low regs, so save one. */
18823 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
18827 /* Get the return address into a temporary register. */
18828 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
18829 1 << LAST_ARG_REGNUM);
18833 /* Move the return address to lr. */
18834 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
18836 /* Restore the low register. */
18837 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
18842 regno = LAST_ARG_REGNUM;
18847 /* Remove the argument registers that were pushed onto the stack. */
18848 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
18849 SP_REGNUM, SP_REGNUM,
18850 crtl->args.pretend_args_size);
18852 thumb_exit (asm_out_file, regno);
18858 /* Functions to save and restore machine-specific function data. */
18859 static struct machine_function *
18860 arm_init_machine_status (void)
18862 struct machine_function *machine;
18863 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
18865 #if ARM_FT_UNKNOWN != 0
18866 machine->func_type = ARM_FT_UNKNOWN;
18871 /* Return an RTX indicating where the return address to the
18872 calling function can be found. */
18874 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
18879 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
18882 /* Do anything needed before RTL is emitted for each function. */
18884 arm_init_expanders (void)
18886 /* Arrange to initialize and mark the machine per-function status. */
18887 init_machine_status = arm_init_machine_status;
18889 /* This is to stop the combine pass optimizing away the alignment
18890 adjustment of va_arg. */
18891 /* ??? It is claimed that this should not be necessary. */
18893 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
18897 /* Like arm_compute_initial_elimination offset. Simpler because there
18898 isn't an ABI specified frame pointer for Thumb. Instead, we set it
18899 to point at the base of the local variables after static stack
18900 space for a function has been allocated. */
18903 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
18905 arm_stack_offsets *offsets;
18907 offsets = arm_get_frame_offsets ();
18911 case ARG_POINTER_REGNUM:
18914 case STACK_POINTER_REGNUM:
18915 return offsets->outgoing_args - offsets->saved_args;
18917 case FRAME_POINTER_REGNUM:
18918 return offsets->soft_frame - offsets->saved_args;
18920 case ARM_HARD_FRAME_POINTER_REGNUM:
18921 return offsets->saved_regs - offsets->saved_args;
18923 case THUMB_HARD_FRAME_POINTER_REGNUM:
18924 return offsets->locals_base - offsets->saved_args;
18927 gcc_unreachable ();
18931 case FRAME_POINTER_REGNUM:
18934 case STACK_POINTER_REGNUM:
18935 return offsets->outgoing_args - offsets->soft_frame;
18937 case ARM_HARD_FRAME_POINTER_REGNUM:
18938 return offsets->saved_regs - offsets->soft_frame;
18940 case THUMB_HARD_FRAME_POINTER_REGNUM:
18941 return offsets->locals_base - offsets->soft_frame;
18944 gcc_unreachable ();
18949 gcc_unreachable ();
18953 /* Generate the rest of a function's prologue. */
18955 thumb1_expand_prologue (void)
18959 HOST_WIDE_INT amount;
18960 arm_stack_offsets *offsets;
18961 unsigned long func_type;
18963 unsigned long live_regs_mask;
18965 func_type = arm_current_func_type ();
18967 /* Naked functions don't have prologues. */
18968 if (IS_NAKED (func_type))
18971 if (IS_INTERRUPT (func_type))
18973 error ("interrupt Service Routines cannot be coded in Thumb mode");
18977 offsets = arm_get_frame_offsets ();
18978 live_regs_mask = offsets->saved_regs_mask;
18979 /* Load the pic register before setting the frame pointer,
18980 so we can use r7 as a temporary work register. */
18981 if (flag_pic && arm_pic_register != INVALID_REGNUM)
18982 arm_load_pic_register (live_regs_mask);
18984 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18985 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
18986 stack_pointer_rtx);
18988 amount = offsets->outgoing_args - offsets->saved_regs;
18993 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18994 GEN_INT (- amount)));
18995 RTX_FRAME_RELATED_P (insn) = 1;
19001 /* The stack decrement is too big for an immediate value in a single
19002 insn. In theory we could issue multiple subtracts, but after
19003 three of them it becomes more space efficient to place the full
19004 value in the constant pool and load into a register. (Also the
19005 ARM debugger really likes to see only one stack decrement per
19006 function). So instead we look for a scratch register into which
19007 we can load the decrement, and then we subtract this from the
19008 stack pointer. Unfortunately on the thumb the only available
19009 scratch registers are the argument registers, and we cannot use
19010 these as they may hold arguments to the function. Instead we
19011 attempt to locate a call preserved register which is used by this
19012 function. If we can find one, then we know that it will have
19013 been pushed at the start of the prologue and so we can corrupt
19015 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19016 if (live_regs_mask & (1 << regno))
19019 gcc_assert(regno <= LAST_LO_REGNUM);
19021 reg = gen_rtx_REG (SImode, regno);
19023 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19025 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19026 stack_pointer_rtx, reg));
19027 RTX_FRAME_RELATED_P (insn) = 1;
19028 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19029 plus_constant (stack_pointer_rtx,
19031 RTX_FRAME_RELATED_P (dwarf) = 1;
19032 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19036 if (frame_pointer_needed)
19037 thumb_set_frame_pointer (offsets);
19039 /* If we are profiling, make sure no instructions are scheduled before
19040 the call to mcount. Similarly if the user has requested no
19041 scheduling in the prolog. Similarly if we want non-call exceptions
19042 using the EABI unwinder, to prevent faulting instructions from being
19043 swapped with a stack adjustment. */
19044 if (crtl->profile || !TARGET_SCHED_PROLOG
19045 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19046 emit_insn (gen_blockage ());
19048 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19049 if (live_regs_mask & 0xff)
19050 cfun->machine->lr_save_eliminated = 0;
19055 thumb1_expand_epilogue (void)
19057 HOST_WIDE_INT amount;
19058 arm_stack_offsets *offsets;
19061 /* Naked functions don't have prologues. */
19062 if (IS_NAKED (arm_current_func_type ()))
19065 offsets = arm_get_frame_offsets ();
19066 amount = offsets->outgoing_args - offsets->saved_regs;
19068 if (frame_pointer_needed)
19070 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19071 amount = offsets->locals_base - offsets->saved_regs;
19074 gcc_assert (amount >= 0);
19078 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19079 GEN_INT (amount)));
19082 /* r3 is always free in the epilogue. */
19083 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19085 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19086 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19090 /* Emit a USE (stack_pointer_rtx), so that
19091 the stack adjustment will not be deleted. */
19092 emit_insn (gen_prologue_use (stack_pointer_rtx));
19094 if (crtl->profile || !TARGET_SCHED_PROLOG)
19095 emit_insn (gen_blockage ());
19097 /* Emit a clobber for each insn that will be restored in the epilogue,
19098 so that flow2 will get register lifetimes correct. */
19099 for (regno = 0; regno < 13; regno++)
19100 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19101 emit_clobber (gen_rtx_REG (SImode, regno));
19103 if (! df_regs_ever_live_p (LR_REGNUM))
19104 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19108 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19110 arm_stack_offsets *offsets;
19111 unsigned long live_regs_mask = 0;
19112 unsigned long l_mask;
19113 unsigned high_regs_pushed = 0;
19114 int cfa_offset = 0;
19117 if (IS_NAKED (arm_current_func_type ()))
19120 if (is_called_in_ARM_mode (current_function_decl))
19124 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19125 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19127 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19129 /* Generate code sequence to switch us into Thumb mode. */
19130 /* The .code 32 directive has already been emitted by
19131 ASM_DECLARE_FUNCTION_NAME. */
19132 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19133 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19135 /* Generate a label, so that the debugger will notice the
19136 change in instruction sets. This label is also used by
19137 the assembler to bypass the ARM code when this function
19138 is called from a Thumb encoded function elsewhere in the
19139 same file. Hence the definition of STUB_NAME here must
19140 agree with the definition in gas/config/tc-arm.c. */
19142 #define STUB_NAME ".real_start_of"
19144 fprintf (f, "\t.code\t16\n");
19146 if (arm_dllexport_name_p (name))
19147 name = arm_strip_name_encoding (name);
19149 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19150 fprintf (f, "\t.thumb_func\n");
19151 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19154 if (crtl->args.pretend_args_size)
19156 /* Output unwind directive for the stack adjustment. */
19157 if (ARM_EABI_UNWIND_TABLES)
19158 fprintf (f, "\t.pad #%d\n",
19159 crtl->args.pretend_args_size);
19161 if (cfun->machine->uses_anonymous_args)
19165 fprintf (f, "\tpush\t{");
19167 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19169 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19170 regno <= LAST_ARG_REGNUM;
19172 asm_fprintf (f, "%r%s", regno,
19173 regno == LAST_ARG_REGNUM ? "" : ", ");
19175 fprintf (f, "}\n");
19178 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19179 SP_REGNUM, SP_REGNUM,
19180 crtl->args.pretend_args_size);
19182 /* We don't need to record the stores for unwinding (would it
19183 help the debugger any if we did?), but record the change in
19184 the stack pointer. */
19185 if (dwarf2out_do_frame ())
19187 char *l = dwarf2out_cfi_label (false);
19189 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19190 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19194 /* Get the registers we are going to push. */
19195 offsets = arm_get_frame_offsets ();
19196 live_regs_mask = offsets->saved_regs_mask;
19197 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19198 l_mask = live_regs_mask & 0x40ff;
19199 /* Then count how many other high registers will need to be pushed. */
19200 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19202 if (TARGET_BACKTRACE)
19205 unsigned work_register;
19207 /* We have been asked to create a stack backtrace structure.
19208 The code looks like this:
19212 0 sub SP, #16 Reserve space for 4 registers.
19213 2 push {R7} Push low registers.
19214 4 add R7, SP, #20 Get the stack pointer before the push.
19215 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19216 8 mov R7, PC Get hold of the start of this code plus 12.
19217 10 str R7, [SP, #16] Store it.
19218 12 mov R7, FP Get hold of the current frame pointer.
19219 14 str R7, [SP, #4] Store it.
19220 16 mov R7, LR Get hold of the current return address.
19221 18 str R7, [SP, #12] Store it.
19222 20 add R7, SP, #16 Point at the start of the backtrace structure.
19223 22 mov FP, R7 Put this value into the frame pointer. */
19225 work_register = thumb_find_work_register (live_regs_mask);
19227 if (ARM_EABI_UNWIND_TABLES)
19228 asm_fprintf (f, "\t.pad #16\n");
19231 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19232 SP_REGNUM, SP_REGNUM);
19234 if (dwarf2out_do_frame ())
19236 char *l = dwarf2out_cfi_label (false);
19238 cfa_offset = cfa_offset + 16;
19239 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19244 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19245 offset = bit_count (l_mask) * UNITS_PER_WORD;
19250 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19251 offset + 16 + crtl->args.pretend_args_size);
19253 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19256 /* Make sure that the instruction fetching the PC is in the right place
19257 to calculate "start of backtrace creation code + 12". */
19260 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19261 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19263 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19264 ARM_HARD_FRAME_POINTER_REGNUM);
19265 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19270 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19271 ARM_HARD_FRAME_POINTER_REGNUM);
19272 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19274 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19275 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19279 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19280 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19282 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19284 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19285 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19287 /* Optimization: If we are not pushing any low registers but we are going
19288 to push some high registers then delay our first push. This will just
19289 be a push of LR and we can combine it with the push of the first high
19291 else if ((l_mask & 0xff) != 0
19292 || (high_regs_pushed == 0 && l_mask))
19293 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19295 if (high_regs_pushed)
19297 unsigned pushable_regs;
19298 unsigned next_hi_reg;
19300 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19301 if (live_regs_mask & (1 << next_hi_reg))
19304 pushable_regs = l_mask & 0xff;
19306 if (pushable_regs == 0)
19307 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19309 while (high_regs_pushed > 0)
19311 unsigned long real_regs_mask = 0;
19313 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19315 if (pushable_regs & (1 << regno))
19317 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19319 high_regs_pushed --;
19320 real_regs_mask |= (1 << next_hi_reg);
19322 if (high_regs_pushed)
19324 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19326 if (live_regs_mask & (1 << next_hi_reg))
19331 pushable_regs &= ~((1 << regno) - 1);
19337 /* If we had to find a work register and we have not yet
19338 saved the LR then add it to the list of regs to push. */
19339 if (l_mask == (1 << LR_REGNUM))
19341 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19343 real_regs_mask | (1 << LR_REGNUM));
19347 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19352 /* Handle the case of a double word load into a low register from
19353 a computed memory address. The computed address may involve a
19354 register which is overwritten by the load. */
19356 thumb_load_double_from_address (rtx *operands)
19364 gcc_assert (GET_CODE (operands[0]) == REG);
19365 gcc_assert (GET_CODE (operands[1]) == MEM);
19367 /* Get the memory address. */
19368 addr = XEXP (operands[1], 0);
19370 /* Work out how the memory address is computed. */
19371 switch (GET_CODE (addr))
19374 operands[2] = adjust_address (operands[1], SImode, 4);
19376 if (REGNO (operands[0]) == REGNO (addr))
19378 output_asm_insn ("ldr\t%H0, %2", operands);
19379 output_asm_insn ("ldr\t%0, %1", operands);
19383 output_asm_insn ("ldr\t%0, %1", operands);
19384 output_asm_insn ("ldr\t%H0, %2", operands);
19389 /* Compute <address> + 4 for the high order load. */
19390 operands[2] = adjust_address (operands[1], SImode, 4);
19392 output_asm_insn ("ldr\t%0, %1", operands);
19393 output_asm_insn ("ldr\t%H0, %2", operands);
19397 arg1 = XEXP (addr, 0);
19398 arg2 = XEXP (addr, 1);
19400 if (CONSTANT_P (arg1))
19401 base = arg2, offset = arg1;
19403 base = arg1, offset = arg2;
19405 gcc_assert (GET_CODE (base) == REG);
19407 /* Catch the case of <address> = <reg> + <reg> */
19408 if (GET_CODE (offset) == REG)
19410 int reg_offset = REGNO (offset);
19411 int reg_base = REGNO (base);
19412 int reg_dest = REGNO (operands[0]);
19414 /* Add the base and offset registers together into the
19415 higher destination register. */
19416 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19417 reg_dest + 1, reg_base, reg_offset);
19419 /* Load the lower destination register from the address in
19420 the higher destination register. */
19421 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19422 reg_dest, reg_dest + 1);
19424 /* Load the higher destination register from its own address
19426 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19427 reg_dest + 1, reg_dest + 1);
19431 /* Compute <address> + 4 for the high order load. */
19432 operands[2] = adjust_address (operands[1], SImode, 4);
19434 /* If the computed address is held in the low order register
19435 then load the high order register first, otherwise always
19436 load the low order register first. */
19437 if (REGNO (operands[0]) == REGNO (base))
19439 output_asm_insn ("ldr\t%H0, %2", operands);
19440 output_asm_insn ("ldr\t%0, %1", operands);
19444 output_asm_insn ("ldr\t%0, %1", operands);
19445 output_asm_insn ("ldr\t%H0, %2", operands);
19451 /* With no registers to worry about we can just load the value
19453 operands[2] = adjust_address (operands[1], SImode, 4);
19455 output_asm_insn ("ldr\t%H0, %2", operands);
19456 output_asm_insn ("ldr\t%0, %1", operands);
19460 gcc_unreachable ();
19467 thumb_output_move_mem_multiple (int n, rtx *operands)
19474 if (REGNO (operands[4]) > REGNO (operands[5]))
19477 operands[4] = operands[5];
19480 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19481 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19485 if (REGNO (operands[4]) > REGNO (operands[5]))
19488 operands[4] = operands[5];
19491 if (REGNO (operands[5]) > REGNO (operands[6]))
19494 operands[5] = operands[6];
19497 if (REGNO (operands[4]) > REGNO (operands[5]))
19500 operands[4] = operands[5];
19504 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
19505 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
19509 gcc_unreachable ();
19515 /* Output a call-via instruction for thumb state. */
19517 thumb_call_via_reg (rtx reg)
19519 int regno = REGNO (reg);
19522 gcc_assert (regno < LR_REGNUM);
19524 /* If we are in the normal text section we can use a single instance
19525 per compilation unit. If we are doing function sections, then we need
19526 an entry per section, since we can't rely on reachability. */
19527 if (in_section == text_section)
19529 thumb_call_reg_needed = 1;
19531 if (thumb_call_via_label[regno] == NULL)
19532 thumb_call_via_label[regno] = gen_label_rtx ();
19533 labelp = thumb_call_via_label + regno;
19537 if (cfun->machine->call_via[regno] == NULL)
19538 cfun->machine->call_via[regno] = gen_label_rtx ();
19539 labelp = cfun->machine->call_via + regno;
19542 output_asm_insn ("bl\t%a0", labelp);
19546 /* Routines for generating rtl. */
19548 thumb_expand_movmemqi (rtx *operands)
19550 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
19551 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
19552 HOST_WIDE_INT len = INTVAL (operands[2]);
19553 HOST_WIDE_INT offset = 0;
19557 emit_insn (gen_movmem12b (out, in, out, in));
19563 emit_insn (gen_movmem8b (out, in, out, in));
19569 rtx reg = gen_reg_rtx (SImode);
19570 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
19571 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
19578 rtx reg = gen_reg_rtx (HImode);
19579 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
19580 plus_constant (in, offset))));
19581 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
19589 rtx reg = gen_reg_rtx (QImode);
19590 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
19591 plus_constant (in, offset))));
19592 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
19598 thumb_reload_out_hi (rtx *operands)
19600 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
19603 /* Handle reading a half-word from memory during reload. */
19605 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
19607 gcc_unreachable ();
19610 /* Return the length of a function name prefix
19611 that starts with the character 'c'. */
19613 arm_get_strip_length (int c)
19617 ARM_NAME_ENCODING_LENGTHS
19622 /* Return a pointer to a function's name with any
19623 and all prefix encodings stripped from it. */
19625 arm_strip_name_encoding (const char *name)
19629 while ((skip = arm_get_strip_length (* name)))
19635 /* If there is a '*' anywhere in the name's prefix, then
19636 emit the stripped name verbatim, otherwise prepend an
19637 underscore if leading underscores are being used. */
19639 arm_asm_output_labelref (FILE *stream, const char *name)
19644 while ((skip = arm_get_strip_length (* name)))
19646 verbatim |= (*name == '*');
19651 fputs (name, stream);
19653 asm_fprintf (stream, "%U%s", name);
19657 arm_file_start (void)
19661 if (TARGET_UNIFIED_ASM)
19662 asm_fprintf (asm_out_file, "\t.syntax unified\n");
19666 const char *fpu_name;
19667 if (arm_select[0].string)
19668 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
19669 else if (arm_select[1].string)
19670 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
19672 asm_fprintf (asm_out_file, "\t.cpu %s\n",
19673 all_cores[arm_default_cpu].name);
19675 if (TARGET_SOFT_FLOAT)
19678 fpu_name = "softvfp";
19680 fpu_name = "softfpa";
19684 int set_float_abi_attributes = 0;
19685 switch (arm_fpu_arch)
19690 case FPUTYPE_FPA_EMU2:
19693 case FPUTYPE_FPA_EMU3:
19696 case FPUTYPE_MAVERICK:
19697 fpu_name = "maverick";
19701 set_float_abi_attributes = 1;
19703 case FPUTYPE_VFP3D16:
19704 fpu_name = "vfpv3-d16";
19705 set_float_abi_attributes = 1;
19708 fpu_name = "vfpv3";
19709 set_float_abi_attributes = 1;
19713 set_float_abi_attributes = 1;
19715 case FPUTYPE_NEON_FP16:
19716 fpu_name = "neon-fp16";
19717 set_float_abi_attributes = 1;
19722 if (set_float_abi_attributes)
19724 if (TARGET_HARD_FLOAT)
19725 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
19726 if (TARGET_HARD_FLOAT_ABI)
19727 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
19730 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
19732 /* Some of these attributes only apply when the corresponding features
19733 are used. However we don't have any easy way of figuring this out.
19734 Conservatively record the setting that would have been used. */
19736 /* Tag_ABI_FP_rounding. */
19737 if (flag_rounding_math)
19738 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
19739 if (!flag_unsafe_math_optimizations)
19741 /* Tag_ABI_FP_denomal. */
19742 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
19743 /* Tag_ABI_FP_exceptions. */
19744 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
19746 /* Tag_ABI_FP_user_exceptions. */
19747 if (flag_signaling_nans)
19748 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
19749 /* Tag_ABI_FP_number_model. */
19750 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
19751 flag_finite_math_only ? 1 : 3);
19753 /* Tag_ABI_align8_needed. */
19754 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
19755 /* Tag_ABI_align8_preserved. */
19756 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
19757 /* Tag_ABI_enum_size. */
19758 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
19759 flag_short_enums ? 1 : 2);
19761 /* Tag_ABI_optimization_goals. */
19764 else if (optimize >= 2)
19770 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
19772 /* Tag_ABI_FP_16bit_format. */
19773 if (arm_fp16_format)
19774 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
19775 (int)arm_fp16_format);
19777 if (arm_lang_output_object_attributes_hook)
19778 arm_lang_output_object_attributes_hook();
19780 default_file_start();
19784 arm_file_end (void)
19788 if (NEED_INDICATE_EXEC_STACK)
19789 /* Add .note.GNU-stack. */
19790 file_end_indicate_exec_stack ();
19792 if (! thumb_call_reg_needed)
19795 switch_to_section (text_section);
19796 asm_fprintf (asm_out_file, "\t.code 16\n");
19797 ASM_OUTPUT_ALIGN (asm_out_file, 1);
19799 for (regno = 0; regno < LR_REGNUM; regno++)
19801 rtx label = thumb_call_via_label[regno];
19805 targetm.asm_out.internal_label (asm_out_file, "L",
19806 CODE_LABEL_NUMBER (label));
19807 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19813 /* Symbols in the text segment can be accessed without indirecting via the
19814 constant pool; it may take an extra binary operation, but this is still
19815 faster than indirecting via memory. Don't do this when not optimizing,
19816 since we won't be calculating al of the offsets necessary to do this
19820 arm_encode_section_info (tree decl, rtx rtl, int first)
19822 if (optimize > 0 && TREE_CONSTANT (decl))
19823 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
19825 default_encode_section_info (decl, rtl, first);
19827 #endif /* !ARM_PE */
19830 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
19832 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
19833 && !strcmp (prefix, "L"))
19835 arm_ccfsm_state = 0;
19836 arm_target_insn = NULL;
19838 default_internal_label (stream, prefix, labelno);
19841 /* Output code to add DELTA to the first argument, and then jump
19842 to FUNCTION. Used for C++ multiple inheritance. */
19844 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
19845 HOST_WIDE_INT delta,
19846 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
19849 static int thunk_label = 0;
19852 int mi_delta = delta;
19853 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
19855 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
19858 mi_delta = - mi_delta;
19862 int labelno = thunk_label++;
19863 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
19864 /* Thunks are entered in arm mode when avaiable. */
19865 if (TARGET_THUMB1_ONLY)
19867 /* push r3 so we can use it as a temporary. */
19868 /* TODO: Omit this save if r3 is not used. */
19869 fputs ("\tpush {r3}\n", file);
19870 fputs ("\tldr\tr3, ", file);
19874 fputs ("\tldr\tr12, ", file);
19876 assemble_name (file, label);
19877 fputc ('\n', file);
19880 /* If we are generating PIC, the ldr instruction below loads
19881 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
19882 the address of the add + 8, so we have:
19884 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
19887 Note that we have "+ 1" because some versions of GNU ld
19888 don't set the low bit of the result for R_ARM_REL32
19889 relocations against thumb function symbols.
19890 On ARMv6M this is +4, not +8. */
19891 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
19892 assemble_name (file, labelpc);
19893 fputs (":\n", file);
19894 if (TARGET_THUMB1_ONLY)
19896 /* This is 2 insns after the start of the thunk, so we know it
19897 is 4-byte aligned. */
19898 fputs ("\tadd\tr3, pc, r3\n", file);
19899 fputs ("\tmov r12, r3\n", file);
19902 fputs ("\tadd\tr12, pc, r12\n", file);
19904 else if (TARGET_THUMB1_ONLY)
19905 fputs ("\tmov r12, r3\n", file);
19907 if (TARGET_THUMB1_ONLY)
19909 if (mi_delta > 255)
19911 fputs ("\tldr\tr3, ", file);
19912 assemble_name (file, label);
19913 fputs ("+4\n", file);
19914 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
19915 mi_op, this_regno, this_regno);
19917 else if (mi_delta != 0)
19919 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
19920 mi_op, this_regno, this_regno,
19926 /* TODO: Use movw/movt for large constants when available. */
19927 while (mi_delta != 0)
19929 if ((mi_delta & (3 << shift)) == 0)
19933 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
19934 mi_op, this_regno, this_regno,
19935 mi_delta & (0xff << shift));
19936 mi_delta &= ~(0xff << shift);
19943 if (TARGET_THUMB1_ONLY)
19944 fputs ("\tpop\t{r3}\n", file);
19946 fprintf (file, "\tbx\tr12\n");
19947 ASM_OUTPUT_ALIGN (file, 2);
19948 assemble_name (file, label);
19949 fputs (":\n", file);
19952 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
19953 rtx tem = XEXP (DECL_RTL (function), 0);
19954 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
19955 tem = gen_rtx_MINUS (GET_MODE (tem),
19957 gen_rtx_SYMBOL_REF (Pmode,
19958 ggc_strdup (labelpc)));
19959 assemble_integer (tem, 4, BITS_PER_WORD, 1);
19962 /* Output ".word .LTHUNKn". */
19963 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
19965 if (TARGET_THUMB1_ONLY && mi_delta > 255)
19966 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
19970 fputs ("\tb\t", file);
19971 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
19972 if (NEED_PLT_RELOC)
19973 fputs ("(PLT)", file);
19974 fputc ('\n', file);
19979 arm_emit_vector_const (FILE *file, rtx x)
19982 const char * pattern;
19984 gcc_assert (GET_CODE (x) == CONST_VECTOR);
19986 switch (GET_MODE (x))
19988 case V2SImode: pattern = "%08x"; break;
19989 case V4HImode: pattern = "%04x"; break;
19990 case V8QImode: pattern = "%02x"; break;
19991 default: gcc_unreachable ();
19994 fprintf (file, "0x");
19995 for (i = CONST_VECTOR_NUNITS (x); i--;)
19999 element = CONST_VECTOR_ELT (x, i);
20000 fprintf (file, pattern, INTVAL (element));
20006 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20007 HFmode constant pool entries are actually loaded with ldr. */
20009 arm_emit_fp16_const (rtx c)
20014 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20015 bits = real_to_target (NULL, &r, HFmode);
20016 if (WORDS_BIG_ENDIAN)
20017 assemble_zeros (2);
20018 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20019 if (!WORDS_BIG_ENDIAN)
20020 assemble_zeros (2);
20024 arm_output_load_gr (rtx *operands)
20031 if (GET_CODE (operands [1]) != MEM
20032 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20033 || GET_CODE (reg = XEXP (sum, 0)) != REG
20034 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20035 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20036 return "wldrw%?\t%0, %1";
20038 /* Fix up an out-of-range load of a GR register. */
20039 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20040 wcgr = operands[0];
20042 output_asm_insn ("ldr%?\t%0, %1", operands);
20044 operands[0] = wcgr;
20046 output_asm_insn ("tmcr%?\t%0, %1", operands);
20047 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20052 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20054 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20055 named arg and all anonymous args onto the stack.
20056 XXX I know the prologue shouldn't be pushing registers, but it is faster
20060 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20061 enum machine_mode mode,
20064 int second_time ATTRIBUTE_UNUSED)
20068 cfun->machine->uses_anonymous_args = 1;
20069 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20071 nregs = pcum->aapcs_ncrn;
20072 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20076 nregs = pcum->nregs;
20078 if (nregs < NUM_ARG_REGS)
20079 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
20082 /* Return nonzero if the CONSUMER instruction (a store) does not need
20083 PRODUCER's value to calculate the address. */
20086 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20088 rtx value = PATTERN (producer);
20089 rtx addr = PATTERN (consumer);
20091 if (GET_CODE (value) == COND_EXEC)
20092 value = COND_EXEC_CODE (value);
20093 if (GET_CODE (value) == PARALLEL)
20094 value = XVECEXP (value, 0, 0);
20095 value = XEXP (value, 0);
20096 if (GET_CODE (addr) == COND_EXEC)
20097 addr = COND_EXEC_CODE (addr);
20098 if (GET_CODE (addr) == PARALLEL)
20099 addr = XVECEXP (addr, 0, 0);
20100 addr = XEXP (addr, 0);
20102 return !reg_overlap_mentioned_p (value, addr);
20105 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20106 have an early register shift value or amount dependency on the
20107 result of PRODUCER. */
20110 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20112 rtx value = PATTERN (producer);
20113 rtx op = PATTERN (consumer);
20116 if (GET_CODE (value) == COND_EXEC)
20117 value = COND_EXEC_CODE (value);
20118 if (GET_CODE (value) == PARALLEL)
20119 value = XVECEXP (value, 0, 0);
20120 value = XEXP (value, 0);
20121 if (GET_CODE (op) == COND_EXEC)
20122 op = COND_EXEC_CODE (op);
20123 if (GET_CODE (op) == PARALLEL)
20124 op = XVECEXP (op, 0, 0);
20127 early_op = XEXP (op, 0);
20128 /* This is either an actual independent shift, or a shift applied to
20129 the first operand of another operation. We want the whole shift
20131 if (GET_CODE (early_op) == REG)
20134 return !reg_overlap_mentioned_p (value, early_op);
20137 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20138 have an early register shift value dependency on the result of
20142 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20144 rtx value = PATTERN (producer);
20145 rtx op = PATTERN (consumer);
20148 if (GET_CODE (value) == COND_EXEC)
20149 value = COND_EXEC_CODE (value);
20150 if (GET_CODE (value) == PARALLEL)
20151 value = XVECEXP (value, 0, 0);
20152 value = XEXP (value, 0);
20153 if (GET_CODE (op) == COND_EXEC)
20154 op = COND_EXEC_CODE (op);
20155 if (GET_CODE (op) == PARALLEL)
20156 op = XVECEXP (op, 0, 0);
20159 early_op = XEXP (op, 0);
20161 /* This is either an actual independent shift, or a shift applied to
20162 the first operand of another operation. We want the value being
20163 shifted, in either case. */
20164 if (GET_CODE (early_op) != REG)
20165 early_op = XEXP (early_op, 0);
20167 return !reg_overlap_mentioned_p (value, early_op);
20170 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20171 have an early register mult dependency on the result of
20175 arm_no_early_mul_dep (rtx producer, rtx consumer)
20177 rtx value = PATTERN (producer);
20178 rtx op = PATTERN (consumer);
20180 if (GET_CODE (value) == COND_EXEC)
20181 value = COND_EXEC_CODE (value);
20182 if (GET_CODE (value) == PARALLEL)
20183 value = XVECEXP (value, 0, 0);
20184 value = XEXP (value, 0);
20185 if (GET_CODE (op) == COND_EXEC)
20186 op = COND_EXEC_CODE (op);
20187 if (GET_CODE (op) == PARALLEL)
20188 op = XVECEXP (op, 0, 0);
20191 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20193 if (GET_CODE (XEXP (op, 0)) == MULT)
20194 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20196 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20202 /* We can't rely on the caller doing the proper promotion when
20203 using APCS or ATPCS. */
20206 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20208 return !TARGET_AAPCS_BASED;
20211 static enum machine_mode
20212 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20213 enum machine_mode mode,
20214 int *punsignedp ATTRIBUTE_UNUSED,
20215 const_tree fntype ATTRIBUTE_UNUSED,
20216 int for_return ATTRIBUTE_UNUSED)
20218 if (GET_MODE_CLASS (mode) == MODE_INT
20219 && GET_MODE_SIZE (mode) < 4)
20225 /* AAPCS based ABIs use short enums by default. */
20228 arm_default_short_enums (void)
20230 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20234 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20237 arm_align_anon_bitfield (void)
20239 return TARGET_AAPCS_BASED;
20243 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20246 arm_cxx_guard_type (void)
20248 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20251 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20252 has an accumulator dependency on the result of the producer (a
20253 multiplication instruction) and no other dependency on that result. */
20255 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20257 rtx mul = PATTERN (producer);
20258 rtx mac = PATTERN (consumer);
20260 rtx mac_op0, mac_op1, mac_acc;
20262 if (GET_CODE (mul) == COND_EXEC)
20263 mul = COND_EXEC_CODE (mul);
20264 if (GET_CODE (mac) == COND_EXEC)
20265 mac = COND_EXEC_CODE (mac);
20267 /* Check that mul is of the form (set (...) (mult ...))
20268 and mla is of the form (set (...) (plus (mult ...) (...))). */
20269 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20270 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20271 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20274 mul_result = XEXP (mul, 0);
20275 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20276 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20277 mac_acc = XEXP (XEXP (mac, 1), 1);
20279 return (reg_overlap_mentioned_p (mul_result, mac_acc)
20280 && !reg_overlap_mentioned_p (mul_result, mac_op0)
20281 && !reg_overlap_mentioned_p (mul_result, mac_op1));
20285 /* The EABI says test the least significant bit of a guard variable. */
20288 arm_cxx_guard_mask_bit (void)
20290 return TARGET_AAPCS_BASED;
20294 /* The EABI specifies that all array cookies are 8 bytes long. */
20297 arm_get_cookie_size (tree type)
20301 if (!TARGET_AAPCS_BASED)
20302 return default_cxx_get_cookie_size (type);
20304 size = build_int_cst (sizetype, 8);
20309 /* The EABI says that array cookies should also contain the element size. */
20312 arm_cookie_has_size (void)
20314 return TARGET_AAPCS_BASED;
20318 /* The EABI says constructors and destructors should return a pointer to
20319 the object constructed/destroyed. */
20322 arm_cxx_cdtor_returns_this (void)
20324 return TARGET_AAPCS_BASED;
20327 /* The EABI says that an inline function may never be the key
20331 arm_cxx_key_method_may_be_inline (void)
20333 return !TARGET_AAPCS_BASED;
20337 arm_cxx_determine_class_data_visibility (tree decl)
20339 if (!TARGET_AAPCS_BASED
20340 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20343 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20344 is exported. However, on systems without dynamic vague linkage,
20345 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20346 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20347 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20349 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20350 DECL_VISIBILITY_SPECIFIED (decl) = 1;
20354 arm_cxx_class_data_always_comdat (void)
20356 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20357 vague linkage if the class has no key function. */
20358 return !TARGET_AAPCS_BASED;
20362 /* The EABI says __aeabi_atexit should be used to register static
20366 arm_cxx_use_aeabi_atexit (void)
20368 return TARGET_AAPCS_BASED;
20373 arm_set_return_address (rtx source, rtx scratch)
20375 arm_stack_offsets *offsets;
20376 HOST_WIDE_INT delta;
20378 unsigned long saved_regs;
20380 offsets = arm_get_frame_offsets ();
20381 saved_regs = offsets->saved_regs_mask;
20383 if ((saved_regs & (1 << LR_REGNUM)) == 0)
20384 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20387 if (frame_pointer_needed)
20388 addr = plus_constant(hard_frame_pointer_rtx, -4);
20391 /* LR will be the first saved register. */
20392 delta = offsets->outgoing_args - (offsets->frame + 4);
20397 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20398 GEN_INT (delta & ~4095)));
20403 addr = stack_pointer_rtx;
20405 addr = plus_constant (addr, delta);
20407 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20413 thumb_set_return_address (rtx source, rtx scratch)
20415 arm_stack_offsets *offsets;
20416 HOST_WIDE_INT delta;
20417 HOST_WIDE_INT limit;
20420 unsigned long mask;
20424 offsets = arm_get_frame_offsets ();
20425 mask = offsets->saved_regs_mask;
20426 if (mask & (1 << LR_REGNUM))
20429 /* Find the saved regs. */
20430 if (frame_pointer_needed)
20432 delta = offsets->soft_frame - offsets->saved_args;
20433 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20439 delta = offsets->outgoing_args - offsets->saved_args;
20442 /* Allow for the stack frame. */
20443 if (TARGET_THUMB1 && TARGET_BACKTRACE)
20445 /* The link register is always the first saved register. */
20448 /* Construct the address. */
20449 addr = gen_rtx_REG (SImode, reg);
20452 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20453 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20457 addr = plus_constant (addr, delta);
20459 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20462 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20465 /* Implements target hook vector_mode_supported_p. */
20467 arm_vector_mode_supported_p (enum machine_mode mode)
20469 /* Neon also supports V2SImode, etc. listed in the clause below. */
20470 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20471 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20474 if ((TARGET_NEON || TARGET_IWMMXT)
20475 && ((mode == V2SImode)
20476 || (mode == V4HImode)
20477 || (mode == V8QImode)))
20483 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20484 ARM insns and therefore guarantee that the shift count is modulo 256.
20485 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20486 guarantee no particular behavior for out-of-range counts. */
20488 static unsigned HOST_WIDE_INT
20489 arm_shift_truncation_mask (enum machine_mode mode)
20491 return mode == SImode ? 255 : 0;
20495 /* Map internal gcc register numbers to DWARF2 register numbers. */
20498 arm_dbx_register_number (unsigned int regno)
20503 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20504 compatibility. The EABI defines them as registers 96-103. */
20505 if (IS_FPA_REGNUM (regno))
20506 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20508 /* FIXME: VFPv3 register numbering. */
20509 if (IS_VFP_REGNUM (regno))
20510 return 64 + regno - FIRST_VFP_REGNUM;
20512 if (IS_IWMMXT_GR_REGNUM (regno))
20513 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20515 if (IS_IWMMXT_REGNUM (regno))
20516 return 112 + regno - FIRST_IWMMXT_REGNUM;
20518 gcc_unreachable ();
20522 #ifdef TARGET_UNWIND_INFO
20523 /* Emit unwind directives for a store-multiple instruction or stack pointer
20524 push during alignment.
20525 These should only ever be generated by the function prologue code, so
20526 expect them to have a particular form. */
20529 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
20532 HOST_WIDE_INT offset;
20533 HOST_WIDE_INT nregs;
20539 e = XVECEXP (p, 0, 0);
20540 if (GET_CODE (e) != SET)
20543 /* First insn will adjust the stack pointer. */
20544 if (GET_CODE (e) != SET
20545 || GET_CODE (XEXP (e, 0)) != REG
20546 || REGNO (XEXP (e, 0)) != SP_REGNUM
20547 || GET_CODE (XEXP (e, 1)) != PLUS)
20550 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
20551 nregs = XVECLEN (p, 0) - 1;
20553 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
20556 /* The function prologue may also push pc, but not annotate it as it is
20557 never restored. We turn this into a stack pointer adjustment. */
20558 if (nregs * 4 == offset - 4)
20560 fprintf (asm_out_file, "\t.pad #4\n");
20564 fprintf (asm_out_file, "\t.save {");
20566 else if (IS_VFP_REGNUM (reg))
20569 fprintf (asm_out_file, "\t.vsave {");
20571 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
20573 /* FPA registers are done differently. */
20574 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
20578 /* Unknown register type. */
20581 /* If the stack increment doesn't match the size of the saved registers,
20582 something has gone horribly wrong. */
20583 if (offset != nregs * reg_size)
20588 /* The remaining insns will describe the stores. */
20589 for (i = 1; i <= nregs; i++)
20591 /* Expect (set (mem <addr>) (reg)).
20592 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
20593 e = XVECEXP (p, 0, i);
20594 if (GET_CODE (e) != SET
20595 || GET_CODE (XEXP (e, 0)) != MEM
20596 || GET_CODE (XEXP (e, 1)) != REG)
20599 reg = REGNO (XEXP (e, 1));
20604 fprintf (asm_out_file, ", ");
20605 /* We can't use %r for vfp because we need to use the
20606 double precision register names. */
20607 if (IS_VFP_REGNUM (reg))
20608 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
20610 asm_fprintf (asm_out_file, "%r", reg);
20612 #ifdef ENABLE_CHECKING
20613 /* Check that the addresses are consecutive. */
20614 e = XEXP (XEXP (e, 0), 0);
20615 if (GET_CODE (e) == PLUS)
20617 offset += reg_size;
20618 if (GET_CODE (XEXP (e, 0)) != REG
20619 || REGNO (XEXP (e, 0)) != SP_REGNUM
20620 || GET_CODE (XEXP (e, 1)) != CONST_INT
20621 || offset != INTVAL (XEXP (e, 1)))
20625 || GET_CODE (e) != REG
20626 || REGNO (e) != SP_REGNUM)
20630 fprintf (asm_out_file, "}\n");
20633 /* Emit unwind directives for a SET. */
20636 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
20644 switch (GET_CODE (e0))
20647 /* Pushing a single register. */
20648 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
20649 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
20650 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
20653 asm_fprintf (asm_out_file, "\t.save ");
20654 if (IS_VFP_REGNUM (REGNO (e1)))
20655 asm_fprintf(asm_out_file, "{d%d}\n",
20656 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
20658 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
20662 if (REGNO (e0) == SP_REGNUM)
20664 /* A stack increment. */
20665 if (GET_CODE (e1) != PLUS
20666 || GET_CODE (XEXP (e1, 0)) != REG
20667 || REGNO (XEXP (e1, 0)) != SP_REGNUM
20668 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
20671 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
20672 -INTVAL (XEXP (e1, 1)));
20674 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
20676 HOST_WIDE_INT offset;
20678 if (GET_CODE (e1) == PLUS)
20680 if (GET_CODE (XEXP (e1, 0)) != REG
20681 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
20683 reg = REGNO (XEXP (e1, 0));
20684 offset = INTVAL (XEXP (e1, 1));
20685 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
20686 HARD_FRAME_POINTER_REGNUM, reg,
20687 INTVAL (XEXP (e1, 1)));
20689 else if (GET_CODE (e1) == REG)
20692 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
20693 HARD_FRAME_POINTER_REGNUM, reg);
20698 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
20700 /* Move from sp to reg. */
20701 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
20703 else if (GET_CODE (e1) == PLUS
20704 && GET_CODE (XEXP (e1, 0)) == REG
20705 && REGNO (XEXP (e1, 0)) == SP_REGNUM
20706 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
20708 /* Set reg to offset from sp. */
20709 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
20710 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
20712 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
20714 /* Stack pointer save before alignment. */
20716 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
20729 /* Emit unwind directives for the given insn. */
20732 arm_unwind_emit (FILE * asm_out_file, rtx insn)
20736 if (!ARM_EABI_UNWIND_TABLES)
20739 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
20740 && (TREE_NOTHROW (current_function_decl)
20741 || crtl->all_throwers_are_sibcalls))
20744 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
20747 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
20749 pat = XEXP (pat, 0);
20751 pat = PATTERN (insn);
20753 switch (GET_CODE (pat))
20756 arm_unwind_emit_set (asm_out_file, pat);
20760 /* Store multiple. */
20761 arm_unwind_emit_sequence (asm_out_file, pat);
20770 /* Output a reference from a function exception table to the type_info
20771 object X. The EABI specifies that the symbol should be relocated by
20772 an R_ARM_TARGET2 relocation. */
20775 arm_output_ttype (rtx x)
20777 fputs ("\t.word\t", asm_out_file);
20778 output_addr_const (asm_out_file, x);
20779 /* Use special relocations for symbol references. */
20780 if (GET_CODE (x) != CONST_INT)
20781 fputs ("(TARGET2)", asm_out_file);
20782 fputc ('\n', asm_out_file);
20786 #endif /* TARGET_UNWIND_INFO */
20789 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
20790 stack alignment. */
20793 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
20795 rtx unspec = SET_SRC (pattern);
20796 gcc_assert (GET_CODE (unspec) == UNSPEC);
20800 case UNSPEC_STACK_ALIGN:
20801 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
20802 put anything on the stack, so hopefully it won't matter.
20803 CFA = SP will be correct after alignment. */
20804 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
20805 SET_DEST (pattern));
20808 gcc_unreachable ();
20813 /* Output unwind directives for the start/end of a function. */
20816 arm_output_fn_unwind (FILE * f, bool prologue)
20818 if (!ARM_EABI_UNWIND_TABLES)
20822 fputs ("\t.fnstart\n", f);
20825 /* If this function will never be unwound, then mark it as such.
20826 The came condition is used in arm_unwind_emit to suppress
20827 the frame annotations. */
20828 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
20829 && (TREE_NOTHROW (current_function_decl)
20830 || crtl->all_throwers_are_sibcalls))
20831 fputs("\t.cantunwind\n", f);
20833 fputs ("\t.fnend\n", f);
20838 arm_emit_tls_decoration (FILE *fp, rtx x)
20840 enum tls_reloc reloc;
20843 val = XVECEXP (x, 0, 0);
20844 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
20846 output_addr_const (fp, val);
20851 fputs ("(tlsgd)", fp);
20854 fputs ("(tlsldm)", fp);
20857 fputs ("(tlsldo)", fp);
20860 fputs ("(gottpoff)", fp);
20863 fputs ("(tpoff)", fp);
20866 gcc_unreachable ();
20874 fputs (" + (. - ", fp);
20875 output_addr_const (fp, XVECEXP (x, 0, 2));
20877 output_addr_const (fp, XVECEXP (x, 0, 3));
20887 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
20890 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
20892 gcc_assert (size == 4);
20893 fputs ("\t.word\t", file);
20894 output_addr_const (file, x);
20895 fputs ("(tlsldo)", file);
20899 arm_output_addr_const_extra (FILE *fp, rtx x)
20901 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
20902 return arm_emit_tls_decoration (fp, x);
20903 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
20906 int labelno = INTVAL (XVECEXP (x, 0, 0));
20908 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
20909 assemble_name_raw (fp, label);
20913 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
20915 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
20919 output_addr_const (fp, XVECEXP (x, 0, 0));
20923 else if (GET_CODE (x) == CONST_VECTOR)
20924 return arm_emit_vector_const (fp, x);
20929 /* Output assembly for a shift instruction.
20930 SET_FLAGS determines how the instruction modifies the condition codes.
20931 0 - Do not set condition codes.
20932 1 - Set condition codes.
20933 2 - Use smallest instruction. */
20935 arm_output_shift(rtx * operands, int set_flags)
20938 static const char flag_chars[3] = {'?', '.', '!'};
20943 c = flag_chars[set_flags];
20944 if (TARGET_UNIFIED_ASM)
20946 shift = shift_op(operands[3], &val);
20950 operands[2] = GEN_INT(val);
20951 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
20954 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
20957 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
20958 output_asm_insn (pattern, operands);
20962 /* Output a Thumb-1 casesi dispatch sequence. */
20964 thumb1_output_casesi (rtx *operands)
20966 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
20967 addr_diff_vec_flags flags;
20969 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
20971 flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
20973 switch (GET_MODE(diff_vec))
20976 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
20977 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
20979 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
20980 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
20982 return "bl\t%___gnu_thumb1_case_si";
20984 gcc_unreachable ();
20988 /* Output a Thumb-2 casesi instruction. */
20990 thumb2_output_casesi (rtx *operands)
20992 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
20994 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
20996 output_asm_insn ("cmp\t%0, %1", operands);
20997 output_asm_insn ("bhi\t%l3", operands);
20998 switch (GET_MODE(diff_vec))
21001 return "tbb\t[%|pc, %0]";
21003 return "tbh\t[%|pc, %0, lsl #1]";
21007 output_asm_insn ("adr\t%4, %l2", operands);
21008 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21009 output_asm_insn ("add\t%4, %4, %5", operands);
21014 output_asm_insn ("adr\t%4, %l2", operands);
21015 return "ldr\t%|pc, [%4, %0, lsl #2]";
21018 gcc_unreachable ();
21022 /* Most ARM cores are single issue, but some newer ones can dual issue.
21023 The scheduler descriptions rely on this being correct. */
21025 arm_issue_rate (void)
21040 /* A table and a function to perform ARM-specific name mangling for
21041 NEON vector types in order to conform to the AAPCS (see "Procedure
21042 Call Standard for the ARM Architecture", Appendix A). To qualify
21043 for emission with the mangled names defined in that document, a
21044 vector type must not only be of the correct mode but also be
21045 composed of NEON vector element types (e.g. __builtin_neon_qi). */
21048 enum machine_mode mode;
21049 const char *element_type_name;
21050 const char *aapcs_name;
21051 } arm_mangle_map_entry;
21053 static arm_mangle_map_entry arm_mangle_map[] = {
21054 /* 64-bit containerized types. */
21055 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
21056 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
21057 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
21058 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
21059 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
21060 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
21061 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
21062 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
21063 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
21064 /* 128-bit containerized types. */
21065 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
21066 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
21067 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
21068 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
21069 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
21070 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
21071 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
21072 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
21073 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
21074 { VOIDmode, NULL, NULL }
21078 arm_mangle_type (const_tree type)
21080 arm_mangle_map_entry *pos = arm_mangle_map;
21082 /* The ARM ABI documents (10th October 2008) say that "__va_list"
21083 has to be managled as if it is in the "std" namespace. */
21084 if (TARGET_AAPCS_BASED
21085 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
21087 static bool warned;
21088 if (!warned && warn_psabi)
21091 inform (input_location,
21092 "the mangling of %<va_list%> has changed in GCC 4.4");
21094 return "St9__va_list";
21097 /* Half-precision float. */
21098 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
21101 if (TREE_CODE (type) != VECTOR_TYPE)
21104 /* Check the mode of the vector type, and the name of the vector
21105 element type, against the table. */
21106 while (pos->mode != VOIDmode)
21108 tree elt_type = TREE_TYPE (type);
21110 if (pos->mode == TYPE_MODE (type)
21111 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
21112 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
21113 pos->element_type_name))
21114 return pos->aapcs_name;
21119 /* Use the default mangling for unrecognized (possibly user-defined)
21124 /* Order of allocation of core registers for Thumb: this allocation is
21125 written over the corresponding initial entries of the array
21126 initialized with REG_ALLOC_ORDER. We allocate all low registers
21127 first. Saving and restoring a low register is usually cheaper than
21128 using a call-clobbered high register. */
21130 static const int thumb_core_reg_alloc_order[] =
21132 3, 2, 1, 0, 4, 5, 6, 7,
21133 14, 12, 8, 9, 10, 11, 13, 15
21136 /* Adjust register allocation order when compiling for Thumb. */
21139 arm_order_regs_for_local_alloc (void)
21141 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
21142 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
21144 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
21145 sizeof (thumb_core_reg_alloc_order));
21148 /* Set default optimization options. */
21150 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
21152 /* Enable section anchors by default at -O1 or higher.
21153 Use 2 to distinguish from an explicit -fsection-anchors
21154 given on the command line. */
21156 flag_section_anchors = 2;
21159 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
21162 arm_frame_pointer_required (void)
21164 return (cfun->has_nonlocal_label
21165 || SUBTARGET_FRAME_POINTER_REQUIRED
21166 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
21169 #include "gt-arm.h"