1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 2, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to
22 the Free Software Foundation, 59 Temple Place - Suite 330,
23 Boston, MA 02111-1307, USA. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "conditions.h"
38 #include "insn-attr.h"
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
60 const struct attribute_spec arm_attribute_table[];
62 /* Forward function declarations. */
63 static arm_stack_offsets *arm_get_frame_offsets (void);
64 static void arm_add_gc_roots (void);
65 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
66 HOST_WIDE_INT, rtx, rtx, int, int);
67 static unsigned bit_count (unsigned long);
68 static int arm_address_register_rtx_p (rtx, int);
69 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
70 static int thumb_base_register_rtx_p (rtx, enum machine_mode, int);
71 inline static int thumb_index_register_rtx_p (rtx, int);
72 static int thumb_far_jump_used_p (void);
73 static bool thumb_force_lr_save (void);
74 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
75 static rtx emit_sfm (int, int);
77 static bool arm_assemble_integer (rtx, unsigned int, int);
79 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
80 static arm_cc get_arm_condition_code (rtx);
81 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
82 static rtx is_jump_table (rtx);
83 static const char *output_multi_immediate (rtx *, const char *, const char *,
85 static const char *shift_op (rtx, HOST_WIDE_INT *);
86 static struct machine_function *arm_init_machine_status (void);
87 static void thumb_exit (FILE *, int);
88 static rtx is_jump_table (rtx);
89 static HOST_WIDE_INT get_jump_table_size (rtx);
90 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
91 static Mnode *add_minipool_forward_ref (Mfix *);
92 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
93 static Mnode *add_minipool_backward_ref (Mfix *);
94 static void assign_minipool_offsets (Mfix *);
95 static void arm_print_value (FILE *, rtx);
96 static void dump_minipool (rtx);
97 static int arm_barrier_cost (rtx);
98 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
99 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
100 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
102 static void arm_reorg (void);
103 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
104 static int current_file_function_operand (rtx);
105 static unsigned long arm_compute_save_reg0_reg12_mask (void);
106 static unsigned long arm_compute_save_reg_mask (void);
107 static unsigned long arm_isr_value (tree);
108 static unsigned long arm_compute_func_type (void);
109 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
110 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
111 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
112 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
114 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
115 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
116 static void thumb_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static int arm_comp_type_attributes (tree, tree);
118 static void arm_set_default_type_attributes (tree);
119 static int arm_adjust_cost (rtx, rtx, rtx, int);
120 static int count_insns_for_constant (HOST_WIDE_INT, int);
121 static int arm_get_strip_length (int);
122 static bool arm_function_ok_for_sibcall (tree, tree);
123 static void arm_internal_label (FILE *, const char *, unsigned long);
124 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
126 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
127 static bool arm_size_rtx_costs (rtx, int, int, int *);
128 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
129 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
131 static bool arm_9e_rtx_costs (rtx, int, int, int *);
132 static int arm_address_cost (rtx);
133 static bool arm_memory_load_p (rtx);
134 static bool arm_cirrus_insn_p (rtx);
135 static void cirrus_reorg (rtx);
136 static void arm_init_builtins (void);
137 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
138 static void arm_init_iwmmxt_builtins (void);
139 static rtx safe_vector_operand (rtx, enum machine_mode);
140 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
141 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
142 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
143 static void emit_constant_insn (rtx cond, rtx pattern);
144 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
147 #ifdef OBJECT_FORMAT_ELF
148 static void arm_elf_asm_constructor (rtx, int);
151 static void arm_encode_section_info (tree, rtx, int);
154 static void arm_file_end (void);
157 static void aof_globalize_label (FILE *, const char *);
158 static void aof_dump_imports (FILE *);
159 static void aof_dump_pic_table (FILE *);
160 static void aof_file_start (void);
161 static void aof_file_end (void);
163 static rtx arm_struct_value_rtx (tree, int);
164 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
166 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
167 enum machine_mode, tree, bool);
168 static bool arm_promote_prototypes (tree);
169 static bool arm_default_short_enums (void);
170 static bool arm_align_anon_bitfield (void);
171 static bool arm_return_in_msb (tree);
172 static bool arm_must_pass_in_stack (enum machine_mode, tree);
174 static tree arm_cxx_guard_type (void);
175 static bool arm_cxx_guard_mask_bit (void);
176 static tree arm_get_cookie_size (tree);
177 static bool arm_cookie_has_size (void);
178 static bool arm_cxx_cdtor_returns_this (void);
179 static bool arm_cxx_key_method_may_be_inline (void);
180 static void arm_cxx_determine_class_data_visibility (tree);
181 static bool arm_cxx_class_data_always_comdat (void);
182 static bool arm_cxx_use_aeabi_atexit (void);
183 static void arm_init_libfuncs (void);
184 static bool arm_handle_option (size_t, const char *, int);
185 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
187 /* Initialize the GCC target structure. */
188 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
189 #undef TARGET_MERGE_DECL_ATTRIBUTES
190 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
193 #undef TARGET_ATTRIBUTE_TABLE
194 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
196 #undef TARGET_ASM_FILE_END
197 #define TARGET_ASM_FILE_END arm_file_end
200 #undef TARGET_ASM_BYTE_OP
201 #define TARGET_ASM_BYTE_OP "\tDCB\t"
202 #undef TARGET_ASM_ALIGNED_HI_OP
203 #define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
204 #undef TARGET_ASM_ALIGNED_SI_OP
205 #define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
206 #undef TARGET_ASM_GLOBALIZE_LABEL
207 #define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
208 #undef TARGET_ASM_FILE_START
209 #define TARGET_ASM_FILE_START aof_file_start
210 #undef TARGET_ASM_FILE_END
211 #define TARGET_ASM_FILE_END aof_file_end
213 #undef TARGET_ASM_ALIGNED_SI_OP
214 #define TARGET_ASM_ALIGNED_SI_OP NULL
215 #undef TARGET_ASM_INTEGER
216 #define TARGET_ASM_INTEGER arm_assemble_integer
219 #undef TARGET_ASM_FUNCTION_PROLOGUE
220 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
222 #undef TARGET_ASM_FUNCTION_EPILOGUE
223 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
225 #undef TARGET_DEFAULT_TARGET_FLAGS
226 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
227 #undef TARGET_HANDLE_OPTION
228 #define TARGET_HANDLE_OPTION arm_handle_option
230 #undef TARGET_COMP_TYPE_ATTRIBUTES
231 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
233 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
234 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
236 #undef TARGET_SCHED_ADJUST_COST
237 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
239 #undef TARGET_ENCODE_SECTION_INFO
241 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
243 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
246 #undef TARGET_STRIP_NAME_ENCODING
247 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
249 #undef TARGET_ASM_INTERNAL_LABEL
250 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
252 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
253 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
255 #undef TARGET_ASM_OUTPUT_MI_THUNK
256 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
257 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
258 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
260 /* This will be overridden in arm_override_options. */
261 #undef TARGET_RTX_COSTS
262 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
263 #undef TARGET_ADDRESS_COST
264 #define TARGET_ADDRESS_COST arm_address_cost
266 #undef TARGET_SHIFT_TRUNCATION_MASK
267 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
268 #undef TARGET_VECTOR_MODE_SUPPORTED_P
269 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
271 #undef TARGET_MACHINE_DEPENDENT_REORG
272 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
274 #undef TARGET_INIT_BUILTINS
275 #define TARGET_INIT_BUILTINS arm_init_builtins
276 #undef TARGET_EXPAND_BUILTIN
277 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
279 #undef TARGET_INIT_LIBFUNCS
280 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
282 #undef TARGET_PROMOTE_FUNCTION_ARGS
283 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
284 #undef TARGET_PROMOTE_FUNCTION_RETURN
285 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
286 #undef TARGET_PROMOTE_PROTOTYPES
287 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
288 #undef TARGET_PASS_BY_REFERENCE
289 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
290 #undef TARGET_ARG_PARTIAL_BYTES
291 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
293 #undef TARGET_STRUCT_VALUE_RTX
294 #define TARGET_STRUCT_VALUE_RTX arm_struct_value_rtx
296 #undef TARGET_SETUP_INCOMING_VARARGS
297 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
299 #undef TARGET_DEFAULT_SHORT_ENUMS
300 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
302 #undef TARGET_ALIGN_ANON_BITFIELD
303 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
305 #undef TARGET_CXX_GUARD_TYPE
306 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
308 #undef TARGET_CXX_GUARD_MASK_BIT
309 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
311 #undef TARGET_CXX_GET_COOKIE_SIZE
312 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
314 #undef TARGET_CXX_COOKIE_HAS_SIZE
315 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
317 #undef TARGET_CXX_CDTOR_RETURNS_THIS
318 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
320 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
321 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
323 #undef TARGET_CXX_USE_AEABI_ATEXIT
324 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
326 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
327 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
328 arm_cxx_determine_class_data_visibility
330 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
331 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
333 #undef TARGET_RETURN_IN_MSB
334 #define TARGET_RETURN_IN_MSB arm_return_in_msb
336 #undef TARGET_MUST_PASS_IN_STACK
337 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
339 struct gcc_target targetm = TARGET_INITIALIZER;
341 /* Obstack for minipool constant handling. */
342 static struct obstack minipool_obstack;
343 static char * minipool_startobj;
345 /* The maximum number of insns skipped which
346 will be conditionalised if possible. */
347 static int max_insns_skipped = 5;
349 extern FILE * asm_out_file;
351 /* True if we are currently building a constant table. */
352 int making_const_table;
354 /* Define the information needed to generate branch insns. This is
355 stored from the compare operation. */
356 rtx arm_compare_op0, arm_compare_op1;
358 /* The processor for which instructions should be scheduled. */
359 enum processor_type arm_tune = arm_none;
361 /* Which floating point model to use. */
362 enum arm_fp_model arm_fp_model;
364 /* Which floating point hardware is available. */
365 enum fputype arm_fpu_arch;
367 /* Which floating point hardware to schedule for. */
368 enum fputype arm_fpu_tune;
370 /* Whether to use floating point hardware. */
371 enum float_abi_type arm_float_abi;
373 /* Which ABI to use. */
374 enum arm_abi_type arm_abi;
376 /* Set by the -mfpu=... option. */
377 static const char * target_fpu_name = NULL;
379 /* Set by the -mfpe=... option. */
380 static const char * target_fpe_name = NULL;
382 /* Set by the -mfloat-abi=... option. */
383 static const char * target_float_abi_name = NULL;
385 /* Set by the -mabi=... option. */
386 static const char * target_abi_name = NULL;
388 /* Used to parse -mstructure_size_boundary command line option. */
389 static const char * structure_size_string = NULL;
390 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
392 /* Used for Thumb call_via trampolines. */
393 rtx thumb_call_via_label[14];
394 static int thumb_call_reg_needed;
396 /* Bit values used to identify processor capabilities. */
397 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
398 #define FL_ARCH3M (1 << 1) /* Extended multiply */
399 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
400 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
401 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
402 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
403 #define FL_THUMB (1 << 6) /* Thumb aware */
404 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
405 #define FL_STRONG (1 << 8) /* StrongARM */
406 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
407 #define FL_XSCALE (1 << 10) /* XScale */
408 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
409 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
410 media instructions. */
411 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
412 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
413 Note: ARM6 & 7 derivatives only. */
415 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
417 #define FL_FOR_ARCH2 0
418 #define FL_FOR_ARCH3 FL_MODE32
419 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
420 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
421 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
422 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
423 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
424 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
425 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
426 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
427 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
428 #define FL_FOR_ARCH6J FL_FOR_ARCH6
429 #define FL_FOR_ARCH6K FL_FOR_ARCH6
430 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
431 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6
433 /* The bits in this mask specify which
434 instructions we are allowed to generate. */
435 static unsigned long insn_flags = 0;
437 /* The bits in this mask specify which instruction scheduling options should
439 static unsigned long tune_flags = 0;
441 /* The following are used in the arm.md file as equivalents to bits
442 in the above two flag variables. */
444 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
447 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
450 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
453 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
456 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
459 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
462 /* Nonzero if this chip can benefit from load scheduling. */
463 int arm_ld_sched = 0;
465 /* Nonzero if this chip is a StrongARM. */
466 int arm_tune_strongarm = 0;
468 /* Nonzero if this chip is a Cirrus variant. */
469 int arm_arch_cirrus = 0;
471 /* Nonzero if this chip supports Intel Wireless MMX technology. */
472 int arm_arch_iwmmxt = 0;
474 /* Nonzero if this chip is an XScale. */
475 int arm_arch_xscale = 0;
477 /* Nonzero if tuning for XScale */
478 int arm_tune_xscale = 0;
480 /* Nonzero if we want to tune for stores that access the write-buffer.
481 This typically means an ARM6 or ARM7 with MMU or MPU. */
482 int arm_tune_wbuf = 0;
484 /* Nonzero if generating Thumb instructions. */
487 /* Nonzero if we should define __THUMB_INTERWORK__ in the
489 XXX This is a bit of a hack, it's intended to help work around
490 problems in GLD which doesn't understand that armv5t code is
491 interworking clean. */
492 int arm_cpp_interwork = 0;
494 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
495 must report the mode of the memory reference from PRINT_OPERAND to
496 PRINT_OPERAND_ADDRESS. */
497 enum machine_mode output_memory_reference_mode;
499 /* The register number to be used for the PIC offset register. */
500 static const char * arm_pic_register_string = NULL;
501 int arm_pic_register = INVALID_REGNUM;
503 /* Set to 1 when a return insn is output, this means that the epilogue
505 int return_used_this_function;
507 /* Set to 1 after arm_reorg has started. Reset to start at the start of
508 the next function. */
509 static int after_arm_reorg = 0;
511 /* The maximum number of insns to be used when loading a constant. */
512 static int arm_constant_limit = 3;
514 /* For an explanation of these variables, see final_prescan_insn below. */
516 enum arm_cond_code arm_current_cc;
518 int arm_target_label;
520 /* The condition codes of the ARM, and the inverse function. */
521 static const char * const arm_condition_codes[] =
523 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
524 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
527 #define streq(string1, string2) (strcmp (string1, string2) == 0)
529 /* Initialization code. */
533 const char *const name;
534 enum processor_type core;
536 const unsigned long flags;
537 bool (* rtx_costs) (rtx, int, int, int *);
540 /* Not all of these give usefully different compilation alternatives,
541 but there is no simple way of generalizing them. */
542 static const struct processors all_cores[] =
545 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
546 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
547 #include "arm-cores.def"
549 {NULL, arm_none, NULL, 0, NULL}
552 static const struct processors all_architectures[] =
554 /* ARM Architectures */
555 /* We don't specify rtx_costs here as it will be figured out
558 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
559 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
560 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
561 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
562 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
563 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
564 implementations that support it, so we will leave it out for now. */
565 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
566 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
567 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
568 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
569 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
570 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
571 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
572 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
573 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
574 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
575 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
576 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
577 {NULL, arm_none, NULL, 0 , NULL}
580 struct arm_cpu_select
584 const struct processors * processors;
587 /* This is a magic structure. The 'string' field is magically filled in
588 with a pointer to the value specified by the user on the command line
589 assuming that the user has specified such a value. */
591 static struct arm_cpu_select arm_select[] =
593 /* string name processors */
594 { NULL, "-mcpu=", all_cores },
595 { NULL, "-march=", all_architectures },
596 { NULL, "-mtune=", all_cores }
599 /* Defines representing the indexes into the above table. */
600 #define ARM_OPT_SET_CPU 0
601 #define ARM_OPT_SET_ARCH 1
602 #define ARM_OPT_SET_TUNE 2
604 /* The name of the proprocessor macro to define for this architecture. */
606 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
615 /* Available values for for -mfpu=. */
617 static const struct fpu_desc all_fpus[] =
619 {"fpa", FPUTYPE_FPA},
620 {"fpe2", FPUTYPE_FPA_EMU2},
621 {"fpe3", FPUTYPE_FPA_EMU2},
622 {"maverick", FPUTYPE_MAVERICK},
627 /* Floating point models used by the different hardware.
628 See fputype in arm.h. */
630 static const enum fputype fp_model_for_fpu[] =
632 /* No FP hardware. */
633 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
634 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
635 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
636 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
637 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
638 ARM_FP_MODEL_VFP /* FPUTYPE_VFP */
645 enum float_abi_type abi_type;
649 /* Available values for -mfloat-abi=. */
651 static const struct float_abi all_float_abis[] =
653 {"soft", ARM_FLOAT_ABI_SOFT},
654 {"softfp", ARM_FLOAT_ABI_SOFTFP},
655 {"hard", ARM_FLOAT_ABI_HARD}
662 enum arm_abi_type abi_type;
666 /* Available values for -mabi=. */
668 static const struct abi_name arm_all_abis[] =
670 {"apcs-gnu", ARM_ABI_APCS},
671 {"atpcs", ARM_ABI_ATPCS},
672 {"aapcs", ARM_ABI_AAPCS},
673 {"iwmmxt", ARM_ABI_IWMMXT}
676 /* Return the number of bits set in VALUE. */
678 bit_count (unsigned long value)
680 unsigned long count = 0;
685 value &= value - 1; /* Clear the least-significant set bit. */
691 /* Set up library functions unique to ARM. */
694 arm_init_libfuncs (void)
696 /* There are no special library functions unless we are using the
701 /* The functions below are described in Section 4 of the "Run-Time
702 ABI for the ARM architecture", Version 1.0. */
704 /* Double-precision floating-point arithmetic. Table 2. */
705 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
706 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
707 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
708 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
709 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
711 /* Double-precision comparisons. Table 3. */
712 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
713 set_optab_libfunc (ne_optab, DFmode, NULL);
714 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
715 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
716 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
717 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
718 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
720 /* Single-precision floating-point arithmetic. Table 4. */
721 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
722 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
723 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
724 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
725 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
727 /* Single-precision comparisons. Table 5. */
728 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
729 set_optab_libfunc (ne_optab, SFmode, NULL);
730 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
731 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
732 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
733 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
734 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
736 /* Floating-point to integer conversions. Table 6. */
737 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
738 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
739 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
740 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
741 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
742 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
743 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
744 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
746 /* Conversions between floating types. Table 7. */
747 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
748 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
750 /* Integer to floating-point conversions. Table 8. */
751 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
752 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
753 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
754 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
755 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
756 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
757 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
758 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
760 /* Long long. Table 9. */
761 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
762 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
763 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
764 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
765 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
766 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
767 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
768 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
770 /* Integer (32/32->32) division. \S 4.3.1. */
771 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
772 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
774 /* The divmod functions are designed so that they can be used for
775 plain division, even though they return both the quotient and the
776 remainder. The quotient is returned in the usual location (i.e.,
777 r0 for SImode, {r0, r1} for DImode), just as would be expected
778 for an ordinary division routine. Because the AAPCS calling
779 conventions specify that all of { r0, r1, r2, r3 } are
780 callee-saved registers, there is no need to tell the compiler
781 explicitly that those registers are clobbered by these
783 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
784 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
785 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idivmod");
786 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidivmod");
788 /* We don't have mod libcalls. Fortunately gcc knows how to use the
789 divmod libcalls instead. */
790 set_optab_libfunc (smod_optab, DImode, NULL);
791 set_optab_libfunc (umod_optab, DImode, NULL);
792 set_optab_libfunc (smod_optab, SImode, NULL);
793 set_optab_libfunc (umod_optab, SImode, NULL);
796 /* Implement TARGET_HANDLE_OPTION. */
799 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
804 target_abi_name = arg;
808 arm_select[1].string = arg;
812 arm_select[0].string = arg;
815 case OPT_mfloat_abi_:
816 target_float_abi_name = arg;
821 target_fpe_name = arg;
825 target_fpu_name = arg;
828 case OPT_mhard_float:
829 target_float_abi_name = "hard";
832 case OPT_mpic_register_:
833 arm_pic_register_string = arg;
836 case OPT_msoft_float:
837 target_float_abi_name = "soft";
840 case OPT_mstructure_size_boundary_:
841 structure_size_string = arg;
845 arm_select[2].string = arg;
853 /* Fix up any incompatible options that the user has specified.
854 This has now turned into a maze. */
856 arm_override_options (void)
859 enum processor_type target_arch_cpu = arm_none;
861 /* Set up the flags based on the cpu/architecture selected by the user. */
862 for (i = ARRAY_SIZE (arm_select); i--;)
864 struct arm_cpu_select * ptr = arm_select + i;
866 if (ptr->string != NULL && ptr->string[0] != '\0')
868 const struct processors * sel;
870 for (sel = ptr->processors; sel->name != NULL; sel++)
871 if (streq (ptr->string, sel->name))
873 /* Set the architecture define. */
874 if (i != ARM_OPT_SET_TUNE)
875 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
877 /* Determine the processor core for which we should
878 tune code-generation. */
879 if (/* -mcpu= is a sensible default. */
881 /* -mtune= overrides -mcpu= and -march=. */
882 || i == ARM_OPT_SET_TUNE)
883 arm_tune = (enum processor_type) (sel - ptr->processors);
885 /* Remember the CPU associated with this architecture.
886 If no other option is used to set the CPU type,
887 we'll use this to guess the most suitable tuning
889 if (i == ARM_OPT_SET_ARCH)
890 target_arch_cpu = sel->core;
892 if (i != ARM_OPT_SET_TUNE)
894 /* If we have been given an architecture and a processor
895 make sure that they are compatible. We only generate
896 a warning though, and we prefer the CPU over the
898 if (insn_flags != 0 && (insn_flags ^ sel->flags))
899 warning (0, "switch -mcpu=%s conflicts with -march= switch",
902 insn_flags = sel->flags;
908 if (sel->name == NULL)
909 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
913 /* Guess the tuning options from the architecture if necessary. */
914 if (arm_tune == arm_none)
915 arm_tune = target_arch_cpu;
917 /* If the user did not specify a processor, choose one for them. */
920 const struct processors * sel;
922 enum processor_type cpu;
924 cpu = TARGET_CPU_DEFAULT;
927 #ifdef SUBTARGET_CPU_DEFAULT
928 /* Use the subtarget default CPU if none was specified by
930 cpu = SUBTARGET_CPU_DEFAULT;
932 /* Default to ARM6. */
936 sel = &all_cores[cpu];
938 insn_flags = sel->flags;
940 /* Now check to see if the user has specified some command line
941 switch that require certain abilities from the cpu. */
944 if (TARGET_INTERWORK || TARGET_THUMB)
946 sought |= (FL_THUMB | FL_MODE32);
948 /* There are no ARM processors that support both APCS-26 and
949 interworking. Therefore we force FL_MODE26 to be removed
950 from insn_flags here (if it was set), so that the search
951 below will always be able to find a compatible processor. */
952 insn_flags &= ~FL_MODE26;
955 if (sought != 0 && ((sought & insn_flags) != sought))
957 /* Try to locate a CPU type that supports all of the abilities
958 of the default CPU, plus the extra abilities requested by
960 for (sel = all_cores; sel->name != NULL; sel++)
961 if ((sel->flags & sought) == (sought | insn_flags))
964 if (sel->name == NULL)
966 unsigned current_bit_count = 0;
967 const struct processors * best_fit = NULL;
969 /* Ideally we would like to issue an error message here
970 saying that it was not possible to find a CPU compatible
971 with the default CPU, but which also supports the command
972 line options specified by the programmer, and so they
973 ought to use the -mcpu=<name> command line option to
974 override the default CPU type.
976 If we cannot find a cpu that has both the
977 characteristics of the default cpu and the given
978 command line options we scan the array again looking
980 for (sel = all_cores; sel->name != NULL; sel++)
981 if ((sel->flags & sought) == sought)
985 count = bit_count (sel->flags & insn_flags);
987 if (count >= current_bit_count)
990 current_bit_count = count;
994 gcc_assert (best_fit);
998 insn_flags = sel->flags;
1000 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1001 if (arm_tune == arm_none)
1002 arm_tune = (enum processor_type) (sel - all_cores);
1005 /* The processor for which we should tune should now have been
1007 gcc_assert (arm_tune != arm_none);
1009 tune_flags = all_cores[(int)arm_tune].flags;
1011 targetm.rtx_costs = arm_size_rtx_costs;
1013 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1015 /* Make sure that the processor choice does not conflict with any of the
1016 other command line choices. */
1017 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1019 warning (0, "target CPU does not support interworking" );
1020 target_flags &= ~MASK_INTERWORK;
1023 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1025 warning (0, "target CPU does not support THUMB instructions");
1026 target_flags &= ~MASK_THUMB;
1029 if (TARGET_APCS_FRAME && TARGET_THUMB)
1031 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1032 target_flags &= ~MASK_APCS_FRAME;
1035 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1036 from here where no function is being compiled currently. */
1037 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1038 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1040 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1041 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1043 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1044 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1046 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1048 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1049 target_flags |= MASK_APCS_FRAME;
1052 if (TARGET_POKE_FUNCTION_NAME)
1053 target_flags |= MASK_APCS_FRAME;
1055 if (TARGET_APCS_REENT && flag_pic)
1056 error ("-fpic and -mapcs-reent are incompatible");
1058 if (TARGET_APCS_REENT)
1059 warning (0, "APCS reentrant code not supported. Ignored");
1061 /* If this target is normally configured to use APCS frames, warn if they
1062 are turned off and debugging is turned on. */
1064 && write_symbols != NO_DEBUG
1065 && !TARGET_APCS_FRAME
1066 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1067 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1069 /* If stack checking is disabled, we can use r10 as the PIC register,
1070 which keeps r9 available. */
1072 arm_pic_register = TARGET_APCS_STACK ? 9 : 10;
1074 if (TARGET_APCS_FLOAT)
1075 warning (0, "passing floating point arguments in fp regs not yet supported");
1077 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1078 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1079 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1080 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1081 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1082 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1083 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1084 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1085 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1087 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1088 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1089 thumb_code = (TARGET_ARM == 0);
1090 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1091 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1092 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1094 /* V5 code we generate is completely interworking capable, so we turn off
1095 TARGET_INTERWORK here to avoid many tests later on. */
1097 /* XXX However, we must pass the right pre-processor defines to CPP
1098 or GLD can get confused. This is a hack. */
1099 if (TARGET_INTERWORK)
1100 arm_cpp_interwork = 1;
1103 target_flags &= ~MASK_INTERWORK;
1105 if (target_abi_name)
1107 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1109 if (streq (arm_all_abis[i].name, target_abi_name))
1111 arm_abi = arm_all_abis[i].abi_type;
1115 if (i == ARRAY_SIZE (arm_all_abis))
1116 error ("invalid ABI option: -mabi=%s", target_abi_name);
1119 arm_abi = ARM_DEFAULT_ABI;
1121 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1122 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1124 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1125 error ("iwmmxt abi requires an iwmmxt capable cpu");
1127 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1128 if (target_fpu_name == NULL && target_fpe_name != NULL)
1130 if (streq (target_fpe_name, "2"))
1131 target_fpu_name = "fpe2";
1132 else if (streq (target_fpe_name, "3"))
1133 target_fpu_name = "fpe3";
1135 error ("invalid floating point emulation option: -mfpe=%s",
1138 if (target_fpu_name != NULL)
1140 /* The user specified a FPU. */
1141 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1143 if (streq (all_fpus[i].name, target_fpu_name))
1145 arm_fpu_arch = all_fpus[i].fpu;
1146 arm_fpu_tune = arm_fpu_arch;
1147 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1151 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1152 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1156 #ifdef FPUTYPE_DEFAULT
1157 /* Use the default if it is specified for this platform. */
1158 arm_fpu_arch = FPUTYPE_DEFAULT;
1159 arm_fpu_tune = FPUTYPE_DEFAULT;
1161 /* Pick one based on CPU type. */
1162 /* ??? Some targets assume FPA is the default.
1163 if ((insn_flags & FL_VFP) != 0)
1164 arm_fpu_arch = FPUTYPE_VFP;
1167 if (arm_arch_cirrus)
1168 arm_fpu_arch = FPUTYPE_MAVERICK;
1170 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1172 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1173 arm_fpu_tune = FPUTYPE_FPA;
1175 arm_fpu_tune = arm_fpu_arch;
1176 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1177 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1180 if (target_float_abi_name != NULL)
1182 /* The user specified a FP ABI. */
1183 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1185 if (streq (all_float_abis[i].name, target_float_abi_name))
1187 arm_float_abi = all_float_abis[i].abi_type;
1191 if (i == ARRAY_SIZE (all_float_abis))
1192 error ("invalid floating point abi: -mfloat-abi=%s",
1193 target_float_abi_name);
1196 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1198 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1199 sorry ("-mfloat-abi=hard and VFP");
1201 /* If soft-float is specified then don't use FPU. */
1202 if (TARGET_SOFT_FLOAT)
1203 arm_fpu_arch = FPUTYPE_NONE;
1205 /* For arm2/3 there is no need to do any scheduling if there is only
1206 a floating point emulator, or we are doing software floating-point. */
1207 if ((TARGET_SOFT_FLOAT
1208 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1209 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1210 && (tune_flags & FL_MODE32) == 0)
1211 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1213 /* Override the default structure alignment for AAPCS ABI. */
1214 if (arm_abi == ARM_ABI_AAPCS)
1215 arm_structure_size_boundary = 8;
1217 if (structure_size_string != NULL)
1219 int size = strtol (structure_size_string, NULL, 0);
1221 if (size == 8 || size == 32
1222 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1223 arm_structure_size_boundary = size;
1225 warning (0, "structure size boundary can only be set to %s",
1226 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1229 if (arm_pic_register_string != NULL)
1231 int pic_register = decode_reg_name (arm_pic_register_string);
1234 warning (0, "-mpic-register= is useless without -fpic");
1236 /* Prevent the user from choosing an obviously stupid PIC register. */
1237 else if (pic_register < 0 || call_used_regs[pic_register]
1238 || pic_register == HARD_FRAME_POINTER_REGNUM
1239 || pic_register == STACK_POINTER_REGNUM
1240 || pic_register >= PC_REGNUM)
1241 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1243 arm_pic_register = pic_register;
1246 if (TARGET_THUMB && flag_schedule_insns)
1248 /* Don't warn since it's on by default in -O2. */
1249 flag_schedule_insns = 0;
1254 /* There's some dispute as to whether this should be 1 or 2. However,
1255 experiments seem to show that in pathological cases a setting of
1256 1 degrades less severely than a setting of 2. This could change if
1257 other parts of the compiler change their behavior. */
1258 arm_constant_limit = 1;
1260 /* If optimizing for size, bump the number of instructions that we
1261 are prepared to conditionally execute (even on a StrongARM). */
1262 max_insns_skipped = 6;
1266 /* For processors with load scheduling, it never costs more than
1267 2 cycles to load a constant, and the load scheduler may well
1268 reduce that to 1. */
1270 arm_constant_limit = 1;
1272 /* On XScale the longer latency of a load makes it more difficult
1273 to achieve a good schedule, so it's faster to synthesize
1274 constants that can be done in two insns. */
1275 if (arm_tune_xscale)
1276 arm_constant_limit = 2;
1278 /* StrongARM has early execution of branches, so a sequence
1279 that is worth skipping is shorter. */
1280 if (arm_tune_strongarm)
1281 max_insns_skipped = 3;
1284 /* Register global variables with the garbage collector. */
1285 arm_add_gc_roots ();
1289 arm_add_gc_roots (void)
1291 gcc_obstack_init(&minipool_obstack);
1292 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1295 /* A table of known ARM exception types.
1296 For use with the interrupt function attribute. */
1300 const char *const arg;
1301 const unsigned long return_value;
1305 static const isr_attribute_arg isr_attribute_args [] =
1307 { "IRQ", ARM_FT_ISR },
1308 { "irq", ARM_FT_ISR },
1309 { "FIQ", ARM_FT_FIQ },
1310 { "fiq", ARM_FT_FIQ },
1311 { "ABORT", ARM_FT_ISR },
1312 { "abort", ARM_FT_ISR },
1313 { "ABORT", ARM_FT_ISR },
1314 { "abort", ARM_FT_ISR },
1315 { "UNDEF", ARM_FT_EXCEPTION },
1316 { "undef", ARM_FT_EXCEPTION },
1317 { "SWI", ARM_FT_EXCEPTION },
1318 { "swi", ARM_FT_EXCEPTION },
1319 { NULL, ARM_FT_NORMAL }
1322 /* Returns the (interrupt) function type of the current
1323 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1325 static unsigned long
1326 arm_isr_value (tree argument)
1328 const isr_attribute_arg * ptr;
1331 /* No argument - default to IRQ. */
1332 if (argument == NULL_TREE)
1335 /* Get the value of the argument. */
1336 if (TREE_VALUE (argument) == NULL_TREE
1337 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1338 return ARM_FT_UNKNOWN;
1340 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1342 /* Check it against the list of known arguments. */
1343 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1344 if (streq (arg, ptr->arg))
1345 return ptr->return_value;
1347 /* An unrecognized interrupt type. */
1348 return ARM_FT_UNKNOWN;
1351 /* Computes the type of the current function. */
1353 static unsigned long
1354 arm_compute_func_type (void)
1356 unsigned long type = ARM_FT_UNKNOWN;
1360 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1362 /* Decide if the current function is volatile. Such functions
1363 never return, and many memory cycles can be saved by not storing
1364 register values that will never be needed again. This optimization
1365 was added to speed up context switching in a kernel application. */
1367 && TREE_NOTHROW (current_function_decl)
1368 && TREE_THIS_VOLATILE (current_function_decl))
1369 type |= ARM_FT_VOLATILE;
1371 if (cfun->static_chain_decl != NULL)
1372 type |= ARM_FT_NESTED;
1374 attr = DECL_ATTRIBUTES (current_function_decl);
1376 a = lookup_attribute ("naked", attr);
1378 type |= ARM_FT_NAKED;
1380 a = lookup_attribute ("isr", attr);
1382 a = lookup_attribute ("interrupt", attr);
1385 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1387 type |= arm_isr_value (TREE_VALUE (a));
1392 /* Returns the type of the current function. */
1395 arm_current_func_type (void)
1397 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1398 cfun->machine->func_type = arm_compute_func_type ();
1400 return cfun->machine->func_type;
1403 /* Return 1 if it is possible to return using a single instruction.
1404 If SIBLING is non-null, this is a test for a return before a sibling
1405 call. SIBLING is the call insn, so we can examine its register usage. */
1408 use_return_insn (int iscond, rtx sibling)
1411 unsigned int func_type;
1412 unsigned long saved_int_regs;
1413 unsigned HOST_WIDE_INT stack_adjust;
1414 arm_stack_offsets *offsets;
1416 /* Never use a return instruction before reload has run. */
1417 if (!reload_completed)
1420 func_type = arm_current_func_type ();
1422 /* Naked functions and volatile functions need special
1424 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED))
1427 /* So do interrupt functions that use the frame pointer. */
1428 if (IS_INTERRUPT (func_type) && frame_pointer_needed)
1431 offsets = arm_get_frame_offsets ();
1432 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1434 /* As do variadic functions. */
1435 if (current_function_pretend_args_size
1436 || cfun->machine->uses_anonymous_args
1437 /* Or if the function calls __builtin_eh_return () */
1438 || current_function_calls_eh_return
1439 /* Or if the function calls alloca */
1440 || current_function_calls_alloca
1441 /* Or if there is a stack adjustment. However, if the stack pointer
1442 is saved on the stack, we can use a pre-incrementing stack load. */
1443 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1446 saved_int_regs = arm_compute_save_reg_mask ();
1448 /* Unfortunately, the insn
1450 ldmib sp, {..., sp, ...}
1452 triggers a bug on most SA-110 based devices, such that the stack
1453 pointer won't be correctly restored if the instruction takes a
1454 page fault. We work around this problem by popping r3 along with
1455 the other registers, since that is never slower than executing
1456 another instruction.
1458 We test for !arm_arch5 here, because code for any architecture
1459 less than this could potentially be run on one of the buggy
1461 if (stack_adjust == 4 && !arm_arch5)
1463 /* Validate that r3 is a call-clobbered register (always true in
1464 the default abi) ... */
1465 if (!call_used_regs[3])
1468 /* ... that it isn't being used for a return value (always true
1469 until we implement return-in-regs), or for a tail-call
1473 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1475 if (find_regno_fusage (sibling, USE, 3))
1479 /* ... and that there are no call-saved registers in r0-r2
1480 (always true in the default ABI). */
1481 if (saved_int_regs & 0x7)
1485 /* Can't be done if interworking with Thumb, and any registers have been
1487 if (TARGET_INTERWORK && saved_int_regs != 0)
1490 /* On StrongARM, conditional returns are expensive if they aren't
1491 taken and multiple registers have been stacked. */
1492 if (iscond && arm_tune_strongarm)
1494 /* Conditional return when just the LR is stored is a simple
1495 conditional-load instruction, that's not expensive. */
1496 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1499 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
1503 /* If there are saved registers but the LR isn't saved, then we need
1504 two instructions for the return. */
1505 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1508 /* Can't be done if any of the FPA regs are pushed,
1509 since this also requires an insn. */
1510 if (TARGET_HARD_FLOAT && TARGET_FPA)
1511 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1512 if (regs_ever_live[regno] && !call_used_regs[regno])
1515 /* Likewise VFP regs. */
1516 if (TARGET_HARD_FLOAT && TARGET_VFP)
1517 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1518 if (regs_ever_live[regno] && !call_used_regs[regno])
1521 if (TARGET_REALLY_IWMMXT)
1522 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1523 if (regs_ever_live[regno] && ! call_used_regs [regno])
1529 /* Return TRUE if int I is a valid immediate ARM constant. */
1532 const_ok_for_arm (HOST_WIDE_INT i)
1534 unsigned HOST_WIDE_INT mask = ~(unsigned HOST_WIDE_INT)0xFF;
1536 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1537 be all zero, or all one. */
1538 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1539 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1540 != ((~(unsigned HOST_WIDE_INT) 0)
1541 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1544 /* Fast return for 0 and powers of 2 */
1545 if ((i & (i - 1)) == 0)
1550 if ((i & mask & (unsigned HOST_WIDE_INT) 0xffffffff) == 0)
1553 (mask << 2) | ((mask & (unsigned HOST_WIDE_INT) 0xffffffff)
1554 >> (32 - 2)) | ~(unsigned HOST_WIDE_INT) 0xffffffff;
1556 while (mask != ~(unsigned HOST_WIDE_INT) 0xFF);
1561 /* Return true if I is a valid constant for the operation CODE. */
1563 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1565 if (const_ok_for_arm (i))
1571 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1573 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1579 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1586 /* Emit a sequence of insns to handle a large constant.
1587 CODE is the code of the operation required, it can be any of SET, PLUS,
1588 IOR, AND, XOR, MINUS;
1589 MODE is the mode in which the operation is being performed;
1590 VAL is the integer to operate on;
1591 SOURCE is the other operand (a register, or a null-pointer for SET);
1592 SUBTARGETS means it is safe to create scratch registers if that will
1593 either produce a simpler sequence, or we will want to cse the values.
1594 Return value is the number of insns emitted. */
1597 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1598 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1602 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1603 cond = COND_EXEC_TEST (PATTERN (insn));
1607 if (subtargets || code == SET
1608 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1609 && REGNO (target) != REGNO (source)))
1611 /* After arm_reorg has been called, we can't fix up expensive
1612 constants by pushing them into memory so we must synthesize
1613 them in-line, regardless of the cost. This is only likely to
1614 be more costly on chips that have load delay slots and we are
1615 compiling without running the scheduler (so no splitting
1616 occurred before the final instruction emission).
1618 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1620 if (!after_arm_reorg
1622 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1624 > arm_constant_limit + (code != SET)))
1628 /* Currently SET is the only monadic value for CODE, all
1629 the rest are diadic. */
1630 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (val)));
1635 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1637 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (val)));
1638 /* For MINUS, the value is subtracted from, since we never
1639 have subtraction of a constant. */
1641 emit_insn (gen_rtx_SET (VOIDmode, target,
1642 gen_rtx_MINUS (mode, temp, source)));
1644 emit_insn (gen_rtx_SET (VOIDmode, target,
1645 gen_rtx_fmt_ee (code, mode, source, temp)));
1651 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1656 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1658 HOST_WIDE_INT temp1;
1666 if (remainder & (3 << (i - 2)))
1671 temp1 = remainder & ((0x0ff << end)
1672 | ((i < end) ? (0xff >> (32 - end)) : 0));
1673 remainder &= ~temp1;
1678 } while (remainder);
1682 /* Emit an instruction with the indicated PATTERN. If COND is
1683 non-NULL, conditionalize the execution of the instruction on COND
1687 emit_constant_insn (rtx cond, rtx pattern)
1690 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1691 emit_insn (pattern);
1694 /* As above, but extra parameter GENERATE which, if clear, suppresses
1698 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1699 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1704 int can_negate_initial = 0;
1707 int num_bits_set = 0;
1708 int set_sign_bit_copies = 0;
1709 int clear_sign_bit_copies = 0;
1710 int clear_zero_bit_copies = 0;
1711 int set_zero_bit_copies = 0;
1713 unsigned HOST_WIDE_INT temp1, temp2;
1714 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1716 /* Find out which operations are safe for a given CODE. Also do a quick
1717 check for degenerate cases; these can occur when DImode operations
1729 can_negate_initial = 1;
1733 if (remainder == 0xffffffff)
1736 emit_constant_insn (cond,
1737 gen_rtx_SET (VOIDmode, target,
1738 GEN_INT (ARM_SIGN_EXTEND (val))));
1743 if (reload_completed && rtx_equal_p (target, source))
1746 emit_constant_insn (cond,
1747 gen_rtx_SET (VOIDmode, target, source));
1756 emit_constant_insn (cond,
1757 gen_rtx_SET (VOIDmode, target, const0_rtx));
1760 if (remainder == 0xffffffff)
1762 if (reload_completed && rtx_equal_p (target, source))
1765 emit_constant_insn (cond,
1766 gen_rtx_SET (VOIDmode, target, source));
1775 if (reload_completed && rtx_equal_p (target, source))
1778 emit_constant_insn (cond,
1779 gen_rtx_SET (VOIDmode, target, source));
1783 /* We don't know how to handle other cases yet. */
1784 gcc_assert (remainder == 0xffffffff);
1787 emit_constant_insn (cond,
1788 gen_rtx_SET (VOIDmode, target,
1789 gen_rtx_NOT (mode, source)));
1793 /* We treat MINUS as (val - source), since (source - val) is always
1794 passed as (source + (-val)). */
1798 emit_constant_insn (cond,
1799 gen_rtx_SET (VOIDmode, target,
1800 gen_rtx_NEG (mode, source)));
1803 if (const_ok_for_arm (val))
1806 emit_constant_insn (cond,
1807 gen_rtx_SET (VOIDmode, target,
1808 gen_rtx_MINUS (mode, GEN_INT (val),
1820 /* If we can do it in one insn get out quickly. */
1821 if (const_ok_for_arm (val)
1822 || (can_negate_initial && const_ok_for_arm (-val))
1823 || (can_invert && const_ok_for_arm (~val)))
1826 emit_constant_insn (cond,
1827 gen_rtx_SET (VOIDmode, target,
1829 ? gen_rtx_fmt_ee (code, mode, source,
1835 /* Calculate a few attributes that may be useful for specific
1837 for (i = 31; i >= 0; i--)
1839 if ((remainder & (1 << i)) == 0)
1840 clear_sign_bit_copies++;
1845 for (i = 31; i >= 0; i--)
1847 if ((remainder & (1 << i)) != 0)
1848 set_sign_bit_copies++;
1853 for (i = 0; i <= 31; i++)
1855 if ((remainder & (1 << i)) == 0)
1856 clear_zero_bit_copies++;
1861 for (i = 0; i <= 31; i++)
1863 if ((remainder & (1 << i)) != 0)
1864 set_zero_bit_copies++;
1872 /* See if we can do this by sign_extending a constant that is known
1873 to be negative. This is a good, way of doing it, since the shift
1874 may well merge into a subsequent insn. */
1875 if (set_sign_bit_copies > 1)
1877 if (const_ok_for_arm
1878 (temp1 = ARM_SIGN_EXTEND (remainder
1879 << (set_sign_bit_copies - 1))))
1883 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1884 emit_constant_insn (cond,
1885 gen_rtx_SET (VOIDmode, new_src,
1887 emit_constant_insn (cond,
1888 gen_ashrsi3 (target, new_src,
1889 GEN_INT (set_sign_bit_copies - 1)));
1893 /* For an inverted constant, we will need to set the low bits,
1894 these will be shifted out of harm's way. */
1895 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
1896 if (const_ok_for_arm (~temp1))
1900 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1901 emit_constant_insn (cond,
1902 gen_rtx_SET (VOIDmode, new_src,
1904 emit_constant_insn (cond,
1905 gen_ashrsi3 (target, new_src,
1906 GEN_INT (set_sign_bit_copies - 1)));
1912 /* See if we can generate this by setting the bottom (or the top)
1913 16 bits, and then shifting these into the other half of the
1914 word. We only look for the simplest cases, to do more would cost
1915 too much. Be careful, however, not to generate this when the
1916 alternative would take fewer insns. */
1917 if (val & 0xffff0000)
1919 temp1 = remainder & 0xffff0000;
1920 temp2 = remainder & 0x0000ffff;
1922 /* Overlaps outside this range are best done using other methods. */
1923 for (i = 9; i < 24; i++)
1925 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
1926 && !const_ok_for_arm (temp2))
1928 rtx new_src = (subtargets
1929 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1931 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
1932 source, subtargets, generate);
1940 gen_rtx_ASHIFT (mode, source,
1947 /* Don't duplicate cases already considered. */
1948 for (i = 17; i < 24; i++)
1950 if (((temp1 | (temp1 >> i)) == remainder)
1951 && !const_ok_for_arm (temp1))
1953 rtx new_src = (subtargets
1954 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1956 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
1957 source, subtargets, generate);
1962 gen_rtx_SET (VOIDmode, target,
1965 gen_rtx_LSHIFTRT (mode, source,
1976 /* If we have IOR or XOR, and the constant can be loaded in a
1977 single instruction, and we can find a temporary to put it in,
1978 then this can be done in two instructions instead of 3-4. */
1980 /* TARGET can't be NULL if SUBTARGETS is 0 */
1981 || (reload_completed && !reg_mentioned_p (target, source)))
1983 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
1987 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
1989 emit_constant_insn (cond,
1990 gen_rtx_SET (VOIDmode, sub,
1992 emit_constant_insn (cond,
1993 gen_rtx_SET (VOIDmode, target,
1994 gen_rtx_fmt_ee (code, mode,
2004 if (set_sign_bit_copies > 8
2005 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2009 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2010 rtx shift = GEN_INT (set_sign_bit_copies);
2014 gen_rtx_SET (VOIDmode, sub,
2016 gen_rtx_ASHIFT (mode,
2021 gen_rtx_SET (VOIDmode, target,
2023 gen_rtx_LSHIFTRT (mode, sub,
2029 if (set_zero_bit_copies > 8
2030 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2034 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2035 rtx shift = GEN_INT (set_zero_bit_copies);
2039 gen_rtx_SET (VOIDmode, sub,
2041 gen_rtx_LSHIFTRT (mode,
2046 gen_rtx_SET (VOIDmode, target,
2048 gen_rtx_ASHIFT (mode, sub,
2054 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2058 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2059 emit_constant_insn (cond,
2060 gen_rtx_SET (VOIDmode, sub,
2061 gen_rtx_NOT (mode, source)));
2064 sub = gen_reg_rtx (mode);
2065 emit_constant_insn (cond,
2066 gen_rtx_SET (VOIDmode, sub,
2067 gen_rtx_AND (mode, source,
2069 emit_constant_insn (cond,
2070 gen_rtx_SET (VOIDmode, target,
2071 gen_rtx_NOT (mode, sub)));
2078 /* See if two shifts will do 2 or more insn's worth of work. */
2079 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2081 HOST_WIDE_INT shift_mask = ((0xffffffff
2082 << (32 - clear_sign_bit_copies))
2085 if ((remainder | shift_mask) != 0xffffffff)
2089 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2090 insns = arm_gen_constant (AND, mode, cond,
2091 remainder | shift_mask,
2092 new_src, source, subtargets, 1);
2097 rtx targ = subtargets ? NULL_RTX : target;
2098 insns = arm_gen_constant (AND, mode, cond,
2099 remainder | shift_mask,
2100 targ, source, subtargets, 0);
2106 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2107 rtx shift = GEN_INT (clear_sign_bit_copies);
2109 emit_insn (gen_ashlsi3 (new_src, source, shift));
2110 emit_insn (gen_lshrsi3 (target, new_src, shift));
2116 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2118 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2120 if ((remainder | shift_mask) != 0xffffffff)
2124 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2126 insns = arm_gen_constant (AND, mode, cond,
2127 remainder | shift_mask,
2128 new_src, source, subtargets, 1);
2133 rtx targ = subtargets ? NULL_RTX : target;
2135 insns = arm_gen_constant (AND, mode, cond,
2136 remainder | shift_mask,
2137 targ, source, subtargets, 0);
2143 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2144 rtx shift = GEN_INT (clear_zero_bit_copies);
2146 emit_insn (gen_lshrsi3 (new_src, source, shift));
2147 emit_insn (gen_ashlsi3 (target, new_src, shift));
2159 for (i = 0; i < 32; i++)
2160 if (remainder & (1 << i))
2163 if (code == AND || (can_invert && num_bits_set > 16))
2164 remainder = (~remainder) & 0xffffffff;
2165 else if (code == PLUS && num_bits_set > 16)
2166 remainder = (-remainder) & 0xffffffff;
2173 /* Now try and find a way of doing the job in either two or three
2175 We start by looking for the largest block of zeros that are aligned on
2176 a 2-bit boundary, we then fill up the temps, wrapping around to the
2177 top of the word when we drop off the bottom.
2178 In the worst case this code should produce no more than four insns. */
2181 int best_consecutive_zeros = 0;
2183 for (i = 0; i < 32; i += 2)
2185 int consecutive_zeros = 0;
2187 if (!(remainder & (3 << i)))
2189 while ((i < 32) && !(remainder & (3 << i)))
2191 consecutive_zeros += 2;
2194 if (consecutive_zeros > best_consecutive_zeros)
2196 best_consecutive_zeros = consecutive_zeros;
2197 best_start = i - consecutive_zeros;
2203 /* So long as it won't require any more insns to do so, it's
2204 desirable to emit a small constant (in bits 0...9) in the last
2205 insn. This way there is more chance that it can be combined with
2206 a later addressing insn to form a pre-indexed load or store
2207 operation. Consider:
2209 *((volatile int *)0xe0000100) = 1;
2210 *((volatile int *)0xe0000110) = 2;
2212 We want this to wind up as:
2216 str rB, [rA, #0x100]
2218 str rB, [rA, #0x110]
2220 rather than having to synthesize both large constants from scratch.
2222 Therefore, we calculate how many insns would be required to emit
2223 the constant starting from `best_start', and also starting from
2224 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2225 yield a shorter sequence, we may as well use zero. */
2227 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2228 && (count_insns_for_constant (remainder, 0) <=
2229 count_insns_for_constant (remainder, best_start)))
2232 /* Now start emitting the insns. */
2240 if (remainder & (3 << (i - 2)))
2245 temp1 = remainder & ((0x0ff << end)
2246 | ((i < end) ? (0xff >> (32 - end)) : 0));
2247 remainder &= ~temp1;
2251 rtx new_src, temp1_rtx;
2253 if (code == SET || code == MINUS)
2255 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2256 if (can_invert && code != MINUS)
2261 if (remainder && subtargets)
2262 new_src = gen_reg_rtx (mode);
2267 else if (can_negate)
2271 temp1 = trunc_int_for_mode (temp1, mode);
2272 temp1_rtx = GEN_INT (temp1);
2276 else if (code == MINUS)
2277 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2279 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2281 emit_constant_insn (cond,
2282 gen_rtx_SET (VOIDmode, new_src,
2292 else if (code == MINUS)
2306 /* Canonicalize a comparison so that we are more likely to recognize it.
2307 This can be done for a few constant compares, where we can make the
2308 immediate value easier to load. */
2311 arm_canonicalize_comparison (enum rtx_code code, rtx * op1)
2313 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2323 if (i != ((((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1)) - 1)
2324 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2326 *op1 = GEN_INT (i + 1);
2327 return code == GT ? GE : LT;
2333 if (i != (((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1))
2334 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2336 *op1 = GEN_INT (i - 1);
2337 return code == GE ? GT : LE;
2343 if (i != ~((unsigned HOST_WIDE_INT) 0)
2344 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2346 *op1 = GEN_INT (i + 1);
2347 return code == GTU ? GEU : LTU;
2354 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2356 *op1 = GEN_INT (i - 1);
2357 return code == GEU ? GTU : LEU;
2369 /* Define how to find the value returned by a function. */
2372 arm_function_value(tree type, tree func ATTRIBUTE_UNUSED)
2374 enum machine_mode mode;
2375 int unsignedp ATTRIBUTE_UNUSED;
2376 rtx r ATTRIBUTE_UNUSED;
2378 mode = TYPE_MODE (type);
2379 /* Promote integer types. */
2380 if (INTEGRAL_TYPE_P (type))
2381 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2383 /* Promotes small structs returned in a register to full-word size
2384 for big-endian AAPCS. */
2385 if (arm_return_in_msb (type))
2387 HOST_WIDE_INT size = int_size_in_bytes (type);
2388 if (size % UNITS_PER_WORD != 0)
2390 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2391 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2395 return LIBCALL_VALUE(mode);
2398 /* Determine the amount of memory needed to store the possible return
2399 registers of an untyped call. */
2401 arm_apply_result_size (void)
2407 if (TARGET_HARD_FLOAT_ABI)
2411 if (TARGET_MAVERICK)
2414 if (TARGET_IWMMXT_ABI)
2421 /* Decide whether a type should be returned in memory (true)
2422 or in a register (false). This is called by the macro
2423 RETURN_IN_MEMORY. */
2425 arm_return_in_memory (tree type)
2429 if (!AGGREGATE_TYPE_P (type) &&
2430 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2431 /* All simple types are returned in registers.
2432 For AAPCS, complex types are treated the same as aggregates. */
2435 size = int_size_in_bytes (type);
2437 if (arm_abi != ARM_ABI_APCS)
2439 /* ATPCS and later return aggregate types in memory only if they are
2440 larger than a word (or are variable size). */
2441 return (size < 0 || size > UNITS_PER_WORD);
2444 /* For the arm-wince targets we choose to be compatible with Microsoft's
2445 ARM and Thumb compilers, which always return aggregates in memory. */
2447 /* All structures/unions bigger than one word are returned in memory.
2448 Also catch the case where int_size_in_bytes returns -1. In this case
2449 the aggregate is either huge or of variable size, and in either case
2450 we will want to return it via memory and not in a register. */
2451 if (size < 0 || size > UNITS_PER_WORD)
2454 if (TREE_CODE (type) == RECORD_TYPE)
2458 /* For a struct the APCS says that we only return in a register
2459 if the type is 'integer like' and every addressable element
2460 has an offset of zero. For practical purposes this means
2461 that the structure can have at most one non bit-field element
2462 and that this element must be the first one in the structure. */
2464 /* Find the first field, ignoring non FIELD_DECL things which will
2465 have been created by C++. */
2466 for (field = TYPE_FIELDS (type);
2467 field && TREE_CODE (field) != FIELD_DECL;
2468 field = TREE_CHAIN (field))
2472 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2474 /* Check that the first field is valid for returning in a register. */
2476 /* ... Floats are not allowed */
2477 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2480 /* ... Aggregates that are not themselves valid for returning in
2481 a register are not allowed. */
2482 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2485 /* Now check the remaining fields, if any. Only bitfields are allowed,
2486 since they are not addressable. */
2487 for (field = TREE_CHAIN (field);
2489 field = TREE_CHAIN (field))
2491 if (TREE_CODE (field) != FIELD_DECL)
2494 if (!DECL_BIT_FIELD_TYPE (field))
2501 if (TREE_CODE (type) == UNION_TYPE)
2505 /* Unions can be returned in registers if every element is
2506 integral, or can be returned in an integer register. */
2507 for (field = TYPE_FIELDS (type);
2509 field = TREE_CHAIN (field))
2511 if (TREE_CODE (field) != FIELD_DECL)
2514 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2517 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2523 #endif /* not ARM_WINCE */
2525 /* Return all other types in memory. */
2529 /* Indicate whether or not words of a double are in big-endian order. */
2532 arm_float_words_big_endian (void)
2534 if (TARGET_MAVERICK)
2537 /* For FPA, float words are always big-endian. For VFP, floats words
2538 follow the memory system mode. */
2546 return (TARGET_BIG_END ? 1 : 0);
2551 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2552 for a call to a function whose data type is FNTYPE.
2553 For a library call, FNTYPE is NULL. */
2555 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2556 rtx libname ATTRIBUTE_UNUSED,
2557 tree fndecl ATTRIBUTE_UNUSED)
2559 /* On the ARM, the offset starts at 0. */
2560 pcum->nregs = ((fntype && aggregate_value_p (TREE_TYPE (fntype), fntype)) ? 1 : 0);
2561 pcum->iwmmxt_nregs = 0;
2562 pcum->can_split = true;
2564 pcum->call_cookie = CALL_NORMAL;
2566 if (TARGET_LONG_CALLS)
2567 pcum->call_cookie = CALL_LONG;
2569 /* Check for long call/short call attributes. The attributes
2570 override any command line option. */
2573 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (fntype)))
2574 pcum->call_cookie = CALL_SHORT;
2575 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
2576 pcum->call_cookie = CALL_LONG;
2579 /* Varargs vectors are treated the same as long long.
2580 named_count avoids having to change the way arm handles 'named' */
2581 pcum->named_count = 0;
2584 if (TARGET_REALLY_IWMMXT && fntype)
2588 for (fn_arg = TYPE_ARG_TYPES (fntype);
2590 fn_arg = TREE_CHAIN (fn_arg))
2591 pcum->named_count += 1;
2593 if (! pcum->named_count)
2594 pcum->named_count = INT_MAX;
2599 /* Return true if mode/type need doubleword alignment. */
2601 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2603 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2604 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2608 /* Determine where to put an argument to a function.
2609 Value is zero to push the argument on the stack,
2610 or a hard register in which to store the argument.
2612 MODE is the argument's machine mode.
2613 TYPE is the data type of the argument (as a tree).
2614 This is null for libcalls where that information may
2616 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2617 the preceding args and about the function being called.
2618 NAMED is nonzero if this argument is a named parameter
2619 (otherwise it is an extra parameter matching an ellipsis). */
2622 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2623 tree type, int named)
2627 /* Varargs vectors are treated the same as long long.
2628 named_count avoids having to change the way arm handles 'named' */
2629 if (TARGET_IWMMXT_ABI
2630 && arm_vector_mode_supported_p (mode)
2631 && pcum->named_count > pcum->nargs + 1)
2633 if (pcum->iwmmxt_nregs <= 9)
2634 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2637 pcum->can_split = false;
2642 /* Put doubleword aligned quantities in even register pairs. */
2644 && ARM_DOUBLEWORD_ALIGN
2645 && arm_needs_doubleword_align (mode, type))
2648 if (mode == VOIDmode)
2649 /* Compute operand 2 of the call insn. */
2650 return GEN_INT (pcum->call_cookie);
2652 /* Only allow splitting an arg between regs and memory if all preceding
2653 args were allocated to regs. For args passed by reference we only count
2654 the reference pointer. */
2655 if (pcum->can_split)
2658 nregs = ARM_NUM_REGS2 (mode, type);
2660 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2663 return gen_rtx_REG (mode, pcum->nregs);
2667 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2668 tree type, bool named ATTRIBUTE_UNUSED)
2670 int nregs = pcum->nregs;
2672 if (arm_vector_mode_supported_p (mode))
2675 if (NUM_ARG_REGS > nregs
2676 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2678 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2683 /* Variable sized types are passed by reference. This is a GCC
2684 extension to the ARM ABI. */
2687 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2688 enum machine_mode mode ATTRIBUTE_UNUSED,
2689 tree type, bool named ATTRIBUTE_UNUSED)
2691 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2694 /* Encode the current state of the #pragma [no_]long_calls. */
2697 OFF, /* No #pramgma [no_]long_calls is in effect. */
2698 LONG, /* #pragma long_calls is in effect. */
2699 SHORT /* #pragma no_long_calls is in effect. */
2702 static arm_pragma_enum arm_pragma_long_calls = OFF;
2705 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2707 arm_pragma_long_calls = LONG;
2711 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2713 arm_pragma_long_calls = SHORT;
2717 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2719 arm_pragma_long_calls = OFF;
2722 /* Table of machine attributes. */
2723 const struct attribute_spec arm_attribute_table[] =
2725 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2726 /* Function calls made to this symbol must be done indirectly, because
2727 it may lie outside of the 26 bit addressing range of a normal function
2729 { "long_call", 0, 0, false, true, true, NULL },
2730 /* Whereas these functions are always known to reside within the 26 bit
2731 addressing range. */
2732 { "short_call", 0, 0, false, true, true, NULL },
2733 /* Interrupt Service Routines have special prologue and epilogue requirements. */
2734 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
2735 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
2736 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2738 /* ARM/PE has three new attributes:
2740 dllexport - for exporting a function/variable that will live in a dll
2741 dllimport - for importing a function/variable from a dll
2743 Microsoft allows multiple declspecs in one __declspec, separating
2744 them with spaces. We do NOT support this. Instead, use __declspec
2747 { "dllimport", 0, 0, true, false, false, NULL },
2748 { "dllexport", 0, 0, true, false, false, NULL },
2749 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2750 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
2751 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2752 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2753 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
2755 { NULL, 0, 0, false, false, false, NULL }
2758 /* Handle an attribute requiring a FUNCTION_DECL;
2759 arguments as in struct attribute_spec.handler. */
2761 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
2762 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
2764 if (TREE_CODE (*node) != FUNCTION_DECL)
2766 warning (0, "%qs attribute only applies to functions",
2767 IDENTIFIER_POINTER (name));
2768 *no_add_attrs = true;
2774 /* Handle an "interrupt" or "isr" attribute;
2775 arguments as in struct attribute_spec.handler. */
2777 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
2782 if (TREE_CODE (*node) != FUNCTION_DECL)
2784 warning (0, "%qs attribute only applies to functions",
2785 IDENTIFIER_POINTER (name));
2786 *no_add_attrs = true;
2788 /* FIXME: the argument if any is checked for type attributes;
2789 should it be checked for decl ones? */
2793 if (TREE_CODE (*node) == FUNCTION_TYPE
2794 || TREE_CODE (*node) == METHOD_TYPE)
2796 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
2798 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
2799 *no_add_attrs = true;
2802 else if (TREE_CODE (*node) == POINTER_TYPE
2803 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
2804 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
2805 && arm_isr_value (args) != ARM_FT_UNKNOWN)
2807 *node = build_variant_type_copy (*node);
2808 TREE_TYPE (*node) = build_type_attribute_variant
2810 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
2811 *no_add_attrs = true;
2815 /* Possibly pass this attribute on from the type to a decl. */
2816 if (flags & ((int) ATTR_FLAG_DECL_NEXT
2817 | (int) ATTR_FLAG_FUNCTION_NEXT
2818 | (int) ATTR_FLAG_ARRAY_NEXT))
2820 *no_add_attrs = true;
2821 return tree_cons (name, args, NULL_TREE);
2825 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
2833 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2834 /* Handle the "notshared" attribute. This attribute is another way of
2835 requesting hidden visibility. ARM's compiler supports
2836 "__declspec(notshared)"; we support the same thing via an
2840 arm_handle_notshared_attribute (tree *node,
2841 tree name ATTRIBUTE_UNUSED,
2842 tree args ATTRIBUTE_UNUSED,
2843 int flags ATTRIBUTE_UNUSED,
2846 tree decl = TYPE_NAME (*node);
2850 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
2851 DECL_VISIBILITY_SPECIFIED (decl) = 1;
2852 *no_add_attrs = false;
2858 /* Return 0 if the attributes for two types are incompatible, 1 if they
2859 are compatible, and 2 if they are nearly compatible (which causes a
2860 warning to be generated). */
2862 arm_comp_type_attributes (tree type1, tree type2)
2866 /* Check for mismatch of non-default calling convention. */
2867 if (TREE_CODE (type1) != FUNCTION_TYPE)
2870 /* Check for mismatched call attributes. */
2871 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
2872 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
2873 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
2874 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
2876 /* Only bother to check if an attribute is defined. */
2877 if (l1 | l2 | s1 | s2)
2879 /* If one type has an attribute, the other must have the same attribute. */
2880 if ((l1 != l2) || (s1 != s2))
2883 /* Disallow mixed attributes. */
2884 if ((l1 & s2) || (l2 & s1))
2888 /* Check for mismatched ISR attribute. */
2889 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
2891 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
2892 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
2894 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
2901 /* Encode long_call or short_call attribute by prefixing
2902 symbol name in DECL with a special character FLAG. */
2904 arm_encode_call_attribute (tree decl, int flag)
2906 const char * str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2907 int len = strlen (str);
2910 /* Do not allow weak functions to be treated as short call. */
2911 if (DECL_WEAK (decl) && flag == SHORT_CALL_FLAG_CHAR)
2914 newstr = alloca (len + 2);
2916 strcpy (newstr + 1, str);
2918 newstr = (char *) ggc_alloc_string (newstr, len + 1);
2919 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
2922 /* Assigns default attributes to newly defined type. This is used to
2923 set short_call/long_call attributes for function types of
2924 functions defined inside corresponding #pragma scopes. */
2926 arm_set_default_type_attributes (tree type)
2928 /* Add __attribute__ ((long_call)) to all functions, when
2929 inside #pragma long_calls or __attribute__ ((short_call)),
2930 when inside #pragma no_long_calls. */
2931 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
2933 tree type_attr_list, attr_name;
2934 type_attr_list = TYPE_ATTRIBUTES (type);
2936 if (arm_pragma_long_calls == LONG)
2937 attr_name = get_identifier ("long_call");
2938 else if (arm_pragma_long_calls == SHORT)
2939 attr_name = get_identifier ("short_call");
2943 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
2944 TYPE_ATTRIBUTES (type) = type_attr_list;
2948 /* Return 1 if the operand is a SYMBOL_REF for a function known to be
2949 defined within the current compilation unit. If this cannot be
2950 determined, then 0 is returned. */
2952 current_file_function_operand (rtx sym_ref)
2954 /* This is a bit of a fib. A function will have a short call flag
2955 applied to its name if it has the short call attribute, or it has
2956 already been defined within the current compilation unit. */
2957 if (ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref, 0)))
2960 /* The current function is always defined within the current compilation
2961 unit. If it s a weak definition however, then this may not be the real
2962 definition of the function, and so we have to say no. */
2963 if (sym_ref == XEXP (DECL_RTL (current_function_decl), 0)
2964 && !DECL_WEAK (current_function_decl))
2967 /* We cannot make the determination - default to returning 0. */
2971 /* Return nonzero if a 32 bit "long_call" should be generated for
2972 this call. We generate a long_call if the function:
2974 a. has an __attribute__((long call))
2975 or b. is within the scope of a #pragma long_calls
2976 or c. the -mlong-calls command line switch has been specified
2978 1. -ffunction-sections is in effect
2979 or 2. the current function has __attribute__ ((section))
2980 or 3. the target function has __attribute__ ((section))
2982 However we do not generate a long call if the function:
2984 d. has an __attribute__ ((short_call))
2985 or e. is inside the scope of a #pragma no_long_calls
2986 or f. is defined within the current compilation unit.
2988 This function will be called by C fragments contained in the machine
2989 description file. SYM_REF and CALL_COOKIE correspond to the matched
2990 rtl operands. CALL_SYMBOL is used to distinguish between
2991 two different callers of the function. It is set to 1 in the
2992 "call_symbol" and "call_symbol_value" patterns and to 0 in the "call"
2993 and "call_value" patterns. This is because of the difference in the
2994 SYM_REFs passed by these patterns. */
2996 arm_is_longcall_p (rtx sym_ref, int call_cookie, int call_symbol)
3000 if (GET_CODE (sym_ref) != MEM)
3003 sym_ref = XEXP (sym_ref, 0);
3006 if (GET_CODE (sym_ref) != SYMBOL_REF)
3009 if (call_cookie & CALL_SHORT)
3012 if (TARGET_LONG_CALLS)
3014 if (flag_function_sections
3015 || DECL_SECTION_NAME (current_function_decl))
3016 /* c.3 is handled by the definition of the
3017 ARM_DECLARE_FUNCTION_SIZE macro. */
3021 if (current_file_function_operand (sym_ref))
3024 return (call_cookie & CALL_LONG)
3025 || ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
3026 || TARGET_LONG_CALLS;
3029 /* Return nonzero if it is ok to make a tail-call to DECL. */
3031 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3033 int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;
3035 if (cfun->machine->sibcall_blocked)
3038 /* Never tailcall something for which we have no decl, or if we
3039 are in Thumb mode. */
3040 if (decl == NULL || TARGET_THUMB)
3043 /* Get the calling method. */
3044 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3045 call_type = CALL_SHORT;
3046 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3047 call_type = CALL_LONG;
3049 /* Cannot tail-call to long calls, since these are out of range of
3050 a branch instruction. However, if not compiling PIC, we know
3051 we can reach the symbol if it is in this compilation unit. */
3052 if (call_type == CALL_LONG && (flag_pic || !TREE_ASM_WRITTEN (decl)))
3055 /* If we are interworking and the function is not declared static
3056 then we can't tail-call it unless we know that it exists in this
3057 compilation unit (since it might be a Thumb routine). */
3058 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3061 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3062 if (IS_INTERRUPT (arm_current_func_type ()))
3065 /* Everything else is ok. */
3070 /* Addressing mode support functions. */
3072 /* Return nonzero if X is a legitimate immediate operand when compiling
3075 legitimate_pic_operand_p (rtx x)
3079 && (GET_CODE (x) == SYMBOL_REF
3080 || (GET_CODE (x) == CONST
3081 && GET_CODE (XEXP (x, 0)) == PLUS
3082 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)))
3089 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3091 if (GET_CODE (orig) == SYMBOL_REF
3092 || GET_CODE (orig) == LABEL_REF)
3094 #ifndef AOF_ASSEMBLER
3095 rtx pic_ref, address;
3102 gcc_assert (!no_new_pseudos);
3103 reg = gen_reg_rtx (Pmode);
3108 #ifdef AOF_ASSEMBLER
3109 /* The AOF assembler can generate relocations for these directly, and
3110 understands that the PIC register has to be added into the offset. */
3111 insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3114 address = gen_reg_rtx (Pmode);
3119 emit_insn (gen_pic_load_addr_arm (address, orig));
3121 emit_insn (gen_pic_load_addr_thumb (address, orig));
3123 if ((GET_CODE (orig) == LABEL_REF
3124 || (GET_CODE (orig) == SYMBOL_REF &&
3125 SYMBOL_REF_LOCAL_P (orig)))
3127 pic_ref = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, address);
3130 pic_ref = gen_const_mem (Pmode,
3131 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3135 insn = emit_move_insn (reg, pic_ref);
3137 current_function_uses_pic_offset_table = 1;
3138 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3140 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
3144 else if (GET_CODE (orig) == CONST)
3148 if (GET_CODE (XEXP (orig, 0)) == PLUS
3149 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3154 gcc_assert (!no_new_pseudos);
3155 reg = gen_reg_rtx (Pmode);
3158 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3160 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3161 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3162 base == reg ? 0 : reg);
3164 if (GET_CODE (offset) == CONST_INT)
3166 /* The base register doesn't really matter, we only want to
3167 test the index for the appropriate mode. */
3168 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3170 gcc_assert (!no_new_pseudos);
3171 offset = force_reg (Pmode, offset);
3174 if (GET_CODE (offset) == CONST_INT)
3175 return plus_constant (base, INTVAL (offset));
3178 if (GET_MODE_SIZE (mode) > 4
3179 && (GET_MODE_CLASS (mode) == MODE_INT
3180 || TARGET_SOFT_FLOAT))
3182 emit_insn (gen_addsi3 (reg, base, offset));
3186 return gen_rtx_PLUS (Pmode, base, offset);
3193 /* Find a spare low register to use during the prolog of a function. */
3196 thumb_find_work_register (unsigned long pushed_regs_mask)
3200 /* Check the argument registers first as these are call-used. The
3201 register allocation order means that sometimes r3 might be used
3202 but earlier argument registers might not, so check them all. */
3203 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3204 if (!regs_ever_live[reg])
3207 /* Before going on to check the call-saved registers we can try a couple
3208 more ways of deducing that r3 is available. The first is when we are
3209 pushing anonymous arguments onto the stack and we have less than 4
3210 registers worth of fixed arguments(*). In this case r3 will be part of
3211 the variable argument list and so we can be sure that it will be
3212 pushed right at the start of the function. Hence it will be available
3213 for the rest of the prologue.
3214 (*): ie current_function_pretend_args_size is greater than 0. */
3215 if (cfun->machine->uses_anonymous_args
3216 && current_function_pretend_args_size > 0)
3217 return LAST_ARG_REGNUM;
3219 /* The other case is when we have fixed arguments but less than 4 registers
3220 worth. In this case r3 might be used in the body of the function, but
3221 it is not being used to convey an argument into the function. In theory
3222 we could just check current_function_args_size to see how many bytes are
3223 being passed in argument registers, but it seems that it is unreliable.
3224 Sometimes it will have the value 0 when in fact arguments are being
3225 passed. (See testcase execute/20021111-1.c for an example). So we also
3226 check the args_info.nregs field as well. The problem with this field is
3227 that it makes no allowances for arguments that are passed to the
3228 function but which are not used. Hence we could miss an opportunity
3229 when a function has an unused argument in r3. But it is better to be
3230 safe than to be sorry. */
3231 if (! cfun->machine->uses_anonymous_args
3232 && current_function_args_size >= 0
3233 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3234 && cfun->args_info.nregs < 4)
3235 return LAST_ARG_REGNUM;
3237 /* Otherwise look for a call-saved register that is going to be pushed. */
3238 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3239 if (pushed_regs_mask & (1 << reg))
3242 /* Something went wrong - thumb_compute_save_reg_mask()
3243 should have arranged for a suitable register to be pushed. */
3248 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3252 arm_load_pic_register (unsigned int scratch)
3254 #ifndef AOF_ASSEMBLER
3255 rtx l1, pic_tmp, pic_tmp2, pic_rtx;
3256 rtx global_offset_table;
3258 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3261 gcc_assert (flag_pic);
3263 l1 = gen_label_rtx ();
3265 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3266 /* On the ARM the PC register contains 'dot + 8' at the time of the
3267 addition, on the Thumb it is 'dot + 4'. */
3268 pic_tmp = plus_constant (gen_rtx_LABEL_REF (Pmode, l1), TARGET_ARM ? 8 : 4);
3270 pic_tmp2 = gen_rtx_CONST (VOIDmode,
3271 gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx));
3273 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3275 pic_rtx = gen_rtx_CONST (Pmode, gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp));
3279 emit_insn (gen_pic_load_addr_arm (pic_offset_table_rtx, pic_rtx));
3280 emit_insn (gen_pic_add_dot_plus_eight (pic_offset_table_rtx, l1));
3284 if (REGNO (pic_offset_table_rtx) > LAST_LO_REGNUM)
3286 /* We will have pushed the pic register, so should always be
3287 able to find a work register. */
3288 pic_tmp = gen_rtx_REG (SImode, scratch);
3289 emit_insn (gen_pic_load_addr_thumb (pic_tmp, pic_rtx));
3290 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3293 emit_insn (gen_pic_load_addr_thumb (pic_offset_table_rtx, pic_rtx));
3294 emit_insn (gen_pic_add_dot_plus_four (pic_offset_table_rtx, l1));
3297 /* Need to emit this whether or not we obey regdecls,
3298 since setjmp/longjmp can cause life info to screw up. */
3299 emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
3300 #endif /* AOF_ASSEMBLER */
3304 /* Return nonzero if X is valid as an ARM state addressing register. */
3306 arm_address_register_rtx_p (rtx x, int strict_p)
3310 if (GET_CODE (x) != REG)
3316 return ARM_REGNO_OK_FOR_BASE_P (regno);
3318 return (regno <= LAST_ARM_REGNUM
3319 || regno >= FIRST_PSEUDO_REGISTER
3320 || regno == FRAME_POINTER_REGNUM
3321 || regno == ARG_POINTER_REGNUM);
3324 /* Return nonzero if X is a valid ARM state address operand. */
3326 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3330 enum rtx_code code = GET_CODE (x);
3332 if (arm_address_register_rtx_p (x, strict_p))
3335 use_ldrd = (TARGET_LDRD
3337 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3339 if (code == POST_INC || code == PRE_DEC
3340 || ((code == PRE_INC || code == POST_DEC)
3341 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3342 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3344 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3345 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3346 && GET_CODE (XEXP (x, 1)) == PLUS
3347 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3349 rtx addend = XEXP (XEXP (x, 1), 1);
3351 /* Don't allow ldrd post increment by register because it's hard
3352 to fixup invalid register choices. */
3354 && GET_CODE (x) == POST_MODIFY
3355 && GET_CODE (addend) == REG)
3358 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3359 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3362 /* After reload constants split into minipools will have addresses
3363 from a LABEL_REF. */
3364 else if (reload_completed
3365 && (code == LABEL_REF
3367 && GET_CODE (XEXP (x, 0)) == PLUS
3368 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3369 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3372 else if (mode == TImode)
3375 else if (code == PLUS)
3377 rtx xop0 = XEXP (x, 0);
3378 rtx xop1 = XEXP (x, 1);
3380 return ((arm_address_register_rtx_p (xop0, strict_p)
3381 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3382 || (arm_address_register_rtx_p (xop1, strict_p)
3383 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3387 /* Reload currently can't handle MINUS, so disable this for now */
3388 else if (GET_CODE (x) == MINUS)
3390 rtx xop0 = XEXP (x, 0);
3391 rtx xop1 = XEXP (x, 1);
3393 return (arm_address_register_rtx_p (xop0, strict_p)
3394 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3398 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3399 && code == SYMBOL_REF
3400 && CONSTANT_POOL_ADDRESS_P (x)
3402 && symbol_mentioned_p (get_pool_constant (x))))
3408 /* Return nonzero if INDEX is valid for an address index operand in
3411 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3414 HOST_WIDE_INT range;
3415 enum rtx_code code = GET_CODE (index);
3417 /* Standard coprocessor addressing modes. */
3418 if (TARGET_HARD_FLOAT
3419 && (TARGET_FPA || TARGET_MAVERICK)
3420 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3421 || (TARGET_MAVERICK && mode == DImode)))
3422 return (code == CONST_INT && INTVAL (index) < 1024
3423 && INTVAL (index) > -1024
3424 && (INTVAL (index) & 3) == 0);
3426 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3427 return (code == CONST_INT
3428 && INTVAL (index) < 1024
3429 && INTVAL (index) > -1024
3430 && (INTVAL (index) & 3) == 0);
3432 if (arm_address_register_rtx_p (index, strict_p)
3433 && (GET_MODE_SIZE (mode) <= 4))
3436 if (mode == DImode || mode == DFmode)
3438 if (code == CONST_INT)
3440 HOST_WIDE_INT val = INTVAL (index);
3443 return val > -256 && val < 256;
3445 return val > -4096 && val < 4092;
3448 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3451 if (GET_MODE_SIZE (mode) <= 4
3454 || (mode == QImode && outer == SIGN_EXTEND))))
3458 rtx xiop0 = XEXP (index, 0);
3459 rtx xiop1 = XEXP (index, 1);
3461 return ((arm_address_register_rtx_p (xiop0, strict_p)
3462 && power_of_two_operand (xiop1, SImode))
3463 || (arm_address_register_rtx_p (xiop1, strict_p)
3464 && power_of_two_operand (xiop0, SImode)));
3466 else if (code == LSHIFTRT || code == ASHIFTRT
3467 || code == ASHIFT || code == ROTATERT)
3469 rtx op = XEXP (index, 1);
3471 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3472 && GET_CODE (op) == CONST_INT
3474 && INTVAL (op) <= 31);
3478 /* For ARM v4 we may be doing a sign-extend operation during the
3482 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3488 range = (mode == HImode) ? 4095 : 4096;
3490 return (code == CONST_INT
3491 && INTVAL (index) < range
3492 && INTVAL (index) > -range);
3495 /* Return nonzero if X is valid as a Thumb state base register. */
3497 thumb_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
3501 if (GET_CODE (x) != REG)
3507 return THUMB_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
3509 return (regno <= LAST_LO_REGNUM
3510 || regno > LAST_VIRTUAL_REGISTER
3511 || regno == FRAME_POINTER_REGNUM
3512 || (GET_MODE_SIZE (mode) >= 4
3513 && (regno == STACK_POINTER_REGNUM
3514 || regno >= FIRST_PSEUDO_REGISTER
3515 || x == hard_frame_pointer_rtx
3516 || x == arg_pointer_rtx)));
3519 /* Return nonzero if x is a legitimate index register. This is the case
3520 for any base register that can access a QImode object. */
3522 thumb_index_register_rtx_p (rtx x, int strict_p)
3524 return thumb_base_register_rtx_p (x, QImode, strict_p);
3527 /* Return nonzero if x is a legitimate Thumb-state address.
3529 The AP may be eliminated to either the SP or the FP, so we use the
3530 least common denominator, e.g. SImode, and offsets from 0 to 64.
3532 ??? Verify whether the above is the right approach.
3534 ??? Also, the FP may be eliminated to the SP, so perhaps that
3535 needs special handling also.
3537 ??? Look at how the mips16 port solves this problem. It probably uses
3538 better ways to solve some of these problems.
3540 Although it is not incorrect, we don't accept QImode and HImode
3541 addresses based on the frame pointer or arg pointer until the
3542 reload pass starts. This is so that eliminating such addresses
3543 into stack based ones won't produce impossible code. */
3545 thumb_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3547 /* ??? Not clear if this is right. Experiment. */
3548 if (GET_MODE_SIZE (mode) < 4
3549 && !(reload_in_progress || reload_completed)
3550 && (reg_mentioned_p (frame_pointer_rtx, x)
3551 || reg_mentioned_p (arg_pointer_rtx, x)
3552 || reg_mentioned_p (virtual_incoming_args_rtx, x)
3553 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
3554 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
3555 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
3558 /* Accept any base register. SP only in SImode or larger. */
3559 else if (thumb_base_register_rtx_p (x, mode, strict_p))
3562 /* This is PC relative data before arm_reorg runs. */
3563 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
3564 && GET_CODE (x) == SYMBOL_REF
3565 && CONSTANT_POOL_ADDRESS_P (x) && ! flag_pic)
3568 /* This is PC relative data after arm_reorg runs. */
3569 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
3570 && (GET_CODE (x) == LABEL_REF
3571 || (GET_CODE (x) == CONST
3572 && GET_CODE (XEXP (x, 0)) == PLUS
3573 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3574 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3577 /* Post-inc indexing only supported for SImode and larger. */
3578 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
3579 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p))
3582 else if (GET_CODE (x) == PLUS)
3584 /* REG+REG address can be any two index registers. */
3585 /* We disallow FRAME+REG addressing since we know that FRAME
3586 will be replaced with STACK, and SP relative addressing only
3587 permits SP+OFFSET. */
3588 if (GET_MODE_SIZE (mode) <= 4
3589 && XEXP (x, 0) != frame_pointer_rtx
3590 && XEXP (x, 1) != frame_pointer_rtx
3591 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3592 && thumb_index_register_rtx_p (XEXP (x, 1), strict_p))
3595 /* REG+const has 5-7 bit offset for non-SP registers. */
3596 else if ((thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3597 || XEXP (x, 0) == arg_pointer_rtx)
3598 && GET_CODE (XEXP (x, 1)) == CONST_INT
3599 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
3602 /* REG+const has 10 bit offset for SP, but only SImode and
3603 larger is supported. */
3604 /* ??? Should probably check for DI/DFmode overflow here
3605 just like GO_IF_LEGITIMATE_OFFSET does. */
3606 else if (GET_CODE (XEXP (x, 0)) == REG
3607 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
3608 && GET_MODE_SIZE (mode) >= 4
3609 && GET_CODE (XEXP (x, 1)) == CONST_INT
3610 && INTVAL (XEXP (x, 1)) >= 0
3611 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
3612 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3615 else if (GET_CODE (XEXP (x, 0)) == REG
3616 && REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
3617 && GET_MODE_SIZE (mode) >= 4
3618 && GET_CODE (XEXP (x, 1)) == CONST_INT
3619 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3623 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3624 && GET_MODE_SIZE (mode) == 4
3625 && GET_CODE (x) == SYMBOL_REF
3626 && CONSTANT_POOL_ADDRESS_P (x)
3628 && symbol_mentioned_p (get_pool_constant (x))))
3634 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
3635 instruction of mode MODE. */
3637 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
3639 switch (GET_MODE_SIZE (mode))
3642 return val >= 0 && val < 32;
3645 return val >= 0 && val < 64 && (val & 1) == 0;
3649 && (val + GET_MODE_SIZE (mode)) <= 128
3654 /* Try machine-dependent ways of modifying an illegitimate address
3655 to be legitimate. If we find one, return the new, valid address. */
3657 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3659 if (GET_CODE (x) == PLUS)
3661 rtx xop0 = XEXP (x, 0);
3662 rtx xop1 = XEXP (x, 1);
3664 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
3665 xop0 = force_reg (SImode, xop0);
3667 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
3668 xop1 = force_reg (SImode, xop1);
3670 if (ARM_BASE_REGISTER_RTX_P (xop0)
3671 && GET_CODE (xop1) == CONST_INT)
3673 HOST_WIDE_INT n, low_n;
3677 /* VFP addressing modes actually allow greater offsets, but for
3678 now we just stick with the lowest common denominator. */
3680 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
3692 low_n = ((mode) == TImode ? 0
3693 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
3697 base_reg = gen_reg_rtx (SImode);
3698 val = force_operand (gen_rtx_PLUS (SImode, xop0,
3699 GEN_INT (n)), NULL_RTX);
3700 emit_move_insn (base_reg, val);
3701 x = (low_n == 0 ? base_reg
3702 : gen_rtx_PLUS (SImode, base_reg, GEN_INT (low_n)));
3704 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3705 x = gen_rtx_PLUS (SImode, xop0, xop1);
3708 /* XXX We don't allow MINUS any more -- see comment in
3709 arm_legitimate_address_p (). */
3710 else if (GET_CODE (x) == MINUS)
3712 rtx xop0 = XEXP (x, 0);
3713 rtx xop1 = XEXP (x, 1);
3715 if (CONSTANT_P (xop0))
3716 xop0 = force_reg (SImode, xop0);
3718 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
3719 xop1 = force_reg (SImode, xop1);
3721 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3722 x = gen_rtx_MINUS (SImode, xop0, xop1);
3727 /* We need to find and carefully transform any SYMBOL and LABEL
3728 references; so go back to the original address expression. */
3729 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3731 if (new_x != orig_x)
3739 /* Try machine-dependent ways of modifying an illegitimate Thumb address
3740 to be legitimate. If we find one, return the new, valid address. */
3742 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3744 if (GET_CODE (x) == PLUS
3745 && GET_CODE (XEXP (x, 1)) == CONST_INT
3746 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
3747 || INTVAL (XEXP (x, 1)) < 0))
3749 rtx xop0 = XEXP (x, 0);
3750 rtx xop1 = XEXP (x, 1);
3751 HOST_WIDE_INT offset = INTVAL (xop1);
3753 /* Try and fold the offset into a biasing of the base register and
3754 then offsetting that. Don't do this when optimizing for space
3755 since it can cause too many CSEs. */
3756 if (optimize_size && offset >= 0
3757 && offset < 256 + 31 * GET_MODE_SIZE (mode))
3759 HOST_WIDE_INT delta;
3762 delta = offset - (256 - GET_MODE_SIZE (mode));
3763 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
3764 delta = 31 * GET_MODE_SIZE (mode);
3766 delta = offset & (~31 * GET_MODE_SIZE (mode));
3768 xop0 = force_operand (plus_constant (xop0, offset - delta),
3770 x = plus_constant (xop0, delta);
3772 else if (offset < 0 && offset > -256)
3773 /* Small negative offsets are best done with a subtract before the
3774 dereference, forcing these into a register normally takes two
3776 x = force_operand (x, NULL_RTX);
3779 /* For the remaining cases, force the constant into a register. */
3780 xop1 = force_reg (SImode, xop1);
3781 x = gen_rtx_PLUS (SImode, xop0, xop1);
3784 else if (GET_CODE (x) == PLUS
3785 && s_register_operand (XEXP (x, 1), SImode)
3786 && !s_register_operand (XEXP (x, 0), SImode))
3788 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
3790 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
3795 /* We need to find and carefully transform any SYMBOL and LABEL
3796 references; so go back to the original address expression. */
3797 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3799 if (new_x != orig_x)
3808 #define REG_OR_SUBREG_REG(X) \
3809 (GET_CODE (X) == REG \
3810 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
3812 #define REG_OR_SUBREG_RTX(X) \
3813 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
3815 #ifndef COSTS_N_INSNS
3816 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
3819 thumb_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
3821 enum machine_mode mode = GET_MODE (x);
3834 return COSTS_N_INSNS (1);
3837 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3840 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
3847 return COSTS_N_INSNS (2) + cycles;
3849 return COSTS_N_INSNS (1) + 16;
3852 return (COSTS_N_INSNS (1)
3853 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
3854 + GET_CODE (SET_DEST (x)) == MEM));
3859 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
3861 if (thumb_shiftable_const (INTVAL (x)))
3862 return COSTS_N_INSNS (2);
3863 return COSTS_N_INSNS (3);
3865 else if ((outer == PLUS || outer == COMPARE)
3866 && INTVAL (x) < 256 && INTVAL (x) > -256)
3868 else if (outer == AND
3869 && INTVAL (x) < 256 && INTVAL (x) >= -256)
3870 return COSTS_N_INSNS (1);
3871 else if (outer == ASHIFT || outer == ASHIFTRT
3872 || outer == LSHIFTRT)
3874 return COSTS_N_INSNS (2);
3880 return COSTS_N_INSNS (3);
3898 /* XXX another guess. */
3899 /* Memory costs quite a lot for the first word, but subsequent words
3900 load at the equivalent of a single insn each. */
3901 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3902 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
3907 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
3912 /* XXX still guessing. */
3913 switch (GET_MODE (XEXP (x, 0)))
3916 return (1 + (mode == DImode ? 4 : 0)
3917 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3920 return (4 + (mode == DImode ? 4 : 0)
3921 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3924 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3936 /* Worker routine for arm_rtx_costs. */
3938 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
3940 enum machine_mode mode = GET_MODE (x);
3941 enum rtx_code subcode;
3947 /* Memory costs quite a lot for the first word, but subsequent words
3948 load at the equivalent of a single insn each. */
3949 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3950 + (GET_CODE (x) == SYMBOL_REF
3951 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
3957 return optimize_size ? COSTS_N_INSNS (2) : 100;
3960 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
3967 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
3969 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
3970 + ((GET_CODE (XEXP (x, 0)) == REG
3971 || (GET_CODE (XEXP (x, 0)) == SUBREG
3972 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
3974 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
3975 || (GET_CODE (XEXP (x, 0)) == SUBREG
3976 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
3978 + ((GET_CODE (XEXP (x, 1)) == REG
3979 || (GET_CODE (XEXP (x, 1)) == SUBREG
3980 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
3981 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
3986 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
3987 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
3988 || (GET_CODE (XEXP (x, 0)) == CONST_INT
3989 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
3992 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3993 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
3994 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
3995 && arm_const_double_rtx (XEXP (x, 1))))
3997 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
3998 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
3999 && arm_const_double_rtx (XEXP (x, 0))))
4002 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4003 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4004 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4005 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4006 || subcode == ASHIFTRT || subcode == LSHIFTRT
4007 || subcode == ROTATE || subcode == ROTATERT
4009 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4010 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4011 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4012 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4013 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4014 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4015 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4020 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4021 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4022 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4023 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4024 && arm_const_double_rtx (XEXP (x, 1))))
4028 case AND: case XOR: case IOR:
4031 /* Normally the frame registers will be spilt into reg+const during
4032 reload, so it is a bad idea to combine them with other instructions,
4033 since then they might not be moved outside of loops. As a compromise
4034 we allow integration with ops that have a constant as their second
4036 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4037 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4038 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4039 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4040 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4044 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4045 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4046 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4047 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4050 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4051 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4052 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4053 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4054 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4057 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4058 return (1 + extra_cost
4059 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4060 || subcode == LSHIFTRT || subcode == ASHIFTRT
4061 || subcode == ROTATE || subcode == ROTATERT
4063 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4064 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4065 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4066 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4067 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4068 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4074 /* This should have been handled by the CPU specific routines. */
4078 if (arm_arch3m && mode == SImode
4079 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4080 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4081 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4082 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4083 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4084 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4089 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4090 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4094 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4096 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4099 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4107 return 4 + (mode == DImode ? 4 : 0);
4110 if (GET_MODE (XEXP (x, 0)) == QImode)
4111 return (4 + (mode == DImode ? 4 : 0)
4112 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4115 switch (GET_MODE (XEXP (x, 0)))
4118 return (1 + (mode == DImode ? 4 : 0)
4119 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4122 return (4 + (mode == DImode ? 4 : 0)
4123 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4126 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4141 if (const_ok_for_arm (INTVAL (x)))
4142 return outer == SET ? 2 : -1;
4143 else if (outer == AND
4144 && const_ok_for_arm (~INTVAL (x)))
4146 else if ((outer == COMPARE
4147 || outer == PLUS || outer == MINUS)
4148 && const_ok_for_arm (-INTVAL (x)))
4159 if (arm_const_double_rtx (x))
4160 return outer == SET ? 2 : -1;
4161 else if ((outer == COMPARE || outer == PLUS)
4162 && neg_const_double_rtx_ok_for_fpa (x))
4171 /* RTX costs when optimizing for size. */
4173 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
4175 enum machine_mode mode = GET_MODE (x);
4179 /* XXX TBD. For now, use the standard costs. */
4180 *total = thumb_rtx_costs (x, code, outer_code);
4187 /* A memory access costs 1 insn if the mode is small, or the address is
4188 a single register, otherwise it costs one insn per word. */
4189 if (REG_P (XEXP (x, 0)))
4190 *total = COSTS_N_INSNS (1);
4192 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4199 /* Needs a libcall, so it costs about this. */
4200 *total = COSTS_N_INSNS (2);
4204 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4206 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
4214 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
4216 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
4219 else if (mode == SImode)
4221 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
4222 /* Slightly disparage register shifts, but not by much. */
4223 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4224 *total += 1 + rtx_cost (XEXP (x, 1), code);
4228 /* Needs a libcall. */
4229 *total = COSTS_N_INSNS (2);
4233 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4235 *total = COSTS_N_INSNS (1);
4241 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
4242 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
4244 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
4245 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
4246 || subcode1 == ROTATE || subcode1 == ROTATERT
4247 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
4248 || subcode1 == ASHIFTRT)
4250 /* It's just the cost of the two operands. */
4255 *total = COSTS_N_INSNS (1);
4259 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4263 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4265 *total = COSTS_N_INSNS (1);
4270 case AND: case XOR: case IOR:
4273 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
4275 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
4276 || subcode == LSHIFTRT || subcode == ASHIFTRT
4277 || (code == AND && subcode == NOT))
4279 /* It's just the cost of the two operands. */
4285 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4289 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4293 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4294 *total = COSTS_N_INSNS (1);
4297 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4306 if (cc_register (XEXP (x, 0), VOIDmode))
4309 *total = COSTS_N_INSNS (1);
4313 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4314 *total = COSTS_N_INSNS (1);
4316 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
4321 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
4323 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4324 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4327 *total += COSTS_N_INSNS (1);
4332 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4334 switch (GET_MODE (XEXP (x, 0)))
4337 *total += COSTS_N_INSNS (1);
4341 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4347 *total += COSTS_N_INSNS (2);
4352 *total += COSTS_N_INSNS (1);
4357 if (const_ok_for_arm (INTVAL (x)))
4358 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
4359 else if (const_ok_for_arm (~INTVAL (x)))
4360 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
4361 else if (const_ok_for_arm (-INTVAL (x)))
4363 if (outer_code == COMPARE || outer_code == PLUS
4364 || outer_code == MINUS)
4367 *total = COSTS_N_INSNS (1);
4370 *total = COSTS_N_INSNS (2);
4376 *total = COSTS_N_INSNS (2);
4380 *total = COSTS_N_INSNS (4);
4384 if (mode != VOIDmode)
4385 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4387 *total = COSTS_N_INSNS (4); /* How knows? */
4392 /* RTX costs for cores with a slow MUL implementation. */
4395 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4397 enum machine_mode mode = GET_MODE (x);
4401 *total = thumb_rtx_costs (x, code, outer_code);
4408 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4415 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4417 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4418 & (unsigned HOST_WIDE_INT) 0xffffffff);
4419 int cost, const_ok = const_ok_for_arm (i);
4420 int j, booth_unit_size;
4422 /* Tune as appropriate. */
4423 cost = const_ok ? 4 : 8;
4424 booth_unit_size = 2;
4425 for (j = 0; i && j < 32; j += booth_unit_size)
4427 i >>= booth_unit_size;
4435 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4436 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4440 *total = arm_rtx_costs_1 (x, code, outer_code);
4446 /* RTX cost for cores with a fast multiply unit (M variants). */
4449 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4451 enum machine_mode mode = GET_MODE (x);
4455 *total = thumb_rtx_costs (x, code, outer_code);
4462 /* There is no point basing this on the tuning, since it is always the
4463 fast variant if it exists at all. */
4465 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4466 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4467 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4474 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4481 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4483 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4484 & (unsigned HOST_WIDE_INT) 0xffffffff);
4485 int cost, const_ok = const_ok_for_arm (i);
4486 int j, booth_unit_size;
4488 /* Tune as appropriate. */
4489 cost = const_ok ? 4 : 8;
4490 booth_unit_size = 8;
4491 for (j = 0; i && j < 32; j += booth_unit_size)
4493 i >>= booth_unit_size;
4501 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4502 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4506 *total = arm_rtx_costs_1 (x, code, outer_code);
4512 /* RTX cost for XScale CPUs. */
4515 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
4517 enum machine_mode mode = GET_MODE (x);
4521 *total = thumb_rtx_costs (x, code, outer_code);
4528 /* There is no point basing this on the tuning, since it is always the
4529 fast variant if it exists at all. */
4531 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4532 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4533 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4540 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4547 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4549 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4550 & (unsigned HOST_WIDE_INT) 0xffffffff);
4551 int cost, const_ok = const_ok_for_arm (i);
4552 unsigned HOST_WIDE_INT masked_const;
4554 /* The cost will be related to two insns.
4555 First a load of the constant (MOV or LDR), then a multiply. */
4558 cost += 1; /* LDR is probably more expensive because
4559 of longer result latency. */
4560 masked_const = i & 0xffff8000;
4561 if (masked_const != 0 && masked_const != 0xffff8000)
4563 masked_const = i & 0xf8000000;
4564 if (masked_const == 0 || masked_const == 0xf8000000)
4573 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4574 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4578 /* A COMPARE of a MULT is slow on XScale; the muls instruction
4579 will stall until the multiplication is complete. */
4580 if (GET_CODE (XEXP (x, 0)) == MULT)
4581 *total = 4 + rtx_cost (XEXP (x, 0), code);
4583 *total = arm_rtx_costs_1 (x, code, outer_code);
4587 *total = arm_rtx_costs_1 (x, code, outer_code);
4593 /* RTX costs for 9e (and later) cores. */
4596 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
4598 enum machine_mode mode = GET_MODE (x);
4607 *total = COSTS_N_INSNS (3);
4611 *total = thumb_rtx_costs (x, code, outer_code);
4619 /* There is no point basing this on the tuning, since it is always the
4620 fast variant if it exists at all. */
4622 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4623 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4624 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4631 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4648 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
4649 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
4653 *total = arm_rtx_costs_1 (x, code, outer_code);
4657 /* All address computations that can be done are free, but rtx cost returns
4658 the same for practically all of them. So we weight the different types
4659 of address here in the order (most pref first):
4660 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
4662 arm_arm_address_cost (rtx x)
4664 enum rtx_code c = GET_CODE (x);
4666 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
4668 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4671 if (c == PLUS || c == MINUS)
4673 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4676 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
4686 arm_thumb_address_cost (rtx x)
4688 enum rtx_code c = GET_CODE (x);
4693 && GET_CODE (XEXP (x, 0)) == REG
4694 && GET_CODE (XEXP (x, 1)) == CONST_INT)
4701 arm_address_cost (rtx x)
4703 return TARGET_ARM ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
4707 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
4711 /* Some true dependencies can have a higher cost depending
4712 on precisely how certain input operands are used. */
4714 && REG_NOTE_KIND (link) == 0
4715 && recog_memoized (insn) >= 0
4716 && recog_memoized (dep) >= 0)
4718 int shift_opnum = get_attr_shift (insn);
4719 enum attr_type attr_type = get_attr_type (dep);
4721 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
4722 operand for INSN. If we have a shifted input operand and the
4723 instruction we depend on is another ALU instruction, then we may
4724 have to account for an additional stall. */
4725 if (shift_opnum != 0
4726 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
4728 rtx shifted_operand;
4731 /* Get the shifted operand. */
4732 extract_insn (insn);
4733 shifted_operand = recog_data.operand[shift_opnum];
4735 /* Iterate over all the operands in DEP. If we write an operand
4736 that overlaps with SHIFTED_OPERAND, then we have increase the
4737 cost of this dependency. */
4739 preprocess_constraints ();
4740 for (opno = 0; opno < recog_data.n_operands; opno++)
4742 /* We can ignore strict inputs. */
4743 if (recog_data.operand_type[opno] == OP_IN)
4746 if (reg_overlap_mentioned_p (recog_data.operand[opno],
4753 /* XXX This is not strictly true for the FPA. */
4754 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
4755 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4758 /* Call insns don't incur a stall, even if they follow a load. */
4759 if (REG_NOTE_KIND (link) == 0
4760 && GET_CODE (insn) == CALL_INSN)
4763 if ((i_pat = single_set (insn)) != NULL
4764 && GET_CODE (SET_SRC (i_pat)) == MEM
4765 && (d_pat = single_set (dep)) != NULL
4766 && GET_CODE (SET_DEST (d_pat)) == MEM)
4768 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
4769 /* This is a load after a store, there is no conflict if the load reads
4770 from a cached area. Assume that loads from the stack, and from the
4771 constant pool are cached, and that others will miss. This is a
4774 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
4775 || reg_mentioned_p (stack_pointer_rtx, src_mem)
4776 || reg_mentioned_p (frame_pointer_rtx, src_mem)
4777 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
4784 static int fp_consts_inited = 0;
4786 /* Only zero is valid for VFP. Other values are also valid for FPA. */
4787 static const char * const strings_fp[8] =
4790 "4", "5", "0.5", "10"
4793 static REAL_VALUE_TYPE values_fp[8];
4796 init_fp_table (void)
4802 fp_consts_inited = 1;
4804 fp_consts_inited = 8;
4806 for (i = 0; i < fp_consts_inited; i++)
4808 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
4813 /* Return TRUE if rtx X is a valid immediate FP constant. */
4815 arm_const_double_rtx (rtx x)
4820 if (!fp_consts_inited)
4823 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4824 if (REAL_VALUE_MINUS_ZERO (r))
4827 for (i = 0; i < fp_consts_inited; i++)
4828 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4834 /* Return TRUE if rtx X is a valid immediate FPA constant. */
4836 neg_const_double_rtx_ok_for_fpa (rtx x)
4841 if (!fp_consts_inited)
4844 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4845 r = REAL_VALUE_NEGATE (r);
4846 if (REAL_VALUE_MINUS_ZERO (r))
4849 for (i = 0; i < 8; i++)
4850 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4856 /* Predicates for `match_operand' and `match_operator'. */
4858 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
4860 cirrus_memory_offset (rtx op)
4862 /* Reject eliminable registers. */
4863 if (! (reload_in_progress || reload_completed)
4864 && ( reg_mentioned_p (frame_pointer_rtx, op)
4865 || reg_mentioned_p (arg_pointer_rtx, op)
4866 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4867 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4868 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4869 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4872 if (GET_CODE (op) == MEM)
4878 /* Match: (mem (reg)). */
4879 if (GET_CODE (ind) == REG)
4885 if (GET_CODE (ind) == PLUS
4886 && GET_CODE (XEXP (ind, 0)) == REG
4887 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
4888 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
4895 /* Return TRUE if OP is a valid VFP memory address pattern.
4896 WB if true if writeback address modes are allowed. */
4899 arm_coproc_mem_operand (rtx op, bool wb)
4903 /* Reject eliminable registers. */
4904 if (! (reload_in_progress || reload_completed)
4905 && ( reg_mentioned_p (frame_pointer_rtx, op)
4906 || reg_mentioned_p (arg_pointer_rtx, op)
4907 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4908 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4909 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4910 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4913 /* Constants are converted into offsets from labels. */
4914 if (GET_CODE (op) != MEM)
4919 if (reload_completed
4920 && (GET_CODE (ind) == LABEL_REF
4921 || (GET_CODE (ind) == CONST
4922 && GET_CODE (XEXP (ind, 0)) == PLUS
4923 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
4924 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
4927 /* Match: (mem (reg)). */
4928 if (GET_CODE (ind) == REG)
4929 return arm_address_register_rtx_p (ind, 0);
4931 /* Autoincremment addressing modes. */
4933 && (GET_CODE (ind) == PRE_INC
4934 || GET_CODE (ind) == POST_INC
4935 || GET_CODE (ind) == PRE_DEC
4936 || GET_CODE (ind) == POST_DEC))
4937 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
4940 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
4941 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
4942 && GET_CODE (XEXP (ind, 1)) == PLUS
4943 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
4944 ind = XEXP (ind, 1);
4949 if (GET_CODE (ind) == PLUS
4950 && GET_CODE (XEXP (ind, 0)) == REG
4951 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
4952 && GET_CODE (XEXP (ind, 1)) == CONST_INT
4953 && INTVAL (XEXP (ind, 1)) > -1024
4954 && INTVAL (XEXP (ind, 1)) < 1024
4955 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
4961 /* Return true if X is a register that will be eliminated later on. */
4963 arm_eliminable_register (rtx x)
4965 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
4966 || REGNO (x) == ARG_POINTER_REGNUM
4967 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
4968 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
4971 /* Return GENERAL_REGS if a scratch register required to reload x to/from
4972 VFP registers. Otherwise return NO_REGS. */
4975 vfp_secondary_reload_class (enum machine_mode mode, rtx x)
4977 if (arm_coproc_mem_operand (x, FALSE) || s_register_operand (x, mode))
4980 return GENERAL_REGS;
4983 /* Values which must be returned in the most-significant end of the return
4987 arm_return_in_msb (tree valtype)
4989 return (TARGET_AAPCS_BASED
4991 && (AGGREGATE_TYPE_P (valtype)
4992 || TREE_CODE (valtype) == COMPLEX_TYPE));
4995 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
4996 Use by the Cirrus Maverick code which has to workaround
4997 a hardware bug triggered by such instructions. */
4999 arm_memory_load_p (rtx insn)
5001 rtx body, lhs, rhs;;
5003 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
5006 body = PATTERN (insn);
5008 if (GET_CODE (body) != SET)
5011 lhs = XEXP (body, 0);
5012 rhs = XEXP (body, 1);
5014 lhs = REG_OR_SUBREG_RTX (lhs);
5016 /* If the destination is not a general purpose
5017 register we do not have to worry. */
5018 if (GET_CODE (lhs) != REG
5019 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
5022 /* As well as loads from memory we also have to react
5023 to loads of invalid constants which will be turned
5024 into loads from the minipool. */
5025 return (GET_CODE (rhs) == MEM
5026 || GET_CODE (rhs) == SYMBOL_REF
5027 || note_invalid_constants (insn, -1, false));
5030 /* Return TRUE if INSN is a Cirrus instruction. */
5032 arm_cirrus_insn_p (rtx insn)
5034 enum attr_cirrus attr;
5036 /* get_attr cannot accept USE or CLOBBER. */
5038 || GET_CODE (insn) != INSN
5039 || GET_CODE (PATTERN (insn)) == USE
5040 || GET_CODE (PATTERN (insn)) == CLOBBER)
5043 attr = get_attr_cirrus (insn);
5045 return attr != CIRRUS_NOT;
5048 /* Cirrus reorg for invalid instruction combinations. */
5050 cirrus_reorg (rtx first)
5052 enum attr_cirrus attr;
5053 rtx body = PATTERN (first);
5057 /* Any branch must be followed by 2 non Cirrus instructions. */
5058 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
5061 t = next_nonnote_insn (first);
5063 if (arm_cirrus_insn_p (t))
5066 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5070 emit_insn_after (gen_nop (), first);
5075 /* (float (blah)) is in parallel with a clobber. */
5076 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5077 body = XVECEXP (body, 0, 0);
5079 if (GET_CODE (body) == SET)
5081 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
5083 /* cfldrd, cfldr64, cfstrd, cfstr64 must
5084 be followed by a non Cirrus insn. */
5085 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
5087 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
5088 emit_insn_after (gen_nop (), first);
5092 else if (arm_memory_load_p (first))
5094 unsigned int arm_regno;
5096 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
5097 ldr/cfmv64hr combination where the Rd field is the same
5098 in both instructions must be split with a non Cirrus
5105 /* Get Arm register number for ldr insn. */
5106 if (GET_CODE (lhs) == REG)
5107 arm_regno = REGNO (lhs);
5110 gcc_assert (GET_CODE (rhs) == REG);
5111 arm_regno = REGNO (rhs);
5115 first = next_nonnote_insn (first);
5117 if (! arm_cirrus_insn_p (first))
5120 body = PATTERN (first);
5122 /* (float (blah)) is in parallel with a clobber. */
5123 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
5124 body = XVECEXP (body, 0, 0);
5126 if (GET_CODE (body) == FLOAT)
5127 body = XEXP (body, 0);
5129 if (get_attr_cirrus (first) == CIRRUS_MOVE
5130 && GET_CODE (XEXP (body, 1)) == REG
5131 && arm_regno == REGNO (XEXP (body, 1)))
5132 emit_insn_after (gen_nop (), first);
5138 /* get_attr cannot accept USE or CLOBBER. */
5140 || GET_CODE (first) != INSN
5141 || GET_CODE (PATTERN (first)) == USE
5142 || GET_CODE (PATTERN (first)) == CLOBBER)
5145 attr = get_attr_cirrus (first);
5147 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
5148 must be followed by a non-coprocessor instruction. */
5149 if (attr == CIRRUS_COMPARE)
5153 t = next_nonnote_insn (first);
5155 if (arm_cirrus_insn_p (t))
5158 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5162 emit_insn_after (gen_nop (), first);
5168 /* Return TRUE if X references a SYMBOL_REF. */
5170 symbol_mentioned_p (rtx x)
5175 if (GET_CODE (x) == SYMBOL_REF)
5178 fmt = GET_RTX_FORMAT (GET_CODE (x));
5180 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5186 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5187 if (symbol_mentioned_p (XVECEXP (x, i, j)))
5190 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
5197 /* Return TRUE if X references a LABEL_REF. */
5199 label_mentioned_p (rtx x)
5204 if (GET_CODE (x) == LABEL_REF)
5207 fmt = GET_RTX_FORMAT (GET_CODE (x));
5208 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5214 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5215 if (label_mentioned_p (XVECEXP (x, i, j)))
5218 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
5228 enum rtx_code code = GET_CODE (x);
5245 /* Return 1 if memory locations are adjacent. */
5247 adjacent_mem_locations (rtx a, rtx b)
5249 /* We don't guarantee to preserve the order of these memory refs. */
5250 if (volatile_refs_p (a) || volatile_refs_p (b))
5253 if ((GET_CODE (XEXP (a, 0)) == REG
5254 || (GET_CODE (XEXP (a, 0)) == PLUS
5255 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
5256 && (GET_CODE (XEXP (b, 0)) == REG
5257 || (GET_CODE (XEXP (b, 0)) == PLUS
5258 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
5260 HOST_WIDE_INT val0 = 0, val1 = 0;
5264 if (GET_CODE (XEXP (a, 0)) == PLUS)
5266 reg0 = XEXP (XEXP (a, 0), 0);
5267 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
5272 if (GET_CODE (XEXP (b, 0)) == PLUS)
5274 reg1 = XEXP (XEXP (b, 0), 0);
5275 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
5280 /* Don't accept any offset that will require multiple
5281 instructions to handle, since this would cause the
5282 arith_adjacentmem pattern to output an overlong sequence. */
5283 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
5286 /* Don't allow an eliminable register: register elimination can make
5287 the offset too large. */
5288 if (arm_eliminable_register (reg0))
5291 val_diff = val1 - val0;
5295 /* If the target has load delay slots, then there's no benefit
5296 to using an ldm instruction unless the offset is zero and
5297 we are optimizing for size. */
5298 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
5299 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
5300 && (val_diff == 4 || val_diff == -4));
5303 return ((REGNO (reg0) == REGNO (reg1))
5304 && (val_diff == 4 || val_diff == -4));
5311 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5312 HOST_WIDE_INT *load_offset)
5314 int unsorted_regs[4];
5315 HOST_WIDE_INT unsorted_offsets[4];
5320 /* Can only handle 2, 3, or 4 insns at present,
5321 though could be easily extended if required. */
5322 gcc_assert (nops >= 2 && nops <= 4);
5324 /* Loop over the operands and check that the memory references are
5325 suitable (i.e. immediate offsets from the same base register). At
5326 the same time, extract the target register, and the memory
5328 for (i = 0; i < nops; i++)
5333 /* Convert a subreg of a mem into the mem itself. */
5334 if (GET_CODE (operands[nops + i]) == SUBREG)
5335 operands[nops + i] = alter_subreg (operands + (nops + i));
5337 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5339 /* Don't reorder volatile memory references; it doesn't seem worth
5340 looking for the case where the order is ok anyway. */
5341 if (MEM_VOLATILE_P (operands[nops + i]))
5344 offset = const0_rtx;
5346 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5347 || (GET_CODE (reg) == SUBREG
5348 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5349 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5350 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5352 || (GET_CODE (reg) == SUBREG
5353 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5354 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5359 base_reg = REGNO (reg);
5360 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5361 ? REGNO (operands[i])
5362 : REGNO (SUBREG_REG (operands[i])));
5367 if (base_reg != (int) REGNO (reg))
5368 /* Not addressed from the same base register. */
5371 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5372 ? REGNO (operands[i])
5373 : REGNO (SUBREG_REG (operands[i])));
5374 if (unsorted_regs[i] < unsorted_regs[order[0]])
5378 /* If it isn't an integer register, or if it overwrites the
5379 base register but isn't the last insn in the list, then
5380 we can't do this. */
5381 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
5382 || (i != nops - 1 && unsorted_regs[i] == base_reg))
5385 unsorted_offsets[i] = INTVAL (offset);
5388 /* Not a suitable memory address. */
5392 /* All the useful information has now been extracted from the
5393 operands into unsorted_regs and unsorted_offsets; additionally,
5394 order[0] has been set to the lowest numbered register in the
5395 list. Sort the registers into order, and check that the memory
5396 offsets are ascending and adjacent. */
5398 for (i = 1; i < nops; i++)
5402 order[i] = order[i - 1];
5403 for (j = 0; j < nops; j++)
5404 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5405 && (order[i] == order[i - 1]
5406 || unsorted_regs[j] < unsorted_regs[order[i]]))
5409 /* Have we found a suitable register? if not, one must be used more
5411 if (order[i] == order[i - 1])
5414 /* Is the memory address adjacent and ascending? */
5415 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5423 for (i = 0; i < nops; i++)
5424 regs[i] = unsorted_regs[order[i]];
5426 *load_offset = unsorted_offsets[order[0]];
5429 if (unsorted_offsets[order[0]] == 0)
5430 return 1; /* ldmia */
5432 if (unsorted_offsets[order[0]] == 4)
5433 return 2; /* ldmib */
5435 if (unsorted_offsets[order[nops - 1]] == 0)
5436 return 3; /* ldmda */
5438 if (unsorted_offsets[order[nops - 1]] == -4)
5439 return 4; /* ldmdb */
5441 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
5442 if the offset isn't small enough. The reason 2 ldrs are faster
5443 is because these ARMs are able to do more than one cache access
5444 in a single cycle. The ARM9 and StrongARM have Harvard caches,
5445 whilst the ARM8 has a double bandwidth cache. This means that
5446 these cores can do both an instruction fetch and a data fetch in
5447 a single cycle, so the trick of calculating the address into a
5448 scratch register (one of the result regs) and then doing a load
5449 multiple actually becomes slower (and no smaller in code size).
5450 That is the transformation
5452 ldr rd1, [rbase + offset]
5453 ldr rd2, [rbase + offset + 4]
5457 add rd1, rbase, offset
5458 ldmia rd1, {rd1, rd2}
5460 produces worse code -- '3 cycles + any stalls on rd2' instead of
5461 '2 cycles + any stalls on rd2'. On ARMs with only one cache
5462 access per cycle, the first sequence could never complete in less
5463 than 6 cycles, whereas the ldm sequence would only take 5 and
5464 would make better use of sequential accesses if not hitting the
5467 We cheat here and test 'arm_ld_sched' which we currently know to
5468 only be true for the ARM8, ARM9 and StrongARM. If this ever
5469 changes, then the test below needs to be reworked. */
5470 if (nops == 2 && arm_ld_sched)
5473 /* Can't do it without setting up the offset, only do this if it takes
5474 no more than one insn. */
5475 return (const_ok_for_arm (unsorted_offsets[order[0]])
5476 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
5480 emit_ldm_seq (rtx *operands, int nops)
5484 HOST_WIDE_INT offset;
5488 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5491 strcpy (buf, "ldm%?ia\t");
5495 strcpy (buf, "ldm%?ib\t");
5499 strcpy (buf, "ldm%?da\t");
5503 strcpy (buf, "ldm%?db\t");
5508 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5509 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5512 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5513 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5515 output_asm_insn (buf, operands);
5517 strcpy (buf, "ldm%?ia\t");
5524 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5525 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5527 for (i = 1; i < nops; i++)
5528 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5529 reg_names[regs[i]]);
5531 strcat (buf, "}\t%@ phole ldm");
5533 output_asm_insn (buf, operands);
5538 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5539 HOST_WIDE_INT * load_offset)
5541 int unsorted_regs[4];
5542 HOST_WIDE_INT unsorted_offsets[4];
5547 /* Can only handle 2, 3, or 4 insns at present, though could be easily
5548 extended if required. */
5549 gcc_assert (nops >= 2 && nops <= 4);
5551 /* Loop over the operands and check that the memory references are
5552 suitable (i.e. immediate offsets from the same base register). At
5553 the same time, extract the target register, and the memory
5555 for (i = 0; i < nops; i++)
5560 /* Convert a subreg of a mem into the mem itself. */
5561 if (GET_CODE (operands[nops + i]) == SUBREG)
5562 operands[nops + i] = alter_subreg (operands + (nops + i));
5564 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5566 /* Don't reorder volatile memory references; it doesn't seem worth
5567 looking for the case where the order is ok anyway. */
5568 if (MEM_VOLATILE_P (operands[nops + i]))
5571 offset = const0_rtx;
5573 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5574 || (GET_CODE (reg) == SUBREG
5575 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5576 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5577 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5579 || (GET_CODE (reg) == SUBREG
5580 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5581 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5586 base_reg = REGNO (reg);
5587 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5588 ? REGNO (operands[i])
5589 : REGNO (SUBREG_REG (operands[i])));
5594 if (base_reg != (int) REGNO (reg))
5595 /* Not addressed from the same base register. */
5598 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5599 ? REGNO (operands[i])
5600 : REGNO (SUBREG_REG (operands[i])));
5601 if (unsorted_regs[i] < unsorted_regs[order[0]])
5605 /* If it isn't an integer register, then we can't do this. */
5606 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
5609 unsorted_offsets[i] = INTVAL (offset);
5612 /* Not a suitable memory address. */
5616 /* All the useful information has now been extracted from the
5617 operands into unsorted_regs and unsorted_offsets; additionally,
5618 order[0] has been set to the lowest numbered register in the
5619 list. Sort the registers into order, and check that the memory
5620 offsets are ascending and adjacent. */
5622 for (i = 1; i < nops; i++)
5626 order[i] = order[i - 1];
5627 for (j = 0; j < nops; j++)
5628 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5629 && (order[i] == order[i - 1]
5630 || unsorted_regs[j] < unsorted_regs[order[i]]))
5633 /* Have we found a suitable register? if not, one must be used more
5635 if (order[i] == order[i - 1])
5638 /* Is the memory address adjacent and ascending? */
5639 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5647 for (i = 0; i < nops; i++)
5648 regs[i] = unsorted_regs[order[i]];
5650 *load_offset = unsorted_offsets[order[0]];
5653 if (unsorted_offsets[order[0]] == 0)
5654 return 1; /* stmia */
5656 if (unsorted_offsets[order[0]] == 4)
5657 return 2; /* stmib */
5659 if (unsorted_offsets[order[nops - 1]] == 0)
5660 return 3; /* stmda */
5662 if (unsorted_offsets[order[nops - 1]] == -4)
5663 return 4; /* stmdb */
5669 emit_stm_seq (rtx *operands, int nops)
5673 HOST_WIDE_INT offset;
5677 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5680 strcpy (buf, "stm%?ia\t");
5684 strcpy (buf, "stm%?ib\t");
5688 strcpy (buf, "stm%?da\t");
5692 strcpy (buf, "stm%?db\t");
5699 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5700 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5702 for (i = 1; i < nops; i++)
5703 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5704 reg_names[regs[i]]);
5706 strcat (buf, "}\t%@ phole stm");
5708 output_asm_insn (buf, operands);
5713 /* Routines for use in generating RTL. */
5716 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
5717 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5719 HOST_WIDE_INT offset = *offsetp;
5722 int sign = up ? 1 : -1;
5725 /* XScale has load-store double instructions, but they have stricter
5726 alignment requirements than load-store multiple, so we cannot
5729 For XScale ldm requires 2 + NREGS cycles to complete and blocks
5730 the pipeline until completion.
5738 An ldr instruction takes 1-3 cycles, but does not block the
5747 Best case ldr will always win. However, the more ldr instructions
5748 we issue, the less likely we are to be able to schedule them well.
5749 Using ldr instructions also increases code size.
5751 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
5752 for counts of 3 or 4 regs. */
5753 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5759 for (i = 0; i < count; i++)
5761 addr = plus_constant (from, i * 4 * sign);
5762 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5763 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
5769 emit_move_insn (from, plus_constant (from, count * 4 * sign));
5779 result = gen_rtx_PARALLEL (VOIDmode,
5780 rtvec_alloc (count + (write_back ? 1 : 0)));
5783 XVECEXP (result, 0, 0)
5784 = gen_rtx_SET (GET_MODE (from), from,
5785 plus_constant (from, count * 4 * sign));
5790 for (j = 0; i < count; i++, j++)
5792 addr = plus_constant (from, j * 4 * sign);
5793 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5794 XVECEXP (result, 0, i)
5795 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
5806 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
5807 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5809 HOST_WIDE_INT offset = *offsetp;
5812 int sign = up ? 1 : -1;
5815 /* See arm_gen_load_multiple for discussion of
5816 the pros/cons of ldm/stm usage for XScale. */
5817 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5823 for (i = 0; i < count; i++)
5825 addr = plus_constant (to, i * 4 * sign);
5826 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5827 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
5833 emit_move_insn (to, plus_constant (to, count * 4 * sign));
5843 result = gen_rtx_PARALLEL (VOIDmode,
5844 rtvec_alloc (count + (write_back ? 1 : 0)));
5847 XVECEXP (result, 0, 0)
5848 = gen_rtx_SET (GET_MODE (to), to,
5849 plus_constant (to, count * 4 * sign));
5854 for (j = 0; i < count; i++, j++)
5856 addr = plus_constant (to, j * 4 * sign);
5857 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5858 XVECEXP (result, 0, i)
5859 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
5870 arm_gen_movmemqi (rtx *operands)
5872 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
5873 HOST_WIDE_INT srcoffset, dstoffset;
5875 rtx src, dst, srcbase, dstbase;
5876 rtx part_bytes_reg = NULL;
5879 if (GET_CODE (operands[2]) != CONST_INT
5880 || GET_CODE (operands[3]) != CONST_INT
5881 || INTVAL (operands[2]) > 64
5882 || INTVAL (operands[3]) & 3)
5885 dstbase = operands[0];
5886 srcbase = operands[1];
5888 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
5889 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
5891 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
5892 out_words_to_go = INTVAL (operands[2]) / 4;
5893 last_bytes = INTVAL (operands[2]) & 3;
5894 dstoffset = srcoffset = 0;
5896 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
5897 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
5899 for (i = 0; in_words_to_go >= 2; i+=4)
5901 if (in_words_to_go > 4)
5902 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
5903 srcbase, &srcoffset));
5905 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
5906 FALSE, srcbase, &srcoffset));
5908 if (out_words_to_go)
5910 if (out_words_to_go > 4)
5911 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
5912 dstbase, &dstoffset));
5913 else if (out_words_to_go != 1)
5914 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
5918 dstbase, &dstoffset));
5921 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5922 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
5923 if (last_bytes != 0)
5925 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
5931 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
5932 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
5935 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
5936 if (out_words_to_go)
5940 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
5941 sreg = copy_to_reg (mem);
5943 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5944 emit_move_insn (mem, sreg);
5947 gcc_assert (!in_words_to_go); /* Sanity check */
5952 gcc_assert (in_words_to_go > 0);
5954 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
5955 part_bytes_reg = copy_to_mode_reg (SImode, mem);
5958 gcc_assert (!last_bytes || part_bytes_reg);
5960 if (BYTES_BIG_ENDIAN && last_bytes)
5962 rtx tmp = gen_reg_rtx (SImode);
5964 /* The bytes we want are in the top end of the word. */
5965 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
5966 GEN_INT (8 * (4 - last_bytes))));
5967 part_bytes_reg = tmp;
5971 mem = adjust_automodify_address (dstbase, QImode,
5972 plus_constant (dst, last_bytes - 1),
5973 dstoffset + last_bytes - 1);
5974 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
5978 tmp = gen_reg_rtx (SImode);
5979 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
5980 part_bytes_reg = tmp;
5989 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
5990 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
5994 rtx tmp = gen_reg_rtx (SImode);
5995 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
5996 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
5997 part_bytes_reg = tmp;
6004 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
6005 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6012 /* Generate a memory reference for a half word, such that it will be loaded
6013 into the top 16 bits of the word. We can assume that the address is
6014 known to be alignable and of the form reg, or plus (reg, const). */
6017 arm_gen_rotated_half_load (rtx memref)
6019 HOST_WIDE_INT offset = 0;
6020 rtx base = XEXP (memref, 0);
6022 if (GET_CODE (base) == PLUS)
6024 offset = INTVAL (XEXP (base, 1));
6025 base = XEXP (base, 0);
6028 /* If we aren't allowed to generate unaligned addresses, then fail. */
6029 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 0))
6032 base = gen_rtx_MEM (SImode, plus_constant (base, offset & ~2));
6034 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 2))
6037 return gen_rtx_ROTATE (SImode, base, GEN_INT (16));
6040 /* Select a dominance comparison mode if possible for a test of the general
6041 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
6042 COND_OR == DOM_CC_X_AND_Y => (X && Y)
6043 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
6044 COND_OR == DOM_CC_X_OR_Y => (X || Y)
6045 In all cases OP will be either EQ or NE, but we don't need to know which
6046 here. If we are unable to support a dominance comparison we return
6047 CC mode. This will then fail to match for the RTL expressions that
6048 generate this call. */
6050 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
6052 enum rtx_code cond1, cond2;
6055 /* Currently we will probably get the wrong result if the individual
6056 comparisons are not simple. This also ensures that it is safe to
6057 reverse a comparison if necessary. */
6058 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
6060 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
6064 /* The if_then_else variant of this tests the second condition if the
6065 first passes, but is true if the first fails. Reverse the first
6066 condition to get a true "inclusive-or" expression. */
6067 if (cond_or == DOM_CC_NX_OR_Y)
6068 cond1 = reverse_condition (cond1);
6070 /* If the comparisons are not equal, and one doesn't dominate the other,
6071 then we can't do this. */
6073 && !comparison_dominates_p (cond1, cond2)
6074 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
6079 enum rtx_code temp = cond1;
6087 if (cond_or == DOM_CC_X_AND_Y)
6092 case EQ: return CC_DEQmode;
6093 case LE: return CC_DLEmode;
6094 case LEU: return CC_DLEUmode;
6095 case GE: return CC_DGEmode;
6096 case GEU: return CC_DGEUmode;
6097 default: gcc_unreachable ();
6101 if (cond_or == DOM_CC_X_AND_Y)
6117 if (cond_or == DOM_CC_X_AND_Y)
6133 if (cond_or == DOM_CC_X_AND_Y)
6149 if (cond_or == DOM_CC_X_AND_Y)
6164 /* The remaining cases only occur when both comparisons are the
6167 gcc_assert (cond1 == cond2);
6171 gcc_assert (cond1 == cond2);
6175 gcc_assert (cond1 == cond2);
6179 gcc_assert (cond1 == cond2);
6183 gcc_assert (cond1 == cond2);
6192 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
6194 /* All floating point compares return CCFP if it is an equality
6195 comparison, and CCFPE otherwise. */
6196 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6216 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
6225 /* A compare with a shifted operand. Because of canonicalization, the
6226 comparison will have to be swapped when we emit the assembler. */
6227 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
6228 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6229 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
6230 || GET_CODE (x) == ROTATERT))
6233 /* This operation is performed swapped, but since we only rely on the Z
6234 flag we don't need an additional mode. */
6235 if (GET_MODE (y) == SImode && REG_P (y)
6236 && GET_CODE (x) == NEG
6237 && (op == EQ || op == NE))
6240 /* This is a special case that is used by combine to allow a
6241 comparison of a shifted byte load to be split into a zero-extend
6242 followed by a comparison of the shifted integer (only valid for
6243 equalities and unsigned inequalities). */
6244 if (GET_MODE (x) == SImode
6245 && GET_CODE (x) == ASHIFT
6246 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
6247 && GET_CODE (XEXP (x, 0)) == SUBREG
6248 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
6249 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
6250 && (op == EQ || op == NE
6251 || op == GEU || op == GTU || op == LTU || op == LEU)
6252 && GET_CODE (y) == CONST_INT)
6255 /* A construct for a conditional compare, if the false arm contains
6256 0, then both conditions must be true, otherwise either condition
6257 must be true. Not all conditions are possible, so CCmode is
6258 returned if it can't be done. */
6259 if (GET_CODE (x) == IF_THEN_ELSE
6260 && (XEXP (x, 2) == const0_rtx
6261 || XEXP (x, 2) == const1_rtx)
6262 && COMPARISON_P (XEXP (x, 0))
6263 && COMPARISON_P (XEXP (x, 1)))
6264 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6265 INTVAL (XEXP (x, 2)));
6267 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
6268 if (GET_CODE (x) == AND
6269 && COMPARISON_P (XEXP (x, 0))
6270 && COMPARISON_P (XEXP (x, 1)))
6271 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6274 if (GET_CODE (x) == IOR
6275 && COMPARISON_P (XEXP (x, 0))
6276 && COMPARISON_P (XEXP (x, 1)))
6277 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6280 /* An operation (on Thumb) where we want to test for a single bit.
6281 This is done by shifting that bit up into the top bit of a
6282 scratch register; we can then branch on the sign bit. */
6284 && GET_MODE (x) == SImode
6285 && (op == EQ || op == NE)
6286 && (GET_CODE (x) == ZERO_EXTRACT))
6289 /* An operation that sets the condition codes as a side-effect, the
6290 V flag is not set correctly, so we can only use comparisons where
6291 this doesn't matter. (For LT and GE we can use "mi" and "pl"
6293 if (GET_MODE (x) == SImode
6295 && (op == EQ || op == NE || op == LT || op == GE)
6296 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
6297 || GET_CODE (x) == AND || GET_CODE (x) == IOR
6298 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
6299 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
6300 || GET_CODE (x) == LSHIFTRT
6301 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6302 || GET_CODE (x) == ROTATERT
6303 || (TARGET_ARM && GET_CODE (x) == ZERO_EXTRACT)))
6306 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
6309 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
6310 && GET_CODE (x) == PLUS
6311 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
6317 /* X and Y are two things to compare using CODE. Emit the compare insn and
6318 return the rtx for register 0 in the proper mode. FP means this is a
6319 floating point compare: I don't think that it is needed on the arm. */
6321 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
6323 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
6324 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
6326 emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
6327 gen_rtx_COMPARE (mode, x, y)));
6332 /* Generate a sequence of insns that will generate the correct return
6333 address mask depending on the physical architecture that the program
6336 arm_gen_return_addr_mask (void)
6338 rtx reg = gen_reg_rtx (Pmode);
6340 emit_insn (gen_return_addr_mask (reg));
6345 arm_reload_in_hi (rtx *operands)
6347 rtx ref = operands[1];
6349 HOST_WIDE_INT offset = 0;
6351 if (GET_CODE (ref) == SUBREG)
6353 offset = SUBREG_BYTE (ref);
6354 ref = SUBREG_REG (ref);
6357 if (GET_CODE (ref) == REG)
6359 /* We have a pseudo which has been spilt onto the stack; there
6360 are two cases here: the first where there is a simple
6361 stack-slot replacement and a second where the stack-slot is
6362 out of range, or is used as a subreg. */
6363 if (reg_equiv_mem[REGNO (ref)])
6365 ref = reg_equiv_mem[REGNO (ref)];
6366 base = find_replacement (&XEXP (ref, 0));
6369 /* The slot is out of range, or was dressed up in a SUBREG. */
6370 base = reg_equiv_address[REGNO (ref)];
6373 base = find_replacement (&XEXP (ref, 0));
6375 /* Handle the case where the address is too complex to be offset by 1. */
6376 if (GET_CODE (base) == MINUS
6377 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6379 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6381 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6384 else if (GET_CODE (base) == PLUS)
6386 /* The addend must be CONST_INT, or we would have dealt with it above. */
6387 HOST_WIDE_INT hi, lo;
6389 offset += INTVAL (XEXP (base, 1));
6390 base = XEXP (base, 0);
6392 /* Rework the address into a legal sequence of insns. */
6393 /* Valid range for lo is -4095 -> 4095 */
6396 : -((-offset) & 0xfff));
6398 /* Corner case, if lo is the max offset then we would be out of range
6399 once we have added the additional 1 below, so bump the msb into the
6400 pre-loading insn(s). */
6404 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6405 ^ (HOST_WIDE_INT) 0x80000000)
6406 - (HOST_WIDE_INT) 0x80000000);
6408 gcc_assert (hi + lo == offset);
6412 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6414 /* Get the base address; addsi3 knows how to handle constants
6415 that require more than one insn. */
6416 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6422 /* Operands[2] may overlap operands[0] (though it won't overlap
6423 operands[1]), that's why we asked for a DImode reg -- so we can
6424 use the bit that does not overlap. */
6425 if (REGNO (operands[2]) == REGNO (operands[0]))
6426 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6428 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6430 emit_insn (gen_zero_extendqisi2 (scratch,
6431 gen_rtx_MEM (QImode,
6432 plus_constant (base,
6434 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
6435 gen_rtx_MEM (QImode,
6436 plus_constant (base,
6438 if (!BYTES_BIG_ENDIAN)
6439 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6440 gen_rtx_IOR (SImode,
6443 gen_rtx_SUBREG (SImode, operands[0], 0),
6447 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6448 gen_rtx_IOR (SImode,
6449 gen_rtx_ASHIFT (SImode, scratch,
6451 gen_rtx_SUBREG (SImode, operands[0],
6455 /* Handle storing a half-word to memory during reload by synthesizing as two
6456 byte stores. Take care not to clobber the input values until after we
6457 have moved them somewhere safe. This code assumes that if the DImode
6458 scratch in operands[2] overlaps either the input value or output address
6459 in some way, then that value must die in this insn (we absolutely need
6460 two scratch registers for some corner cases). */
6462 arm_reload_out_hi (rtx *operands)
6464 rtx ref = operands[0];
6465 rtx outval = operands[1];
6467 HOST_WIDE_INT offset = 0;
6469 if (GET_CODE (ref) == SUBREG)
6471 offset = SUBREG_BYTE (ref);
6472 ref = SUBREG_REG (ref);
6475 if (GET_CODE (ref) == REG)
6477 /* We have a pseudo which has been spilt onto the stack; there
6478 are two cases here: the first where there is a simple
6479 stack-slot replacement and a second where the stack-slot is
6480 out of range, or is used as a subreg. */
6481 if (reg_equiv_mem[REGNO (ref)])
6483 ref = reg_equiv_mem[REGNO (ref)];
6484 base = find_replacement (&XEXP (ref, 0));
6487 /* The slot is out of range, or was dressed up in a SUBREG. */
6488 base = reg_equiv_address[REGNO (ref)];
6491 base = find_replacement (&XEXP (ref, 0));
6493 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6495 /* Handle the case where the address is too complex to be offset by 1. */
6496 if (GET_CODE (base) == MINUS
6497 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6499 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6501 /* Be careful not to destroy OUTVAL. */
6502 if (reg_overlap_mentioned_p (base_plus, outval))
6504 /* Updating base_plus might destroy outval, see if we can
6505 swap the scratch and base_plus. */
6506 if (!reg_overlap_mentioned_p (scratch, outval))
6509 scratch = base_plus;
6514 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6516 /* Be conservative and copy OUTVAL into the scratch now,
6517 this should only be necessary if outval is a subreg
6518 of something larger than a word. */
6519 /* XXX Might this clobber base? I can't see how it can,
6520 since scratch is known to overlap with OUTVAL, and
6521 must be wider than a word. */
6522 emit_insn (gen_movhi (scratch_hi, outval));
6523 outval = scratch_hi;
6527 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6530 else if (GET_CODE (base) == PLUS)
6532 /* The addend must be CONST_INT, or we would have dealt with it above. */
6533 HOST_WIDE_INT hi, lo;
6535 offset += INTVAL (XEXP (base, 1));
6536 base = XEXP (base, 0);
6538 /* Rework the address into a legal sequence of insns. */
6539 /* Valid range for lo is -4095 -> 4095 */
6542 : -((-offset) & 0xfff));
6544 /* Corner case, if lo is the max offset then we would be out of range
6545 once we have added the additional 1 below, so bump the msb into the
6546 pre-loading insn(s). */
6550 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6551 ^ (HOST_WIDE_INT) 0x80000000)
6552 - (HOST_WIDE_INT) 0x80000000);
6554 gcc_assert (hi + lo == offset);
6558 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6560 /* Be careful not to destroy OUTVAL. */
6561 if (reg_overlap_mentioned_p (base_plus, outval))
6563 /* Updating base_plus might destroy outval, see if we
6564 can swap the scratch and base_plus. */
6565 if (!reg_overlap_mentioned_p (scratch, outval))
6568 scratch = base_plus;
6573 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6575 /* Be conservative and copy outval into scratch now,
6576 this should only be necessary if outval is a
6577 subreg of something larger than a word. */
6578 /* XXX Might this clobber base? I can't see how it
6579 can, since scratch is known to overlap with
6581 emit_insn (gen_movhi (scratch_hi, outval));
6582 outval = scratch_hi;
6586 /* Get the base address; addsi3 knows how to handle constants
6587 that require more than one insn. */
6588 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6594 if (BYTES_BIG_ENDIAN)
6596 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6597 plus_constant (base, offset + 1)),
6598 gen_lowpart (QImode, outval)));
6599 emit_insn (gen_lshrsi3 (scratch,
6600 gen_rtx_SUBREG (SImode, outval, 0),
6602 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6603 gen_lowpart (QImode, scratch)));
6607 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6608 gen_lowpart (QImode, outval)));
6609 emit_insn (gen_lshrsi3 (scratch,
6610 gen_rtx_SUBREG (SImode, outval, 0),
6612 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6613 plus_constant (base, offset + 1)),
6614 gen_lowpart (QImode, scratch)));
6618 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
6619 (padded to the size of a word) should be passed in a register. */
6622 arm_must_pass_in_stack (enum machine_mode mode, tree type)
6624 if (TARGET_AAPCS_BASED)
6625 return must_pass_in_stack_var_size (mode, type);
6627 return must_pass_in_stack_var_size_or_pad (mode, type);
6631 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
6632 Return true if an argument passed on the stack should be padded upwards,
6633 i.e. if the least-significant byte has useful data. */
6636 arm_pad_arg_upward (enum machine_mode mode, tree type)
6638 if (!TARGET_AAPCS_BASED)
6639 return DEFAULT_FUNCTION_ARG_PADDING(mode, type);
6641 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
6648 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
6649 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
6650 byte of the register has useful data, and return the opposite if the
6651 most significant byte does.
6652 For AAPCS, small aggregates and small complex types are always padded
6656 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
6657 tree type, int first ATTRIBUTE_UNUSED)
6659 if (TARGET_AAPCS_BASED
6661 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
6662 && int_size_in_bytes (type) <= 4)
6665 /* Otherwise, use default padding. */
6666 return !BYTES_BIG_ENDIAN;
6671 /* Print a symbolic form of X to the debug file, F. */
6673 arm_print_value (FILE *f, rtx x)
6675 switch (GET_CODE (x))
6678 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
6682 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
6690 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
6692 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
6693 if (i < (CONST_VECTOR_NUNITS (x) - 1))
6701 fprintf (f, "\"%s\"", XSTR (x, 0));
6705 fprintf (f, "`%s'", XSTR (x, 0));
6709 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
6713 arm_print_value (f, XEXP (x, 0));
6717 arm_print_value (f, XEXP (x, 0));
6719 arm_print_value (f, XEXP (x, 1));
6727 fprintf (f, "????");
6732 /* Routines for manipulation of the constant pool. */
6734 /* Arm instructions cannot load a large constant directly into a
6735 register; they have to come from a pc relative load. The constant
6736 must therefore be placed in the addressable range of the pc
6737 relative load. Depending on the precise pc relative load
6738 instruction the range is somewhere between 256 bytes and 4k. This
6739 means that we often have to dump a constant inside a function, and
6740 generate code to branch around it.
6742 It is important to minimize this, since the branches will slow
6743 things down and make the code larger.
6745 Normally we can hide the table after an existing unconditional
6746 branch so that there is no interruption of the flow, but in the
6747 worst case the code looks like this:
6765 We fix this by performing a scan after scheduling, which notices
6766 which instructions need to have their operands fetched from the
6767 constant table and builds the table.
6769 The algorithm starts by building a table of all the constants that
6770 need fixing up and all the natural barriers in the function (places
6771 where a constant table can be dropped without breaking the flow).
6772 For each fixup we note how far the pc-relative replacement will be
6773 able to reach and the offset of the instruction into the function.
6775 Having built the table we then group the fixes together to form
6776 tables that are as large as possible (subject to addressing
6777 constraints) and emit each table of constants after the last
6778 barrier that is within range of all the instructions in the group.
6779 If a group does not contain a barrier, then we forcibly create one
6780 by inserting a jump instruction into the flow. Once the table has
6781 been inserted, the insns are then modified to reference the
6782 relevant entry in the pool.
6784 Possible enhancements to the algorithm (not implemented) are:
6786 1) For some processors and object formats, there may be benefit in
6787 aligning the pools to the start of cache lines; this alignment
6788 would need to be taken into account when calculating addressability
6791 /* These typedefs are located at the start of this file, so that
6792 they can be used in the prototypes there. This comment is to
6793 remind readers of that fact so that the following structures
6794 can be understood more easily.
6796 typedef struct minipool_node Mnode;
6797 typedef struct minipool_fixup Mfix; */
6799 struct minipool_node
6801 /* Doubly linked chain of entries. */
6804 /* The maximum offset into the code that this entry can be placed. While
6805 pushing fixes for forward references, all entries are sorted in order
6806 of increasing max_address. */
6807 HOST_WIDE_INT max_address;
6808 /* Similarly for an entry inserted for a backwards ref. */
6809 HOST_WIDE_INT min_address;
6810 /* The number of fixes referencing this entry. This can become zero
6811 if we "unpush" an entry. In this case we ignore the entry when we
6812 come to emit the code. */
6814 /* The offset from the start of the minipool. */
6815 HOST_WIDE_INT offset;
6816 /* The value in table. */
6818 /* The mode of value. */
6819 enum machine_mode mode;
6820 /* The size of the value. With iWMMXt enabled
6821 sizes > 4 also imply an alignment of 8-bytes. */
6825 struct minipool_fixup
6829 HOST_WIDE_INT address;
6831 enum machine_mode mode;
6835 HOST_WIDE_INT forwards;
6836 HOST_WIDE_INT backwards;
6839 /* Fixes less than a word need padding out to a word boundary. */
6840 #define MINIPOOL_FIX_SIZE(mode) \
6841 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
6843 static Mnode * minipool_vector_head;
6844 static Mnode * minipool_vector_tail;
6845 static rtx minipool_vector_label;
6847 /* The linked list of all minipool fixes required for this function. */
6848 Mfix * minipool_fix_head;
6849 Mfix * minipool_fix_tail;
6850 /* The fix entry for the current minipool, once it has been placed. */
6851 Mfix * minipool_barrier;
6853 /* Determines if INSN is the start of a jump table. Returns the end
6854 of the TABLE or NULL_RTX. */
6856 is_jump_table (rtx insn)
6860 if (GET_CODE (insn) == JUMP_INSN
6861 && JUMP_LABEL (insn) != NULL
6862 && ((table = next_real_insn (JUMP_LABEL (insn)))
6863 == next_real_insn (insn))
6865 && GET_CODE (table) == JUMP_INSN
6866 && (GET_CODE (PATTERN (table)) == ADDR_VEC
6867 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
6873 #ifndef JUMP_TABLES_IN_TEXT_SECTION
6874 #define JUMP_TABLES_IN_TEXT_SECTION 0
6877 static HOST_WIDE_INT
6878 get_jump_table_size (rtx insn)
6880 /* ADDR_VECs only take room if read-only data does into the text
6882 if (JUMP_TABLES_IN_TEXT_SECTION
6883 #if !defined(READONLY_DATA_SECTION) && !defined(READONLY_DATA_SECTION_ASM_OP)
6888 rtx body = PATTERN (insn);
6889 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
6891 return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, elt);
6897 /* Move a minipool fix MP from its current location to before MAX_MP.
6898 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
6899 constraints may need updating. */
6901 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
6902 HOST_WIDE_INT max_address)
6904 /* The code below assumes these are different. */
6905 gcc_assert (mp != max_mp);
6909 if (max_address < mp->max_address)
6910 mp->max_address = max_address;
6914 if (max_address > max_mp->max_address - mp->fix_size)
6915 mp->max_address = max_mp->max_address - mp->fix_size;
6917 mp->max_address = max_address;
6919 /* Unlink MP from its current position. Since max_mp is non-null,
6920 mp->prev must be non-null. */
6921 mp->prev->next = mp->next;
6922 if (mp->next != NULL)
6923 mp->next->prev = mp->prev;
6925 minipool_vector_tail = mp->prev;
6927 /* Re-insert it before MAX_MP. */
6929 mp->prev = max_mp->prev;
6932 if (mp->prev != NULL)
6933 mp->prev->next = mp;
6935 minipool_vector_head = mp;
6938 /* Save the new entry. */
6941 /* Scan over the preceding entries and adjust their addresses as
6943 while (mp->prev != NULL
6944 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
6946 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
6953 /* Add a constant to the minipool for a forward reference. Returns the
6954 node added or NULL if the constant will not fit in this pool. */
6956 add_minipool_forward_ref (Mfix *fix)
6958 /* If set, max_mp is the first pool_entry that has a lower
6959 constraint than the one we are trying to add. */
6960 Mnode * max_mp = NULL;
6961 HOST_WIDE_INT max_address = fix->address + fix->forwards;
6964 /* If this fix's address is greater than the address of the first
6965 entry, then we can't put the fix in this pool. We subtract the
6966 size of the current fix to ensure that if the table is fully
6967 packed we still have enough room to insert this value by suffling
6968 the other fixes forwards. */
6969 if (minipool_vector_head &&
6970 fix->address >= minipool_vector_head->max_address - fix->fix_size)
6973 /* Scan the pool to see if a constant with the same value has
6974 already been added. While we are doing this, also note the
6975 location where we must insert the constant if it doesn't already
6977 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
6979 if (GET_CODE (fix->value) == GET_CODE (mp->value)
6980 && fix->mode == mp->mode
6981 && (GET_CODE (fix->value) != CODE_LABEL
6982 || (CODE_LABEL_NUMBER (fix->value)
6983 == CODE_LABEL_NUMBER (mp->value)))
6984 && rtx_equal_p (fix->value, mp->value))
6986 /* More than one fix references this entry. */
6988 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
6991 /* Note the insertion point if necessary. */
6993 && mp->max_address > max_address)
6996 /* If we are inserting an 8-bytes aligned quantity and
6997 we have not already found an insertion point, then
6998 make sure that all such 8-byte aligned quantities are
6999 placed at the start of the pool. */
7000 if (ARM_DOUBLEWORD_ALIGN
7002 && fix->fix_size == 8
7003 && mp->fix_size != 8)
7006 max_address = mp->max_address;
7010 /* The value is not currently in the minipool, so we need to create
7011 a new entry for it. If MAX_MP is NULL, the entry will be put on
7012 the end of the list since the placement is less constrained than
7013 any existing entry. Otherwise, we insert the new fix before
7014 MAX_MP and, if necessary, adjust the constraints on the other
7016 mp = xmalloc (sizeof (* mp));
7017 mp->fix_size = fix->fix_size;
7018 mp->mode = fix->mode;
7019 mp->value = fix->value;
7021 /* Not yet required for a backwards ref. */
7022 mp->min_address = -65536;
7026 mp->max_address = max_address;
7028 mp->prev = minipool_vector_tail;
7030 if (mp->prev == NULL)
7032 minipool_vector_head = mp;
7033 minipool_vector_label = gen_label_rtx ();
7036 mp->prev->next = mp;
7038 minipool_vector_tail = mp;
7042 if (max_address > max_mp->max_address - mp->fix_size)
7043 mp->max_address = max_mp->max_address - mp->fix_size;
7045 mp->max_address = max_address;
7048 mp->prev = max_mp->prev;
7050 if (mp->prev != NULL)
7051 mp->prev->next = mp;
7053 minipool_vector_head = mp;
7056 /* Save the new entry. */
7059 /* Scan over the preceding entries and adjust their addresses as
7061 while (mp->prev != NULL
7062 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
7064 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
7072 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
7073 HOST_WIDE_INT min_address)
7075 HOST_WIDE_INT offset;
7077 /* The code below assumes these are different. */
7078 gcc_assert (mp != min_mp);
7082 if (min_address > mp->min_address)
7083 mp->min_address = min_address;
7087 /* We will adjust this below if it is too loose. */
7088 mp->min_address = min_address;
7090 /* Unlink MP from its current position. Since min_mp is non-null,
7091 mp->next must be non-null. */
7092 mp->next->prev = mp->prev;
7093 if (mp->prev != NULL)
7094 mp->prev->next = mp->next;
7096 minipool_vector_head = mp->next;
7098 /* Reinsert it after MIN_MP. */
7100 mp->next = min_mp->next;
7102 if (mp->next != NULL)
7103 mp->next->prev = mp;
7105 minipool_vector_tail = mp;
7111 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7113 mp->offset = offset;
7114 if (mp->refcount > 0)
7115 offset += mp->fix_size;
7117 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
7118 mp->next->min_address = mp->min_address + mp->fix_size;
7124 /* Add a constant to the minipool for a backward reference. Returns the
7125 node added or NULL if the constant will not fit in this pool.
7127 Note that the code for insertion for a backwards reference can be
7128 somewhat confusing because the calculated offsets for each fix do
7129 not take into account the size of the pool (which is still under
7132 add_minipool_backward_ref (Mfix *fix)
7134 /* If set, min_mp is the last pool_entry that has a lower constraint
7135 than the one we are trying to add. */
7136 Mnode *min_mp = NULL;
7137 /* This can be negative, since it is only a constraint. */
7138 HOST_WIDE_INT min_address = fix->address - fix->backwards;
7141 /* If we can't reach the current pool from this insn, or if we can't
7142 insert this entry at the end of the pool without pushing other
7143 fixes out of range, then we don't try. This ensures that we
7144 can't fail later on. */
7145 if (min_address >= minipool_barrier->address
7146 || (minipool_vector_tail->min_address + fix->fix_size
7147 >= minipool_barrier->address))
7150 /* Scan the pool to see if a constant with the same value has
7151 already been added. While we are doing this, also note the
7152 location where we must insert the constant if it doesn't already
7154 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
7156 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7157 && fix->mode == mp->mode
7158 && (GET_CODE (fix->value) != CODE_LABEL
7159 || (CODE_LABEL_NUMBER (fix->value)
7160 == CODE_LABEL_NUMBER (mp->value)))
7161 && rtx_equal_p (fix->value, mp->value)
7162 /* Check that there is enough slack to move this entry to the
7163 end of the table (this is conservative). */
7165 > (minipool_barrier->address
7166 + minipool_vector_tail->offset
7167 + minipool_vector_tail->fix_size)))
7170 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
7174 mp->min_address += fix->fix_size;
7177 /* Note the insertion point if necessary. */
7178 if (mp->min_address < min_address)
7180 /* For now, we do not allow the insertion of 8-byte alignment
7181 requiring nodes anywhere but at the start of the pool. */
7182 if (ARM_DOUBLEWORD_ALIGN
7183 && fix->fix_size == 8 && mp->fix_size != 8)
7188 else if (mp->max_address
7189 < minipool_barrier->address + mp->offset + fix->fix_size)
7191 /* Inserting before this entry would push the fix beyond
7192 its maximum address (which can happen if we have
7193 re-located a forwards fix); force the new fix to come
7196 min_address = mp->min_address + fix->fix_size;
7198 /* If we are inserting an 8-bytes aligned quantity and
7199 we have not already found an insertion point, then
7200 make sure that all such 8-byte aligned quantities are
7201 placed at the start of the pool. */
7202 else if (ARM_DOUBLEWORD_ALIGN
7204 && fix->fix_size == 8
7205 && mp->fix_size < 8)
7208 min_address = mp->min_address + fix->fix_size;
7213 /* We need to create a new entry. */
7214 mp = xmalloc (sizeof (* mp));
7215 mp->fix_size = fix->fix_size;
7216 mp->mode = fix->mode;
7217 mp->value = fix->value;
7219 mp->max_address = minipool_barrier->address + 65536;
7221 mp->min_address = min_address;
7226 mp->next = minipool_vector_head;
7228 if (mp->next == NULL)
7230 minipool_vector_tail = mp;
7231 minipool_vector_label = gen_label_rtx ();
7234 mp->next->prev = mp;
7236 minipool_vector_head = mp;
7240 mp->next = min_mp->next;
7244 if (mp->next != NULL)
7245 mp->next->prev = mp;
7247 minipool_vector_tail = mp;
7250 /* Save the new entry. */
7258 /* Scan over the following entries and adjust their offsets. */
7259 while (mp->next != NULL)
7261 if (mp->next->min_address < mp->min_address + mp->fix_size)
7262 mp->next->min_address = mp->min_address + mp->fix_size;
7265 mp->next->offset = mp->offset + mp->fix_size;
7267 mp->next->offset = mp->offset;
7276 assign_minipool_offsets (Mfix *barrier)
7278 HOST_WIDE_INT offset = 0;
7281 minipool_barrier = barrier;
7283 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7285 mp->offset = offset;
7287 if (mp->refcount > 0)
7288 offset += mp->fix_size;
7292 /* Output the literal table */
7294 dump_minipool (rtx scan)
7300 if (ARM_DOUBLEWORD_ALIGN)
7301 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7302 if (mp->refcount > 0 && mp->fix_size == 8)
7310 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
7311 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
7313 scan = emit_label_after (gen_label_rtx (), scan);
7314 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
7315 scan = emit_label_after (minipool_vector_label, scan);
7317 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
7319 if (mp->refcount > 0)
7324 ";; Offset %u, min %ld, max %ld ",
7325 (unsigned) mp->offset, (unsigned long) mp->min_address,
7326 (unsigned long) mp->max_address);
7327 arm_print_value (dump_file, mp->value);
7328 fputc ('\n', dump_file);
7331 switch (mp->fix_size)
7333 #ifdef HAVE_consttable_1
7335 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
7339 #ifdef HAVE_consttable_2
7341 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
7345 #ifdef HAVE_consttable_4
7347 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
7351 #ifdef HAVE_consttable_8
7353 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
7366 minipool_vector_head = minipool_vector_tail = NULL;
7367 scan = emit_insn_after (gen_consttable_end (), scan);
7368 scan = emit_barrier_after (scan);
7371 /* Return the cost of forcibly inserting a barrier after INSN. */
7373 arm_barrier_cost (rtx insn)
7375 /* Basing the location of the pool on the loop depth is preferable,
7376 but at the moment, the basic block information seems to be
7377 corrupt by this stage of the compilation. */
7379 rtx next = next_nonnote_insn (insn);
7381 if (next != NULL && GET_CODE (next) == CODE_LABEL)
7384 switch (GET_CODE (insn))
7387 /* It will always be better to place the table before the label, rather
7396 return base_cost - 10;
7399 return base_cost + 10;
7403 /* Find the best place in the insn stream in the range
7404 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
7405 Create the barrier by inserting a jump and add a new fix entry for
7408 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
7410 HOST_WIDE_INT count = 0;
7412 rtx from = fix->insn;
7413 rtx selected = from;
7415 HOST_WIDE_INT selected_address;
7417 HOST_WIDE_INT max_count = max_address - fix->address;
7418 rtx label = gen_label_rtx ();
7420 selected_cost = arm_barrier_cost (from);
7421 selected_address = fix->address;
7423 while (from && count < max_count)
7428 /* This code shouldn't have been called if there was a natural barrier
7430 gcc_assert (GET_CODE (from) != BARRIER);
7432 /* Count the length of this insn. */
7433 count += get_attr_length (from);
7435 /* If there is a jump table, add its length. */
7436 tmp = is_jump_table (from);
7439 count += get_jump_table_size (tmp);
7441 /* Jump tables aren't in a basic block, so base the cost on
7442 the dispatch insn. If we select this location, we will
7443 still put the pool after the table. */
7444 new_cost = arm_barrier_cost (from);
7446 if (count < max_count && new_cost <= selected_cost)
7449 selected_cost = new_cost;
7450 selected_address = fix->address + count;
7453 /* Continue after the dispatch table. */
7454 from = NEXT_INSN (tmp);
7458 new_cost = arm_barrier_cost (from);
7460 if (count < max_count && new_cost <= selected_cost)
7463 selected_cost = new_cost;
7464 selected_address = fix->address + count;
7467 from = NEXT_INSN (from);
7470 /* Create a new JUMP_INSN that branches around a barrier. */
7471 from = emit_jump_insn_after (gen_jump (label), selected);
7472 JUMP_LABEL (from) = label;
7473 barrier = emit_barrier_after (from);
7474 emit_label_after (label, barrier);
7476 /* Create a minipool barrier entry for the new barrier. */
7477 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
7478 new_fix->insn = barrier;
7479 new_fix->address = selected_address;
7480 new_fix->next = fix->next;
7481 fix->next = new_fix;
7486 /* Record that there is a natural barrier in the insn stream at
7489 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
7491 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7494 fix->address = address;
7497 if (minipool_fix_head != NULL)
7498 minipool_fix_tail->next = fix;
7500 minipool_fix_head = fix;
7502 minipool_fix_tail = fix;
7505 /* Record INSN, which will need fixing up to load a value from the
7506 minipool. ADDRESS is the offset of the insn since the start of the
7507 function; LOC is a pointer to the part of the insn which requires
7508 fixing; VALUE is the constant that must be loaded, which is of type
7511 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
7512 enum machine_mode mode, rtx value)
7514 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7516 #ifdef AOF_ASSEMBLER
7517 /* PIC symbol references need to be converted into offsets into the
7519 /* XXX This shouldn't be done here. */
7520 if (flag_pic && GET_CODE (value) == SYMBOL_REF)
7521 value = aof_pic_entry (value);
7522 #endif /* AOF_ASSEMBLER */
7525 fix->address = address;
7528 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
7530 fix->forwards = get_attr_pool_range (insn);
7531 fix->backwards = get_attr_neg_pool_range (insn);
7532 fix->minipool = NULL;
7534 /* If an insn doesn't have a range defined for it, then it isn't
7535 expecting to be reworked by this code. Better to stop now than
7536 to generate duff assembly code. */
7537 gcc_assert (fix->forwards || fix->backwards);
7539 /* With AAPCS/iWMMXt enabled, the pool is aligned to an 8-byte boundary.
7540 So there might be an empty word before the start of the pool.
7541 Hence we reduce the forward range by 4 to allow for this
7543 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
7549 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
7550 GET_MODE_NAME (mode),
7551 INSN_UID (insn), (unsigned long) address,
7552 -1 * (long)fix->backwards, (long)fix->forwards);
7553 arm_print_value (dump_file, fix->value);
7554 fprintf (dump_file, "\n");
7557 /* Add it to the chain of fixes. */
7560 if (minipool_fix_head != NULL)
7561 minipool_fix_tail->next = fix;
7563 minipool_fix_head = fix;
7565 minipool_fix_tail = fix;
7568 /* Return the cost of synthesizing a 64-bit constant VAL inline.
7569 Returns the number of insns needed, or 99 if we don't know how to
7572 arm_const_double_inline_cost (rtx val)
7574 rtx lowpart, highpart;
7575 enum machine_mode mode;
7577 mode = GET_MODE (val);
7579 if (mode == VOIDmode)
7582 gcc_assert (GET_MODE_SIZE (mode) == 8);
7584 lowpart = gen_lowpart (SImode, val);
7585 highpart = gen_highpart_mode (SImode, mode, val);
7587 gcc_assert (GET_CODE (lowpart) == CONST_INT);
7588 gcc_assert (GET_CODE (highpart) == CONST_INT);
7590 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
7591 NULL_RTX, NULL_RTX, 0, 0)
7592 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
7593 NULL_RTX, NULL_RTX, 0, 0));
7596 /* Return true if it is worthwhile to split a 64-bit constant into two
7597 32-bit operations. This is the case if optimizing for size, or
7598 if we have load delay slots, or if one 32-bit part can be done with
7599 a single data operation. */
7601 arm_const_double_by_parts (rtx val)
7603 enum machine_mode mode = GET_MODE (val);
7606 if (optimize_size || arm_ld_sched)
7609 if (mode == VOIDmode)
7612 part = gen_highpart_mode (SImode, mode, val);
7614 gcc_assert (GET_CODE (part) == CONST_INT);
7616 if (const_ok_for_arm (INTVAL (part))
7617 || const_ok_for_arm (~INTVAL (part)))
7620 part = gen_lowpart (SImode, val);
7622 gcc_assert (GET_CODE (part) == CONST_INT);
7624 if (const_ok_for_arm (INTVAL (part))
7625 || const_ok_for_arm (~INTVAL (part)))
7631 /* Scan INSN and note any of its operands that need fixing.
7632 If DO_PUSHES is false we do not actually push any of the fixups
7633 needed. The function returns TRUE if any fixups were needed/pushed.
7634 This is used by arm_memory_load_p() which needs to know about loads
7635 of constants that will be converted into minipool loads. */
7637 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
7639 bool result = false;
7642 extract_insn (insn);
7644 if (!constrain_operands (1))
7645 fatal_insn_not_found (insn);
7647 if (recog_data.n_alternatives == 0)
7650 /* Fill in recog_op_alt with information about the constraints of
7652 preprocess_constraints ();
7654 for (opno = 0; opno < recog_data.n_operands; opno++)
7656 /* Things we need to fix can only occur in inputs. */
7657 if (recog_data.operand_type[opno] != OP_IN)
7660 /* If this alternative is a memory reference, then any mention
7661 of constants in this alternative is really to fool reload
7662 into allowing us to accept one there. We need to fix them up
7663 now so that we output the right code. */
7664 if (recog_op_alt[opno][which_alternative].memory_ok)
7666 rtx op = recog_data.operand[opno];
7668 if (CONSTANT_P (op))
7671 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
7672 recog_data.operand_mode[opno], op);
7675 else if (GET_CODE (op) == MEM
7676 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
7677 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
7681 rtx cop = avoid_constant_pool_reference (op);
7683 /* Casting the address of something to a mode narrower
7684 than a word can cause avoid_constant_pool_reference()
7685 to return the pool reference itself. That's no good to
7686 us here. Lets just hope that we can use the
7687 constant pool value directly. */
7689 cop = get_pool_constant (XEXP (op, 0));
7691 push_minipool_fix (insn, address,
7692 recog_data.operand_loc[opno],
7693 recog_data.operand_mode[opno], cop);
7704 /* Gcc puts the pool in the wrong place for ARM, since we can only
7705 load addresses a limited distance around the pc. We do some
7706 special munging to move the constant pool values to the correct
7707 point in the code. */
7712 HOST_WIDE_INT address = 0;
7715 minipool_fix_head = minipool_fix_tail = NULL;
7717 /* The first insn must always be a note, or the code below won't
7718 scan it properly. */
7719 insn = get_insns ();
7720 gcc_assert (GET_CODE (insn) == NOTE);
7722 /* Scan all the insns and record the operands that will need fixing. */
7723 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
7725 if (TARGET_CIRRUS_FIX_INVALID_INSNS
7726 && (arm_cirrus_insn_p (insn)
7727 || GET_CODE (insn) == JUMP_INSN
7728 || arm_memory_load_p (insn)))
7729 cirrus_reorg (insn);
7731 if (GET_CODE (insn) == BARRIER)
7732 push_minipool_barrier (insn, address);
7733 else if (INSN_P (insn))
7737 note_invalid_constants (insn, address, true);
7738 address += get_attr_length (insn);
7740 /* If the insn is a vector jump, add the size of the table
7741 and skip the table. */
7742 if ((table = is_jump_table (insn)) != NULL)
7744 address += get_jump_table_size (table);
7750 fix = minipool_fix_head;
7752 /* Now scan the fixups and perform the required changes. */
7757 Mfix * last_added_fix;
7758 Mfix * last_barrier = NULL;
7761 /* Skip any further barriers before the next fix. */
7762 while (fix && GET_CODE (fix->insn) == BARRIER)
7765 /* No more fixes. */
7769 last_added_fix = NULL;
7771 for (ftmp = fix; ftmp; ftmp = ftmp->next)
7773 if (GET_CODE (ftmp->insn) == BARRIER)
7775 if (ftmp->address >= minipool_vector_head->max_address)
7778 last_barrier = ftmp;
7780 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
7783 last_added_fix = ftmp; /* Keep track of the last fix added. */
7786 /* If we found a barrier, drop back to that; any fixes that we
7787 could have reached but come after the barrier will now go in
7788 the next mini-pool. */
7789 if (last_barrier != NULL)
7791 /* Reduce the refcount for those fixes that won't go into this
7793 for (fdel = last_barrier->next;
7794 fdel && fdel != ftmp;
7797 fdel->minipool->refcount--;
7798 fdel->minipool = NULL;
7801 ftmp = last_barrier;
7805 /* ftmp is first fix that we can't fit into this pool and
7806 there no natural barriers that we could use. Insert a
7807 new barrier in the code somewhere between the previous
7808 fix and this one, and arrange to jump around it. */
7809 HOST_WIDE_INT max_address;
7811 /* The last item on the list of fixes must be a barrier, so
7812 we can never run off the end of the list of fixes without
7813 last_barrier being set. */
7816 max_address = minipool_vector_head->max_address;
7817 /* Check that there isn't another fix that is in range that
7818 we couldn't fit into this pool because the pool was
7819 already too large: we need to put the pool before such an
7821 if (ftmp->address < max_address)
7822 max_address = ftmp->address;
7824 last_barrier = create_fix_barrier (last_added_fix, max_address);
7827 assign_minipool_offsets (last_barrier);
7831 if (GET_CODE (ftmp->insn) != BARRIER
7832 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
7839 /* Scan over the fixes we have identified for this pool, fixing them
7840 up and adding the constants to the pool itself. */
7841 for (this_fix = fix; this_fix && ftmp != this_fix;
7842 this_fix = this_fix->next)
7843 if (GET_CODE (this_fix->insn) != BARRIER)
7846 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
7847 minipool_vector_label),
7848 this_fix->minipool->offset);
7849 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
7852 dump_minipool (last_barrier->insn);
7856 /* From now on we must synthesize any constants that we can't handle
7857 directly. This can happen if the RTL gets split during final
7858 instruction generation. */
7859 after_arm_reorg = 1;
7861 /* Free the minipool memory. */
7862 obstack_free (&minipool_obstack, minipool_startobj);
7865 /* Routines to output assembly language. */
7867 /* If the rtx is the correct value then return the string of the number.
7868 In this way we can ensure that valid double constants are generated even
7869 when cross compiling. */
7871 fp_immediate_constant (rtx x)
7876 if (!fp_consts_inited)
7879 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7880 for (i = 0; i < 8; i++)
7881 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7882 return strings_fp[i];
7887 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
7889 fp_const_from_val (REAL_VALUE_TYPE *r)
7893 if (!fp_consts_inited)
7896 for (i = 0; i < 8; i++)
7897 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
7898 return strings_fp[i];
7903 /* Output the operands of a LDM/STM instruction to STREAM.
7904 MASK is the ARM register set mask of which only bits 0-15 are important.
7905 REG is the base register, either the frame pointer or the stack pointer,
7906 INSTR is the possibly suffixed load or store instruction. */
7909 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
7913 bool not_first = FALSE;
7915 fputc ('\t', stream);
7916 asm_fprintf (stream, instr, reg);
7917 fputs (", {", stream);
7919 for (i = 0; i <= LAST_ARM_REGNUM; i++)
7920 if (mask & (1 << i))
7923 fprintf (stream, ", ");
7925 asm_fprintf (stream, "%r", i);
7929 fprintf (stream, "}\n");
7933 /* Output a FLDMX instruction to STREAM.
7934 BASE if the register containing the address.
7935 REG and COUNT specify the register range.
7936 Extra registers may be added to avoid hardware bugs. */
7939 arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
7943 /* Workaround ARM10 VFPr1 bug. */
7944 if (count == 2 && !arm_arch6)
7951 fputc ('\t', stream);
7952 asm_fprintf (stream, "fldmfdx\t%r!, {", base);
7954 for (i = reg; i < reg + count; i++)
7957 fputs (", ", stream);
7958 asm_fprintf (stream, "d%d", i);
7960 fputs ("}\n", stream);
7965 /* Output the assembly for a store multiple. */
7968 vfp_output_fstmx (rtx * operands)
7975 strcpy (pattern, "fstmfdx\t%m0!, {%P1");
7976 p = strlen (pattern);
7978 gcc_assert (GET_CODE (operands[1]) == REG);
7980 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
7981 for (i = 1; i < XVECLEN (operands[2], 0); i++)
7983 p += sprintf (&pattern[p], ", d%d", base + i);
7985 strcpy (&pattern[p], "}");
7987 output_asm_insn (pattern, operands);
7992 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
7993 number of bytes pushed. */
7996 vfp_emit_fstmx (int base_reg, int count)
8003 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
8004 register pairs are stored by a store multiple insn. We avoid this
8005 by pushing an extra pair. */
8006 if (count == 2 && !arm_arch6)
8008 if (base_reg == LAST_VFP_REGNUM - 3)
8013 /* ??? The frame layout is implementation defined. We describe
8014 standard format 1 (equivalent to a FSTMD insn and unused pad word).
8015 We really need some way of representing the whole block so that the
8016 unwinder can figure it out at runtime. */
8017 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
8018 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
8020 reg = gen_rtx_REG (DFmode, base_reg);
8024 = gen_rtx_SET (VOIDmode,
8025 gen_rtx_MEM (BLKmode,
8026 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
8027 gen_rtx_UNSPEC (BLKmode,
8031 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8032 gen_rtx_PLUS (SImode, stack_pointer_rtx,
8033 GEN_INT (-(count * 8 + 4))));
8034 RTX_FRAME_RELATED_P (tmp) = 1;
8035 XVECEXP (dwarf, 0, 0) = tmp;
8037 tmp = gen_rtx_SET (VOIDmode,
8038 gen_rtx_MEM (DFmode, stack_pointer_rtx),
8040 RTX_FRAME_RELATED_P (tmp) = 1;
8041 XVECEXP (dwarf, 0, 1) = tmp;
8043 for (i = 1; i < count; i++)
8045 reg = gen_rtx_REG (DFmode, base_reg);
8047 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
8049 tmp = gen_rtx_SET (VOIDmode,
8050 gen_rtx_MEM (DFmode,
8051 gen_rtx_PLUS (SImode,
8055 RTX_FRAME_RELATED_P (tmp) = 1;
8056 XVECEXP (dwarf, 0, i + 1) = tmp;
8059 par = emit_insn (par);
8060 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
8062 RTX_FRAME_RELATED_P (par) = 1;
8064 return count * 8 + 4;
8068 /* Output a 'call' insn. */
8070 output_call (rtx *operands)
8072 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
8074 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
8075 if (REGNO (operands[0]) == LR_REGNUM)
8077 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
8078 output_asm_insn ("mov%?\t%0, %|lr", operands);
8081 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8083 if (TARGET_INTERWORK || arm_arch4t)
8084 output_asm_insn ("bx%?\t%0", operands);
8086 output_asm_insn ("mov%?\t%|pc, %0", operands);
8091 /* Output a 'call' insn that is a reference in memory. */
8093 output_call_mem (rtx *operands)
8095 if (TARGET_INTERWORK && !arm_arch5)
8097 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8098 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8099 output_asm_insn ("bx%?\t%|ip", operands);
8101 else if (regno_use_in (LR_REGNUM, operands[0]))
8103 /* LR is used in the memory address. We load the address in the
8104 first instruction. It's safe to use IP as the target of the
8105 load since the call will kill it anyway. */
8106 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8108 output_asm_insn ("blx%?\t%|ip", operands);
8111 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8113 output_asm_insn ("bx%?\t%|ip", operands);
8115 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
8120 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8121 output_asm_insn ("ldr%?\t%|pc, %0", operands);
8128 /* Output a move from arm registers to an fpa registers.
8129 OPERANDS[0] is an fpa register.
8130 OPERANDS[1] is the first registers of an arm register pair. */
8132 output_mov_long_double_fpa_from_arm (rtx *operands)
8134 int arm_reg0 = REGNO (operands[1]);
8137 gcc_assert (arm_reg0 != IP_REGNUM);
8139 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8140 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8141 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8143 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1, %2}", ops);
8144 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
8149 /* Output a move from an fpa register to arm registers.
8150 OPERANDS[0] is the first registers of an arm register pair.
8151 OPERANDS[1] is an fpa register. */
8153 output_mov_long_double_arm_from_fpa (rtx *operands)
8155 int arm_reg0 = REGNO (operands[0]);
8158 gcc_assert (arm_reg0 != IP_REGNUM);
8160 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8161 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8162 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8164 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
8165 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1, %2}", ops);
8169 /* Output a move from arm registers to arm registers of a long double
8170 OPERANDS[0] is the destination.
8171 OPERANDS[1] is the source. */
8173 output_mov_long_double_arm_from_arm (rtx *operands)
8175 /* We have to be careful here because the two might overlap. */
8176 int dest_start = REGNO (operands[0]);
8177 int src_start = REGNO (operands[1]);
8181 if (dest_start < src_start)
8183 for (i = 0; i < 3; i++)
8185 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8186 ops[1] = gen_rtx_REG (SImode, src_start + i);
8187 output_asm_insn ("mov%?\t%0, %1", ops);
8192 for (i = 2; i >= 0; i--)
8194 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8195 ops[1] = gen_rtx_REG (SImode, src_start + i);
8196 output_asm_insn ("mov%?\t%0, %1", ops);
8204 /* Output a move from arm registers to an fpa registers.
8205 OPERANDS[0] is an fpa register.
8206 OPERANDS[1] is the first registers of an arm register pair. */
8208 output_mov_double_fpa_from_arm (rtx *operands)
8210 int arm_reg0 = REGNO (operands[1]);
8213 gcc_assert (arm_reg0 != IP_REGNUM);
8215 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8216 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8217 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1}", ops);
8218 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
8222 /* Output a move from an fpa register to arm registers.
8223 OPERANDS[0] is the first registers of an arm register pair.
8224 OPERANDS[1] is an fpa register. */
8226 output_mov_double_arm_from_fpa (rtx *operands)
8228 int arm_reg0 = REGNO (operands[0]);
8231 gcc_assert (arm_reg0 != IP_REGNUM);
8233 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8234 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8235 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
8236 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1}", ops);
8240 /* Output a move between double words.
8241 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
8242 or MEM<-REG and all MEMs must be offsettable addresses. */
8244 output_move_double (rtx *operands)
8246 enum rtx_code code0 = GET_CODE (operands[0]);
8247 enum rtx_code code1 = GET_CODE (operands[1]);
8252 int reg0 = REGNO (operands[0]);
8254 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8256 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
8258 switch (GET_CODE (XEXP (operands[1], 0)))
8261 output_asm_insn ("ldm%?ia\t%m1, %M0", operands);
8265 gcc_assert (TARGET_LDRD);
8266 output_asm_insn ("ldr%?d\t%0, [%m1, #8]!", operands);
8270 output_asm_insn ("ldm%?db\t%m1!, %M0", operands);
8274 output_asm_insn ("ldm%?ia\t%m1!, %M0", operands);
8278 gcc_assert (TARGET_LDRD);
8279 output_asm_insn ("ldr%?d\t%0, [%m1], #-8", operands);
8284 otherops[0] = operands[0];
8285 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
8286 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
8288 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
8290 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8292 /* Registers overlap so split out the increment. */
8293 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8294 output_asm_insn ("ldr%?d\t%0, [%1] @split", otherops);
8297 output_asm_insn ("ldr%?d\t%0, [%1, %2]!", otherops);
8301 /* We only allow constant increments, so this is safe. */
8302 output_asm_insn ("ldr%?d\t%0, [%1], %2", otherops);
8308 output_asm_insn ("adr%?\t%0, %1", operands);
8309 output_asm_insn ("ldm%?ia\t%0, %M0", operands);
8313 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
8314 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
8316 otherops[0] = operands[0];
8317 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
8318 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
8320 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
8322 if (GET_CODE (otherops[2]) == CONST_INT)
8324 switch ((int) INTVAL (otherops[2]))
8327 output_asm_insn ("ldm%?db\t%1, %M0", otherops);
8330 output_asm_insn ("ldm%?da\t%1, %M0", otherops);
8333 output_asm_insn ("ldm%?ib\t%1, %M0", otherops);
8338 && (GET_CODE (otherops[2]) == REG
8339 || (GET_CODE (otherops[2]) == CONST_INT
8340 && INTVAL (otherops[2]) > -256
8341 && INTVAL (otherops[2]) < 256)))
8343 if (reg_overlap_mentioned_p (otherops[0],
8346 /* Swap base and index registers over to
8347 avoid a conflict. */
8348 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
8349 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
8352 /* If both registers conflict, it will usually
8353 have been fixed by a splitter. */
8354 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8356 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8357 output_asm_insn ("ldr%?d\t%0, [%1]",
8361 output_asm_insn ("ldr%?d\t%0, [%1, %2]", otherops);
8365 if (GET_CODE (otherops[2]) == CONST_INT)
8367 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
8368 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
8370 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8373 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8376 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
8378 return "ldm%?ia\t%0, %M0";
8382 otherops[1] = adjust_address (operands[1], SImode, 4);
8383 /* Take care of overlapping base/data reg. */
8384 if (reg_mentioned_p (operands[0], operands[1]))
8386 output_asm_insn ("ldr%?\t%0, %1", otherops);
8387 output_asm_insn ("ldr%?\t%0, %1", operands);
8391 output_asm_insn ("ldr%?\t%0, %1", operands);
8392 output_asm_insn ("ldr%?\t%0, %1", otherops);
8399 /* Constraints should ensure this. */
8400 gcc_assert (code0 == MEM && code1 == REG);
8401 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
8403 switch (GET_CODE (XEXP (operands[0], 0)))
8406 output_asm_insn ("stm%?ia\t%m0, %M1", operands);
8410 gcc_assert (TARGET_LDRD);
8411 output_asm_insn ("str%?d\t%1, [%m0, #8]!", operands);
8415 output_asm_insn ("stm%?db\t%m0!, %M1", operands);
8419 output_asm_insn ("stm%?ia\t%m0!, %M1", operands);
8423 gcc_assert (TARGET_LDRD);
8424 output_asm_insn ("str%?d\t%1, [%m0], #-8", operands);
8429 otherops[0] = operands[1];
8430 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
8431 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
8433 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
8434 output_asm_insn ("str%?d\t%0, [%1, %2]!", otherops);
8436 output_asm_insn ("str%?d\t%0, [%1], %2", otherops);
8440 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
8441 if (GET_CODE (otherops[2]) == CONST_INT)
8443 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
8446 output_asm_insn ("stm%?db\t%m0, %M1", operands);
8450 output_asm_insn ("stm%?da\t%m0, %M1", operands);
8454 output_asm_insn ("stm%?ib\t%m0, %M1", operands);
8459 && (GET_CODE (otherops[2]) == REG
8460 || (GET_CODE (otherops[2]) == CONST_INT
8461 && INTVAL (otherops[2]) > -256
8462 && INTVAL (otherops[2]) < 256)))
8464 otherops[0] = operands[1];
8465 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
8466 output_asm_insn ("str%?d\t%0, [%1, %2]", otherops);
8472 otherops[0] = adjust_address (operands[0], SImode, 4);
8473 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
8474 output_asm_insn ("str%?\t%1, %0", operands);
8475 output_asm_insn ("str%?\t%1, %0", otherops);
8482 /* Output an ADD r, s, #n where n may be too big for one instruction.
8483 If adding zero to one register, output nothing. */
8485 output_add_immediate (rtx *operands)
8487 HOST_WIDE_INT n = INTVAL (operands[2]);
8489 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
8492 output_multi_immediate (operands,
8493 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
8496 output_multi_immediate (operands,
8497 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
8504 /* Output a multiple immediate operation.
8505 OPERANDS is the vector of operands referred to in the output patterns.
8506 INSTR1 is the output pattern to use for the first constant.
8507 INSTR2 is the output pattern to use for subsequent constants.
8508 IMMED_OP is the index of the constant slot in OPERANDS.
8509 N is the constant value. */
8511 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
8512 int immed_op, HOST_WIDE_INT n)
8514 #if HOST_BITS_PER_WIDE_INT > 32
8520 /* Quick and easy output. */
8521 operands[immed_op] = const0_rtx;
8522 output_asm_insn (instr1, operands);
8527 const char * instr = instr1;
8529 /* Note that n is never zero here (which would give no output). */
8530 for (i = 0; i < 32; i += 2)
8534 operands[immed_op] = GEN_INT (n & (255 << i));
8535 output_asm_insn (instr, operands);
8545 /* Return the appropriate ARM instruction for the operation code.
8546 The returned result should not be overwritten. OP is the rtx of the
8547 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
8550 arithmetic_instr (rtx op, int shift_first_arg)
8552 switch (GET_CODE (op))
8558 return shift_first_arg ? "rsb" : "sub";
8574 /* Ensure valid constant shifts and return the appropriate shift mnemonic
8575 for the operation code. The returned result should not be overwritten.
8576 OP is the rtx code of the shift.
8577 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
8580 shift_op (rtx op, HOST_WIDE_INT *amountp)
8583 enum rtx_code code = GET_CODE (op);
8585 switch (GET_CODE (XEXP (op, 1)))
8593 *amountp = INTVAL (XEXP (op, 1));
8615 gcc_assert (*amountp != -1);
8616 *amountp = 32 - *amountp;
8625 /* We never have to worry about the amount being other than a
8626 power of 2, since this case can never be reloaded from a reg. */
8627 gcc_assert (*amountp != -1);
8628 *amountp = int_log2 (*amountp);
8637 /* This is not 100% correct, but follows from the desire to merge
8638 multiplication by a power of 2 with the recognizer for a
8639 shift. >=32 is not a valid shift for "asl", so we must try and
8640 output a shift that produces the correct arithmetical result.
8641 Using lsr #32 is identical except for the fact that the carry bit
8642 is not set correctly if we set the flags; but we never use the
8643 carry bit from such an operation, so we can ignore that. */
8644 if (code == ROTATERT)
8645 /* Rotate is just modulo 32. */
8647 else if (*amountp != (*amountp & 31))
8654 /* Shifts of 0 are no-ops. */
8662 /* Obtain the shift from the POWER of two. */
8664 static HOST_WIDE_INT
8665 int_log2 (HOST_WIDE_INT power)
8667 HOST_WIDE_INT shift = 0;
8669 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
8671 gcc_assert (shift <= 31);
8678 /* Output a .ascii pseudo-op, keeping track of lengths. This is
8679 because /bin/as is horribly restrictive. The judgement about
8680 whether or not each character is 'printable' (and can be output as
8681 is) or not (and must be printed with an octal escape) must be made
8682 with reference to the *host* character set -- the situation is
8683 similar to that discussed in the comments above pp_c_char in
8684 c-pretty-print.c. */
8686 #define MAX_ASCII_LEN 51
8689 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
8694 fputs ("\t.ascii\t\"", stream);
8696 for (i = 0; i < len; i++)
8700 if (len_so_far >= MAX_ASCII_LEN)
8702 fputs ("\"\n\t.ascii\t\"", stream);
8708 if (c == '\\' || c == '\"')
8710 putc ('\\', stream);
8718 fprintf (stream, "\\%03o", c);
8723 fputs ("\"\n", stream);
8726 /* Compute the register save mask for registers 0 through 12
8727 inclusive. This code is used by arm_compute_save_reg_mask. */
8729 static unsigned long
8730 arm_compute_save_reg0_reg12_mask (void)
8732 unsigned long func_type = arm_current_func_type ();
8733 unsigned long save_reg_mask = 0;
8736 if (IS_INTERRUPT (func_type))
8738 unsigned int max_reg;
8739 /* Interrupt functions must not corrupt any registers,
8740 even call clobbered ones. If this is a leaf function
8741 we can just examine the registers used by the RTL, but
8742 otherwise we have to assume that whatever function is
8743 called might clobber anything, and so we have to save
8744 all the call-clobbered registers as well. */
8745 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
8746 /* FIQ handlers have registers r8 - r12 banked, so
8747 we only need to check r0 - r7, Normal ISRs only
8748 bank r14 and r15, so we must check up to r12.
8749 r13 is the stack pointer which is always preserved,
8750 so we do not need to consider it here. */
8755 for (reg = 0; reg <= max_reg; reg++)
8756 if (regs_ever_live[reg]
8757 || (! current_function_is_leaf && call_used_regs [reg]))
8758 save_reg_mask |= (1 << reg);
8760 /* Also save the pic base register if necessary. */
8762 && !TARGET_SINGLE_PIC_BASE
8763 && current_function_uses_pic_offset_table)
8764 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8768 /* In the normal case we only need to save those registers
8769 which are call saved and which are used by this function. */
8770 for (reg = 0; reg <= 10; reg++)
8771 if (regs_ever_live[reg] && ! call_used_regs [reg])
8772 save_reg_mask |= (1 << reg);
8774 /* Handle the frame pointer as a special case. */
8775 if (! TARGET_APCS_FRAME
8776 && ! frame_pointer_needed
8777 && regs_ever_live[HARD_FRAME_POINTER_REGNUM]
8778 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
8779 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
8781 /* If we aren't loading the PIC register,
8782 don't stack it even though it may be live. */
8784 && !TARGET_SINGLE_PIC_BASE
8785 && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
8786 || current_function_uses_pic_offset_table))
8787 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8790 /* Save registers so the exception handler can modify them. */
8791 if (current_function_calls_eh_return)
8797 reg = EH_RETURN_DATA_REGNO (i);
8798 if (reg == INVALID_REGNUM)
8800 save_reg_mask |= 1 << reg;
8804 return save_reg_mask;
8807 /* Compute a bit mask of which registers need to be
8808 saved on the stack for the current function. */
8810 static unsigned long
8811 arm_compute_save_reg_mask (void)
8813 unsigned int save_reg_mask = 0;
8814 unsigned long func_type = arm_current_func_type ();
8816 if (IS_NAKED (func_type))
8817 /* This should never really happen. */
8820 /* If we are creating a stack frame, then we must save the frame pointer,
8821 IP (which will hold the old stack pointer), LR and the PC. */
8822 if (frame_pointer_needed)
8824 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
8829 /* Volatile functions do not return, so there
8830 is no need to save any other registers. */
8831 if (IS_VOLATILE (func_type))
8832 return save_reg_mask;
8834 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
8836 /* Decide if we need to save the link register.
8837 Interrupt routines have their own banked link register,
8838 so they never need to save it.
8839 Otherwise if we do not use the link register we do not need to save
8840 it. If we are pushing other registers onto the stack however, we
8841 can save an instruction in the epilogue by pushing the link register
8842 now and then popping it back into the PC. This incurs extra memory
8843 accesses though, so we only do it when optimizing for size, and only
8844 if we know that we will not need a fancy return sequence. */
8845 if (regs_ever_live [LR_REGNUM]
8848 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
8849 && !current_function_calls_eh_return))
8850 save_reg_mask |= 1 << LR_REGNUM;
8852 if (cfun->machine->lr_save_eliminated)
8853 save_reg_mask &= ~ (1 << LR_REGNUM);
8855 if (TARGET_REALLY_IWMMXT
8856 && ((bit_count (save_reg_mask)
8857 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
8861 /* The total number of registers that are going to be pushed
8862 onto the stack is odd. We need to ensure that the stack
8863 is 64-bit aligned before we start to save iWMMXt registers,
8864 and also before we start to create locals. (A local variable
8865 might be a double or long long which we will load/store using
8866 an iWMMXt instruction). Therefore we need to push another
8867 ARM register, so that the stack will be 64-bit aligned. We
8868 try to avoid using the arg registers (r0 -r3) as they might be
8869 used to pass values in a tail call. */
8870 for (reg = 4; reg <= 12; reg++)
8871 if ((save_reg_mask & (1 << reg)) == 0)
8875 save_reg_mask |= (1 << reg);
8878 cfun->machine->sibcall_blocked = 1;
8879 save_reg_mask |= (1 << 3);
8883 return save_reg_mask;
8887 /* Compute a bit mask of which registers need to be
8888 saved on the stack for the current function. */
8889 static unsigned long
8890 thumb_compute_save_reg_mask (void)
8896 for (reg = 0; reg < 12; reg ++)
8897 if (regs_ever_live[reg] && !call_used_regs[reg])
8900 if (flag_pic && !TARGET_SINGLE_PIC_BASE)
8901 mask |= (1 << PIC_OFFSET_TABLE_REGNUM);
8903 if (TARGET_SINGLE_PIC_BASE)
8904 mask &= ~(1 << arm_pic_register);
8906 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
8907 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
8908 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
8910 /* LR will also be pushed if any lo regs are pushed. */
8911 if (mask & 0xff || thumb_force_lr_save ())
8912 mask |= (1 << LR_REGNUM);
8914 /* Make sure we have a low work register if we need one.
8915 We will need one if we are going to push a high register,
8916 but we are not currently intending to push a low register. */
8917 if ((mask & 0xff) == 0
8918 && ((mask & 0x0f00) || TARGET_BACKTRACE))
8920 /* Use thumb_find_work_register to choose which register
8921 we will use. If the register is live then we will
8922 have to push it. Use LAST_LO_REGNUM as our fallback
8923 choice for the register to select. */
8924 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
8926 if (! call_used_regs[reg])
8934 /* Return the number of bytes required to save VFP registers. */
8936 arm_get_vfp_saved_size (void)
8943 /* Space for saved VFP registers. */
8944 if (TARGET_HARD_FLOAT && TARGET_VFP)
8947 for (regno = FIRST_VFP_REGNUM;
8948 regno < LAST_VFP_REGNUM;
8951 if ((!regs_ever_live[regno] || call_used_regs[regno])
8952 && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
8956 /* Workaround ARM10 VFPr1 bug. */
8957 if (count == 2 && !arm_arch6)
8959 saved += count * 8 + 4;
8968 if (count == 2 && !arm_arch6)
8970 saved += count * 8 + 4;
8977 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
8978 everything bar the final return instruction. */
8980 output_return_instruction (rtx operand, int really_return, int reverse)
8982 char conditional[10];
8985 unsigned long live_regs_mask;
8986 unsigned long func_type;
8987 arm_stack_offsets *offsets;
8989 func_type = arm_current_func_type ();
8991 if (IS_NAKED (func_type))
8994 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
8996 /* If this function was declared non-returning, and we have
8997 found a tail call, then we have to trust that the called
8998 function won't return. */
9003 /* Otherwise, trap an attempted return by aborting. */
9005 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
9007 assemble_external_libcall (ops[1]);
9008 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
9014 gcc_assert (!current_function_calls_alloca || really_return);
9016 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
9018 return_used_this_function = 1;
9020 live_regs_mask = arm_compute_save_reg_mask ();
9024 const char * return_reg;
9026 /* If we do not have any special requirements for function exit
9027 (e.g. interworking, or ISR) then we can load the return address
9028 directly into the PC. Otherwise we must load it into LR. */
9030 && ! TARGET_INTERWORK)
9031 return_reg = reg_names[PC_REGNUM];
9033 return_reg = reg_names[LR_REGNUM];
9035 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
9037 /* There are three possible reasons for the IP register
9038 being saved. 1) a stack frame was created, in which case
9039 IP contains the old stack pointer, or 2) an ISR routine
9040 corrupted it, or 3) it was saved to align the stack on
9041 iWMMXt. In case 1, restore IP into SP, otherwise just
9043 if (frame_pointer_needed)
9045 live_regs_mask &= ~ (1 << IP_REGNUM);
9046 live_regs_mask |= (1 << SP_REGNUM);
9049 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
9052 /* On some ARM architectures it is faster to use LDR rather than
9053 LDM to load a single register. On other architectures, the
9054 cost is the same. In 26 bit mode, or for exception handlers,
9055 we have to use LDM to load the PC so that the CPSR is also
9057 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9058 if (live_regs_mask == (1U << reg))
9061 if (reg <= LAST_ARM_REGNUM
9062 && (reg != LR_REGNUM
9064 || ! IS_INTERRUPT (func_type)))
9066 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
9067 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
9074 /* Generate the load multiple instruction to restore the
9075 registers. Note we can get here, even if
9076 frame_pointer_needed is true, but only if sp already
9077 points to the base of the saved core registers. */
9078 if (live_regs_mask & (1 << SP_REGNUM))
9080 unsigned HOST_WIDE_INT stack_adjust;
9082 offsets = arm_get_frame_offsets ();
9083 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
9084 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
9086 if (stack_adjust && arm_arch5)
9087 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
9090 /* If we can't use ldmib (SA110 bug),
9091 then try to pop r3 instead. */
9093 live_regs_mask |= 1 << 3;
9094 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
9098 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
9100 p = instr + strlen (instr);
9102 for (reg = 0; reg <= SP_REGNUM; reg++)
9103 if (live_regs_mask & (1 << reg))
9105 int l = strlen (reg_names[reg]);
9111 memcpy (p, ", ", 2);
9115 memcpy (p, "%|", 2);
9116 memcpy (p + 2, reg_names[reg], l);
9120 if (live_regs_mask & (1 << LR_REGNUM))
9122 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
9123 /* If returning from an interrupt, restore the CPSR. */
9124 if (IS_INTERRUPT (func_type))
9131 output_asm_insn (instr, & operand);
9133 /* See if we need to generate an extra instruction to
9134 perform the actual function return. */
9136 && func_type != ARM_FT_INTERWORKED
9137 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
9139 /* The return has already been handled
9140 by loading the LR into the PC. */
9147 switch ((int) ARM_FUNC_TYPE (func_type))
9151 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
9154 case ARM_FT_INTERWORKED:
9155 sprintf (instr, "bx%s\t%%|lr", conditional);
9158 case ARM_FT_EXCEPTION:
9159 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
9163 /* Use bx if it's available. */
9164 if (arm_arch5 || arm_arch4t)
9165 sprintf (instr, "bx%s\t%%|lr", conditional);
9167 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
9171 output_asm_insn (instr, & operand);
9177 /* Write the function name into the code section, directly preceding
9178 the function prologue.
9180 Code will be output similar to this:
9182 .ascii "arm_poke_function_name", 0
9185 .word 0xff000000 + (t1 - t0)
9186 arm_poke_function_name
9188 stmfd sp!, {fp, ip, lr, pc}
9191 When performing a stack backtrace, code can inspect the value
9192 of 'pc' stored at 'fp' + 0. If the trace function then looks
9193 at location pc - 12 and the top 8 bits are set, then we know
9194 that there is a function name embedded immediately preceding this
9195 location and has length ((pc[-3]) & 0xff000000).
9197 We assume that pc is declared as a pointer to an unsigned long.
9199 It is of no benefit to output the function name if we are assembling
9200 a leaf function. These function types will not contain a stack
9201 backtrace structure, therefore it is not possible to determine the
9204 arm_poke_function_name (FILE *stream, const char *name)
9206 unsigned long alignlength;
9207 unsigned long length;
9210 length = strlen (name) + 1;
9211 alignlength = ROUND_UP_WORD (length);
9213 ASM_OUTPUT_ASCII (stream, name, length);
9214 ASM_OUTPUT_ALIGN (stream, 2);
9215 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
9216 assemble_aligned_integer (UNITS_PER_WORD, x);
9219 /* Place some comments into the assembler stream
9220 describing the current function. */
9222 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
9224 unsigned long func_type;
9228 thumb_output_function_prologue (f, frame_size);
9233 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
9235 func_type = arm_current_func_type ();
9237 switch ((int) ARM_FUNC_TYPE (func_type))
9242 case ARM_FT_INTERWORKED:
9243 asm_fprintf (f, "\t%@ Function supports interworking.\n");
9246 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
9249 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
9251 case ARM_FT_EXCEPTION:
9252 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
9256 if (IS_NAKED (func_type))
9257 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
9259 if (IS_VOLATILE (func_type))
9260 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
9262 if (IS_NESTED (func_type))
9263 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
9265 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
9266 current_function_args_size,
9267 current_function_pretend_args_size, frame_size);
9269 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
9270 frame_pointer_needed,
9271 cfun->machine->uses_anonymous_args);
9273 if (cfun->machine->lr_save_eliminated)
9274 asm_fprintf (f, "\t%@ link register save eliminated.\n");
9276 if (current_function_calls_eh_return)
9277 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
9279 #ifdef AOF_ASSEMBLER
9281 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
9284 return_used_this_function = 0;
9288 arm_output_epilogue (rtx sibling)
9291 unsigned long saved_regs_mask;
9292 unsigned long func_type;
9293 /* Floats_offset is the offset from the "virtual" frame. In an APCS
9294 frame that is $fp + 4 for a non-variadic function. */
9295 int floats_offset = 0;
9297 FILE * f = asm_out_file;
9298 unsigned int lrm_count = 0;
9299 int really_return = (sibling == NULL);
9301 arm_stack_offsets *offsets;
9303 /* If we have already generated the return instruction
9304 then it is futile to generate anything else. */
9305 if (use_return_insn (FALSE, sibling) && return_used_this_function)
9308 func_type = arm_current_func_type ();
9310 if (IS_NAKED (func_type))
9311 /* Naked functions don't have epilogues. */
9314 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9318 /* A volatile function should never return. Call abort. */
9319 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
9320 assemble_external_libcall (op);
9321 output_asm_insn ("bl\t%a0", &op);
9326 /* If we are throwing an exception, then we really must be doing a
9327 return, so we can't tail-call. */
9328 gcc_assert (!current_function_calls_eh_return || really_return);
9330 offsets = arm_get_frame_offsets ();
9331 saved_regs_mask = arm_compute_save_reg_mask ();
9334 lrm_count = bit_count (saved_regs_mask);
9336 floats_offset = offsets->saved_args;
9337 /* Compute how far away the floats will be. */
9338 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9339 if (saved_regs_mask & (1 << reg))
9342 if (frame_pointer_needed)
9344 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
9345 int vfp_offset = offsets->frame;
9347 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9349 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9350 if (regs_ever_live[reg] && !call_used_regs[reg])
9352 floats_offset += 12;
9353 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
9354 reg, FP_REGNUM, floats_offset - vfp_offset);
9359 start_reg = LAST_FPA_REGNUM;
9361 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9363 if (regs_ever_live[reg] && !call_used_regs[reg])
9365 floats_offset += 12;
9367 /* We can't unstack more than four registers at once. */
9368 if (start_reg - reg == 3)
9370 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
9371 reg, FP_REGNUM, floats_offset - vfp_offset);
9372 start_reg = reg - 1;
9377 if (reg != start_reg)
9378 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9379 reg + 1, start_reg - reg,
9380 FP_REGNUM, floats_offset - vfp_offset);
9381 start_reg = reg - 1;
9385 /* Just in case the last register checked also needs unstacking. */
9386 if (reg != start_reg)
9387 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9388 reg + 1, start_reg - reg,
9389 FP_REGNUM, floats_offset - vfp_offset);
9392 if (TARGET_HARD_FLOAT && TARGET_VFP)
9396 /* The fldmx insn does not have base+offset addressing modes,
9397 so we use IP to hold the address. */
9398 saved_size = arm_get_vfp_saved_size ();
9402 floats_offset += saved_size;
9403 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
9404 FP_REGNUM, floats_offset - vfp_offset);
9406 start_reg = FIRST_VFP_REGNUM;
9407 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9409 if ((!regs_ever_live[reg] || call_used_regs[reg])
9410 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9412 if (start_reg != reg)
9413 arm_output_fldmx (f, IP_REGNUM,
9414 (start_reg - FIRST_VFP_REGNUM) / 2,
9415 (reg - start_reg) / 2);
9416 start_reg = reg + 2;
9419 if (start_reg != reg)
9420 arm_output_fldmx (f, IP_REGNUM,
9421 (start_reg - FIRST_VFP_REGNUM) / 2,
9422 (reg - start_reg) / 2);
9427 /* The frame pointer is guaranteed to be non-double-word aligned.
9428 This is because it is set to (old_stack_pointer - 4) and the
9429 old_stack_pointer was double word aligned. Thus the offset to
9430 the iWMMXt registers to be loaded must also be non-double-word
9431 sized, so that the resultant address *is* double-word aligned.
9432 We can ignore floats_offset since that was already included in
9433 the live_regs_mask. */
9434 lrm_count += (lrm_count % 2 ? 2 : 1);
9436 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
9437 if (regs_ever_live[reg] && !call_used_regs[reg])
9439 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
9440 reg, FP_REGNUM, lrm_count * 4);
9445 /* saved_regs_mask should contain the IP, which at the time of stack
9446 frame generation actually contains the old stack pointer. So a
9447 quick way to unwind the stack is just pop the IP register directly
9448 into the stack pointer. */
9449 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
9450 saved_regs_mask &= ~ (1 << IP_REGNUM);
9451 saved_regs_mask |= (1 << SP_REGNUM);
9453 /* There are two registers left in saved_regs_mask - LR and PC. We
9454 only need to restore the LR register (the return address), but to
9455 save time we can load it directly into the PC, unless we need a
9456 special function exit sequence, or we are not really returning. */
9458 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9459 && !current_function_calls_eh_return)
9460 /* Delete the LR from the register mask, so that the LR on
9461 the stack is loaded into the PC in the register mask. */
9462 saved_regs_mask &= ~ (1 << LR_REGNUM);
9464 saved_regs_mask &= ~ (1 << PC_REGNUM);
9466 /* We must use SP as the base register, because SP is one of the
9467 registers being restored. If an interrupt or page fault
9468 happens in the ldm instruction, the SP might or might not
9469 have been restored. That would be bad, as then SP will no
9470 longer indicate the safe area of stack, and we can get stack
9471 corruption. Using SP as the base register means that it will
9472 be reset correctly to the original value, should an interrupt
9473 occur. If the stack pointer already points at the right
9474 place, then omit the subtraction. */
9475 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
9476 || current_function_calls_alloca)
9477 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
9478 4 * bit_count (saved_regs_mask));
9479 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9481 if (IS_INTERRUPT (func_type))
9482 /* Interrupt handlers will have pushed the
9483 IP onto the stack, so restore it now. */
9484 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, 1 << IP_REGNUM);
9488 /* Restore stack pointer if necessary. */
9489 if (offsets->outgoing_args != offsets->saved_regs)
9491 operands[0] = operands[1] = stack_pointer_rtx;
9492 operands[2] = GEN_INT (offsets->outgoing_args - offsets->saved_regs);
9493 output_add_immediate (operands);
9496 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9498 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9499 if (regs_ever_live[reg] && !call_used_regs[reg])
9500 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
9505 start_reg = FIRST_FPA_REGNUM;
9507 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9509 if (regs_ever_live[reg] && !call_used_regs[reg])
9511 if (reg - start_reg == 3)
9513 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
9514 start_reg, SP_REGNUM);
9515 start_reg = reg + 1;
9520 if (reg != start_reg)
9521 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9522 start_reg, reg - start_reg,
9525 start_reg = reg + 1;
9529 /* Just in case the last register checked also needs unstacking. */
9530 if (reg != start_reg)
9531 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9532 start_reg, reg - start_reg, SP_REGNUM);
9535 if (TARGET_HARD_FLOAT && TARGET_VFP)
9537 start_reg = FIRST_VFP_REGNUM;
9538 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9540 if ((!regs_ever_live[reg] || call_used_regs[reg])
9541 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9543 if (start_reg != reg)
9544 arm_output_fldmx (f, SP_REGNUM,
9545 (start_reg - FIRST_VFP_REGNUM) / 2,
9546 (reg - start_reg) / 2);
9547 start_reg = reg + 2;
9550 if (start_reg != reg)
9551 arm_output_fldmx (f, SP_REGNUM,
9552 (start_reg - FIRST_VFP_REGNUM) / 2,
9553 (reg - start_reg) / 2);
9556 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
9557 if (regs_ever_live[reg] && !call_used_regs[reg])
9558 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
9560 /* If we can, restore the LR into the PC. */
9561 if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9563 && current_function_pretend_args_size == 0
9564 && saved_regs_mask & (1 << LR_REGNUM)
9565 && !current_function_calls_eh_return)
9567 saved_regs_mask &= ~ (1 << LR_REGNUM);
9568 saved_regs_mask |= (1 << PC_REGNUM);
9571 /* Load the registers off the stack. If we only have one register
9572 to load use the LDR instruction - it is faster. */
9573 if (saved_regs_mask == (1 << LR_REGNUM))
9575 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
9577 else if (saved_regs_mask)
9579 if (saved_regs_mask & (1 << SP_REGNUM))
9580 /* Note - write back to the stack register is not enabled
9581 (i.e. "ldmfd sp!..."). We know that the stack pointer is
9582 in the list of registers and if we add writeback the
9583 instruction becomes UNPREDICTABLE. */
9584 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9586 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, saved_regs_mask);
9589 if (current_function_pretend_args_size)
9591 /* Unwind the pre-pushed regs. */
9592 operands[0] = operands[1] = stack_pointer_rtx;
9593 operands[2] = GEN_INT (current_function_pretend_args_size);
9594 output_add_immediate (operands);
9598 /* We may have already restored PC directly from the stack. */
9599 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
9602 /* Stack adjustment for exception handler. */
9603 if (current_function_calls_eh_return)
9604 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
9605 ARM_EH_STACKADJ_REGNUM);
9607 /* Generate the return instruction. */
9608 switch ((int) ARM_FUNC_TYPE (func_type))
9612 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
9615 case ARM_FT_EXCEPTION:
9616 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9619 case ARM_FT_INTERWORKED:
9620 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9624 if (arm_arch5 || arm_arch4t)
9625 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9627 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9635 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9636 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
9638 arm_stack_offsets *offsets;
9644 /* Emit any call-via-reg trampolines that are needed for v4t support
9645 of call_reg and call_value_reg type insns. */
9646 for (regno = 0; regno < LR_REGNUM; regno++)
9648 rtx label = cfun->machine->call_via[regno];
9652 function_section (current_function_decl);
9653 targetm.asm_out.internal_label (asm_out_file, "L",
9654 CODE_LABEL_NUMBER (label));
9655 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
9659 /* ??? Probably not safe to set this here, since it assumes that a
9660 function will be emitted as assembly immediately after we generate
9661 RTL for it. This does not happen for inline functions. */
9662 return_used_this_function = 0;
9666 /* We need to take into account any stack-frame rounding. */
9667 offsets = arm_get_frame_offsets ();
9669 gcc_assert (!use_return_insn (FALSE, NULL)
9670 || !return_used_this_function
9671 || offsets->saved_regs == offsets->outgoing_args
9672 || frame_pointer_needed);
9674 /* Reset the ARM-specific per-function variables. */
9675 after_arm_reorg = 0;
9679 /* Generate and emit an insn that we will recognize as a push_multi.
9680 Unfortunately, since this insn does not reflect very well the actual
9681 semantics of the operation, we need to annotate the insn for the benefit
9682 of DWARF2 frame unwind information. */
9684 emit_multi_reg_push (unsigned long mask)
9691 int dwarf_par_index;
9694 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9695 if (mask & (1 << i))
9698 gcc_assert (num_regs && num_regs <= 16);
9700 /* We don't record the PC in the dwarf frame information. */
9701 num_dwarf_regs = num_regs;
9702 if (mask & (1 << PC_REGNUM))
9705 /* For the body of the insn we are going to generate an UNSPEC in
9706 parallel with several USEs. This allows the insn to be recognized
9707 by the push_multi pattern in the arm.md file. The insn looks
9708 something like this:
9711 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
9712 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
9713 (use (reg:SI 11 fp))
9714 (use (reg:SI 12 ip))
9715 (use (reg:SI 14 lr))
9716 (use (reg:SI 15 pc))
9719 For the frame note however, we try to be more explicit and actually
9720 show each register being stored into the stack frame, plus a (single)
9721 decrement of the stack pointer. We do it this way in order to be
9722 friendly to the stack unwinding code, which only wants to see a single
9723 stack decrement per instruction. The RTL we generate for the note looks
9724 something like this:
9727 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
9728 (set (mem:SI (reg:SI sp)) (reg:SI r4))
9729 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
9730 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
9731 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
9734 This sequence is used both by the code to support stack unwinding for
9735 exceptions handlers and the code to generate dwarf2 frame debugging. */
9737 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
9738 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
9739 dwarf_par_index = 1;
9741 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9743 if (mask & (1 << i))
9745 reg = gen_rtx_REG (SImode, i);
9748 = gen_rtx_SET (VOIDmode,
9749 gen_rtx_MEM (BLKmode,
9750 gen_rtx_PRE_DEC (BLKmode,
9751 stack_pointer_rtx)),
9752 gen_rtx_UNSPEC (BLKmode,
9758 tmp = gen_rtx_SET (VOIDmode,
9759 gen_rtx_MEM (SImode, stack_pointer_rtx),
9761 RTX_FRAME_RELATED_P (tmp) = 1;
9762 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
9770 for (j = 1, i++; j < num_regs; i++)
9772 if (mask & (1 << i))
9774 reg = gen_rtx_REG (SImode, i);
9776 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
9780 tmp = gen_rtx_SET (VOIDmode,
9781 gen_rtx_MEM (SImode,
9782 plus_constant (stack_pointer_rtx,
9785 RTX_FRAME_RELATED_P (tmp) = 1;
9786 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
9793 par = emit_insn (par);
9795 tmp = gen_rtx_SET (SImode,
9797 gen_rtx_PLUS (SImode,
9799 GEN_INT (-4 * num_regs)));
9800 RTX_FRAME_RELATED_P (tmp) = 1;
9801 XVECEXP (dwarf, 0, 0) = tmp;
9803 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9809 emit_sfm (int base_reg, int count)
9816 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9817 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9819 reg = gen_rtx_REG (XFmode, base_reg++);
9822 = gen_rtx_SET (VOIDmode,
9823 gen_rtx_MEM (BLKmode,
9824 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
9825 gen_rtx_UNSPEC (BLKmode,
9828 tmp = gen_rtx_SET (VOIDmode,
9829 gen_rtx_MEM (XFmode, stack_pointer_rtx), reg);
9830 RTX_FRAME_RELATED_P (tmp) = 1;
9831 XVECEXP (dwarf, 0, 1) = tmp;
9833 for (i = 1; i < count; i++)
9835 reg = gen_rtx_REG (XFmode, base_reg++);
9836 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9838 tmp = gen_rtx_SET (VOIDmode,
9839 gen_rtx_MEM (XFmode,
9840 plus_constant (stack_pointer_rtx,
9843 RTX_FRAME_RELATED_P (tmp) = 1;
9844 XVECEXP (dwarf, 0, i + 1) = tmp;
9847 tmp = gen_rtx_SET (VOIDmode,
9849 gen_rtx_PLUS (SImode,
9851 GEN_INT (-12 * count)));
9852 RTX_FRAME_RELATED_P (tmp) = 1;
9853 XVECEXP (dwarf, 0, 0) = tmp;
9855 par = emit_insn (par);
9856 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9862 /* Return true if the current function needs to save/restore LR. */
9865 thumb_force_lr_save (void)
9867 return !cfun->machine->lr_save_eliminated
9868 && (!leaf_function_p ()
9869 || thumb_far_jump_used_p ()
9870 || regs_ever_live [LR_REGNUM]);
9874 /* Compute the distance from register FROM to register TO.
9875 These can be the arg pointer (26), the soft frame pointer (25),
9876 the stack pointer (13) or the hard frame pointer (11).
9877 In thumb mode r7 is used as the soft frame pointer, if needed.
9878 Typical stack layout looks like this:
9880 old stack pointer -> | |
9883 | | saved arguments for
9884 | | vararg functions
9887 hard FP & arg pointer -> | | \
9895 soft frame pointer -> | | /
9905 current stack pointer -> | | /
9908 For a given function some or all of these stack components
9909 may not be needed, giving rise to the possibility of
9910 eliminating some of the registers.
9912 The values returned by this function must reflect the behavior
9913 of arm_expand_prologue() and arm_compute_save_reg_mask().
9915 The sign of the number returned reflects the direction of stack
9916 growth, so the values are positive for all eliminations except
9917 from the soft frame pointer to the hard frame pointer.
9919 SFP may point just inside the local variables block to ensure correct
9923 /* Calculate stack offsets. These are used to calculate register elimination
9924 offsets and in prologue/epilogue code. */
9926 static arm_stack_offsets *
9927 arm_get_frame_offsets (void)
9929 struct arm_stack_offsets *offsets;
9930 unsigned long func_type;
9933 HOST_WIDE_INT frame_size;
9935 offsets = &cfun->machine->stack_offsets;
9937 /* We need to know if we are a leaf function. Unfortunately, it
9938 is possible to be called after start_sequence has been called,
9939 which causes get_insns to return the insns for the sequence,
9940 not the function, which will cause leaf_function_p to return
9941 the incorrect result.
9943 to know about leaf functions once reload has completed, and the
9944 frame size cannot be changed after that time, so we can safely
9945 use the cached value. */
9947 if (reload_completed)
9950 /* Initially this is the size of the local variables. It will translated
9951 into an offset once we have determined the size of preceding data. */
9952 frame_size = ROUND_UP_WORD (get_frame_size ());
9954 leaf = leaf_function_p ();
9956 /* Space for variadic functions. */
9957 offsets->saved_args = current_function_pretend_args_size;
9959 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
9965 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
9967 /* We know that SP will be doubleword aligned on entry, and we must
9968 preserve that condition at any subroutine call. We also require the
9969 soft frame pointer to be doubleword aligned. */
9971 if (TARGET_REALLY_IWMMXT)
9973 /* Check for the call-saved iWMMXt registers. */
9974 for (regno = FIRST_IWMMXT_REGNUM;
9975 regno <= LAST_IWMMXT_REGNUM;
9977 if (regs_ever_live [regno] && ! call_used_regs [regno])
9981 func_type = arm_current_func_type ();
9982 if (! IS_VOLATILE (func_type))
9984 /* Space for saved FPA registers. */
9985 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
9986 if (regs_ever_live[regno] && ! call_used_regs[regno])
9989 /* Space for saved VFP registers. */
9990 if (TARGET_HARD_FLOAT && TARGET_VFP)
9991 saved += arm_get_vfp_saved_size ();
9994 else /* TARGET_THUMB */
9996 saved = bit_count (thumb_compute_save_reg_mask ()) * 4;
9997 if (TARGET_BACKTRACE)
10001 /* Saved registers include the stack frame. */
10002 offsets->saved_regs = offsets->saved_args + saved;
10003 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
10004 /* A leaf function does not need any stack alignment if it has nothing
10006 if (leaf && frame_size == 0)
10008 offsets->outgoing_args = offsets->soft_frame;
10012 /* Ensure SFP has the correct alignment. */
10013 if (ARM_DOUBLEWORD_ALIGN
10014 && (offsets->soft_frame & 7))
10015 offsets->soft_frame += 4;
10017 offsets->outgoing_args = offsets->soft_frame + frame_size
10018 + current_function_outgoing_args_size;
10020 if (ARM_DOUBLEWORD_ALIGN)
10022 /* Ensure SP remains doubleword aligned. */
10023 if (offsets->outgoing_args & 7)
10024 offsets->outgoing_args += 4;
10025 gcc_assert (!(offsets->outgoing_args & 7));
10032 /* Calculate the relative offsets for the different stack pointers. Positive
10033 offsets are in the direction of stack growth. */
10036 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
10038 arm_stack_offsets *offsets;
10040 offsets = arm_get_frame_offsets ();
10042 /* OK, now we have enough information to compute the distances.
10043 There must be an entry in these switch tables for each pair
10044 of registers in ELIMINABLE_REGS, even if some of the entries
10045 seem to be redundant or useless. */
10048 case ARG_POINTER_REGNUM:
10051 case THUMB_HARD_FRAME_POINTER_REGNUM:
10054 case FRAME_POINTER_REGNUM:
10055 /* This is the reverse of the soft frame pointer
10056 to hard frame pointer elimination below. */
10057 return offsets->soft_frame - offsets->saved_args;
10059 case ARM_HARD_FRAME_POINTER_REGNUM:
10060 /* If there is no stack frame then the hard
10061 frame pointer and the arg pointer coincide. */
10062 if (offsets->frame == offsets->saved_regs)
10064 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
10065 return (frame_pointer_needed
10066 && cfun->static_chain_decl != NULL
10067 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
10069 case STACK_POINTER_REGNUM:
10070 /* If nothing has been pushed on the stack at all
10071 then this will return -4. This *is* correct! */
10072 return offsets->outgoing_args - (offsets->saved_args + 4);
10075 gcc_unreachable ();
10077 gcc_unreachable ();
10079 case FRAME_POINTER_REGNUM:
10082 case THUMB_HARD_FRAME_POINTER_REGNUM:
10085 case ARM_HARD_FRAME_POINTER_REGNUM:
10086 /* The hard frame pointer points to the top entry in the
10087 stack frame. The soft frame pointer to the bottom entry
10088 in the stack frame. If there is no stack frame at all,
10089 then they are identical. */
10091 return offsets->frame - offsets->soft_frame;
10093 case STACK_POINTER_REGNUM:
10094 return offsets->outgoing_args - offsets->soft_frame;
10097 gcc_unreachable ();
10099 gcc_unreachable ();
10102 /* You cannot eliminate from the stack pointer.
10103 In theory you could eliminate from the hard frame
10104 pointer to the stack pointer, but this will never
10105 happen, since if a stack frame is not needed the
10106 hard frame pointer will never be used. */
10107 gcc_unreachable ();
10112 /* Generate the prologue instructions for entry into an ARM function. */
10114 arm_expand_prologue (void)
10120 unsigned long live_regs_mask;
10121 unsigned long func_type;
10123 int saved_pretend_args = 0;
10124 int saved_regs = 0;
10125 unsigned HOST_WIDE_INT args_to_push;
10126 arm_stack_offsets *offsets;
10128 func_type = arm_current_func_type ();
10130 /* Naked functions don't have prologues. */
10131 if (IS_NAKED (func_type))
10134 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
10135 args_to_push = current_function_pretend_args_size;
10137 /* Compute which register we will have to save onto the stack. */
10138 live_regs_mask = arm_compute_save_reg_mask ();
10140 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
10142 if (frame_pointer_needed)
10144 if (IS_INTERRUPT (func_type))
10146 /* Interrupt functions must not corrupt any registers.
10147 Creating a frame pointer however, corrupts the IP
10148 register, so we must push it first. */
10149 insn = emit_multi_reg_push (1 << IP_REGNUM);
10151 /* Do not set RTX_FRAME_RELATED_P on this insn.
10152 The dwarf stack unwinding code only wants to see one
10153 stack decrement per function, and this is not it. If
10154 this instruction is labeled as being part of the frame
10155 creation sequence then dwarf2out_frame_debug_expr will
10156 die when it encounters the assignment of IP to FP
10157 later on, since the use of SP here establishes SP as
10158 the CFA register and not IP.
10160 Anyway this instruction is not really part of the stack
10161 frame creation although it is part of the prologue. */
10163 else if (IS_NESTED (func_type))
10165 /* The Static chain register is the same as the IP register
10166 used as a scratch register during stack frame creation.
10167 To get around this need to find somewhere to store IP
10168 whilst the frame is being created. We try the following
10171 1. The last argument register.
10172 2. A slot on the stack above the frame. (This only
10173 works if the function is not a varargs function).
10174 3. Register r3, after pushing the argument registers
10177 Note - we only need to tell the dwarf2 backend about the SP
10178 adjustment in the second variant; the static chain register
10179 doesn't need to be unwound, as it doesn't contain a value
10180 inherited from the caller. */
10182 if (regs_ever_live[3] == 0)
10184 insn = gen_rtx_REG (SImode, 3);
10185 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10186 insn = emit_insn (insn);
10188 else if (args_to_push == 0)
10191 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
10192 insn = gen_rtx_MEM (SImode, insn);
10193 insn = gen_rtx_SET (VOIDmode, insn, ip_rtx);
10194 insn = emit_insn (insn);
10198 /* Just tell the dwarf backend that we adjusted SP. */
10199 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10200 gen_rtx_PLUS (SImode, stack_pointer_rtx,
10201 GEN_INT (-fp_offset)));
10202 RTX_FRAME_RELATED_P (insn) = 1;
10203 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
10204 dwarf, REG_NOTES (insn));
10208 /* Store the args on the stack. */
10209 if (cfun->machine->uses_anonymous_args)
10210 insn = emit_multi_reg_push
10211 ((0xf0 >> (args_to_push / 4)) & 0xf);
10214 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10215 GEN_INT (- args_to_push)));
10217 RTX_FRAME_RELATED_P (insn) = 1;
10219 saved_pretend_args = 1;
10220 fp_offset = args_to_push;
10223 /* Now reuse r3 to preserve IP. */
10224 insn = gen_rtx_REG (SImode, 3);
10225 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10226 (void) emit_insn (insn);
10232 insn = gen_rtx_PLUS (SImode, stack_pointer_rtx, GEN_INT (fp_offset));
10233 insn = gen_rtx_SET (SImode, ip_rtx, insn);
10236 insn = gen_movsi (ip_rtx, stack_pointer_rtx);
10238 insn = emit_insn (insn);
10239 RTX_FRAME_RELATED_P (insn) = 1;
10244 /* Push the argument registers, or reserve space for them. */
10245 if (cfun->machine->uses_anonymous_args)
10246 insn = emit_multi_reg_push
10247 ((0xf0 >> (args_to_push / 4)) & 0xf);
10250 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10251 GEN_INT (- args_to_push)));
10252 RTX_FRAME_RELATED_P (insn) = 1;
10255 /* If this is an interrupt service routine, and the link register
10256 is going to be pushed, and we are not creating a stack frame,
10257 (which would involve an extra push of IP and a pop in the epilogue)
10258 subtracting four from LR now will mean that the function return
10259 can be done with a single instruction. */
10260 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
10261 && (live_regs_mask & (1 << LR_REGNUM)) != 0
10262 && ! frame_pointer_needed)
10263 emit_insn (gen_rtx_SET (SImode,
10264 gen_rtx_REG (SImode, LR_REGNUM),
10265 gen_rtx_PLUS (SImode,
10266 gen_rtx_REG (SImode, LR_REGNUM),
10269 if (live_regs_mask)
10271 insn = emit_multi_reg_push (live_regs_mask);
10272 saved_regs += bit_count (live_regs_mask) * 4;
10273 RTX_FRAME_RELATED_P (insn) = 1;
10277 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
10278 if (regs_ever_live[reg] && ! call_used_regs [reg])
10280 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
10281 insn = gen_rtx_MEM (V2SImode, insn);
10282 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10283 gen_rtx_REG (V2SImode, reg)));
10284 RTX_FRAME_RELATED_P (insn) = 1;
10288 if (! IS_VOLATILE (func_type))
10292 /* Save any floating point call-saved registers used by this
10294 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
10296 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10297 if (regs_ever_live[reg] && !call_used_regs[reg])
10299 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
10300 insn = gen_rtx_MEM (XFmode, insn);
10301 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10302 gen_rtx_REG (XFmode, reg)));
10303 RTX_FRAME_RELATED_P (insn) = 1;
10309 start_reg = LAST_FPA_REGNUM;
10311 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10313 if (regs_ever_live[reg] && !call_used_regs[reg])
10315 if (start_reg - reg == 3)
10317 insn = emit_sfm (reg, 4);
10318 RTX_FRAME_RELATED_P (insn) = 1;
10320 start_reg = reg - 1;
10325 if (start_reg != reg)
10327 insn = emit_sfm (reg + 1, start_reg - reg);
10328 RTX_FRAME_RELATED_P (insn) = 1;
10329 saved_regs += (start_reg - reg) * 12;
10331 start_reg = reg - 1;
10335 if (start_reg != reg)
10337 insn = emit_sfm (reg + 1, start_reg - reg);
10338 saved_regs += (start_reg - reg) * 12;
10339 RTX_FRAME_RELATED_P (insn) = 1;
10342 if (TARGET_HARD_FLOAT && TARGET_VFP)
10344 start_reg = FIRST_VFP_REGNUM;
10346 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
10348 if ((!regs_ever_live[reg] || call_used_regs[reg])
10349 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
10351 if (start_reg != reg)
10352 saved_regs += vfp_emit_fstmx (start_reg,
10353 (reg - start_reg) / 2);
10354 start_reg = reg + 2;
10357 if (start_reg != reg)
10358 saved_regs += vfp_emit_fstmx (start_reg,
10359 (reg - start_reg) / 2);
10363 if (frame_pointer_needed)
10365 /* Create the new frame pointer. */
10366 insn = GEN_INT (-(4 + args_to_push + fp_offset));
10367 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
10368 RTX_FRAME_RELATED_P (insn) = 1;
10370 if (IS_NESTED (func_type))
10372 /* Recover the static chain register. */
10373 if (regs_ever_live [3] == 0
10374 || saved_pretend_args)
10375 insn = gen_rtx_REG (SImode, 3);
10376 else /* if (current_function_pretend_args_size == 0) */
10378 insn = gen_rtx_PLUS (SImode, hard_frame_pointer_rtx,
10380 insn = gen_rtx_MEM (SImode, insn);
10383 emit_insn (gen_rtx_SET (SImode, ip_rtx, insn));
10384 /* Add a USE to stop propagate_one_insn() from barfing. */
10385 emit_insn (gen_prologue_use (ip_rtx));
10389 offsets = arm_get_frame_offsets ();
10390 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
10392 /* This add can produce multiple insns for a large constant, so we
10393 need to get tricky. */
10394 rtx last = get_last_insn ();
10396 amount = GEN_INT (offsets->saved_args + saved_regs
10397 - offsets->outgoing_args);
10399 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10403 last = last ? NEXT_INSN (last) : get_insns ();
10404 RTX_FRAME_RELATED_P (last) = 1;
10406 while (last != insn);
10408 /* If the frame pointer is needed, emit a special barrier that
10409 will prevent the scheduler from moving stores to the frame
10410 before the stack adjustment. */
10411 if (frame_pointer_needed)
10412 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
10413 hard_frame_pointer_rtx));
10418 arm_load_pic_register (INVALID_REGNUM);
10420 /* If we are profiling, make sure no instructions are scheduled before
10421 the call to mcount. Similarly if the user has requested no
10422 scheduling in the prolog. */
10423 if (current_function_profile || !TARGET_SCHED_PROLOG)
10424 emit_insn (gen_blockage ());
10426 /* If the link register is being kept alive, with the return address in it,
10427 then make sure that it does not get reused by the ce2 pass. */
10428 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
10430 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
10431 cfun->machine->lr_save_eliminated = 1;
10435 /* If CODE is 'd', then the X is a condition operand and the instruction
10436 should only be executed if the condition is true.
10437 if CODE is 'D', then the X is a condition operand and the instruction
10438 should only be executed if the condition is false: however, if the mode
10439 of the comparison is CCFPEmode, then always execute the instruction -- we
10440 do this because in these circumstances !GE does not necessarily imply LT;
10441 in these cases the instruction pattern will take care to make sure that
10442 an instruction containing %d will follow, thereby undoing the effects of
10443 doing this instruction unconditionally.
10444 If CODE is 'N' then X is a floating point operand that must be negated
10446 If CODE is 'B' then output a bitwise inverted value of X (a const int).
10447 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
10449 arm_print_operand (FILE *stream, rtx x, int code)
10454 fputs (ASM_COMMENT_START, stream);
10458 fputs (user_label_prefix, stream);
10462 fputs (REGISTER_PREFIX, stream);
10466 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
10470 output_operand_lossage ("predicated Thumb instruction");
10473 if (current_insn_predicate != NULL)
10475 output_operand_lossage
10476 ("predicated instruction in conditional sequence");
10480 fputs (arm_condition_codes[arm_current_cc], stream);
10482 else if (current_insn_predicate)
10484 enum arm_cond_code code;
10488 output_operand_lossage ("predicated Thumb instruction");
10492 code = get_arm_condition_code (current_insn_predicate);
10493 fputs (arm_condition_codes[code], stream);
10500 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10501 r = REAL_VALUE_NEGATE (r);
10502 fprintf (stream, "%s", fp_const_from_val (&r));
10507 if (GET_CODE (x) == CONST_INT)
10510 val = ARM_SIGN_EXTEND (~INTVAL (x));
10511 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
10515 putc ('~', stream);
10516 output_addr_const (stream, x);
10521 fprintf (stream, "%s", arithmetic_instr (x, 1));
10524 /* Truncate Cirrus shift counts. */
10526 if (GET_CODE (x) == CONST_INT)
10528 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
10531 arm_print_operand (stream, x, 0);
10535 fprintf (stream, "%s", arithmetic_instr (x, 0));
10541 const char * shift = shift_op (x, &val);
10545 fprintf (stream, ", %s ", shift_op (x, &val));
10547 arm_print_operand (stream, XEXP (x, 1), 0);
10549 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
10554 /* An explanation of the 'Q', 'R' and 'H' register operands:
10556 In a pair of registers containing a DI or DF value the 'Q'
10557 operand returns the register number of the register containing
10558 the least significant part of the value. The 'R' operand returns
10559 the register number of the register containing the most
10560 significant part of the value.
10562 The 'H' operand returns the higher of the two register numbers.
10563 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
10564 same as the 'Q' operand, since the most significant part of the
10565 value is held in the lower number register. The reverse is true
10566 on systems where WORDS_BIG_ENDIAN is false.
10568 The purpose of these operands is to distinguish between cases
10569 where the endian-ness of the values is important (for example
10570 when they are added together), and cases where the endian-ness
10571 is irrelevant, but the order of register operations is important.
10572 For example when loading a value from memory into a register
10573 pair, the endian-ness does not matter. Provided that the value
10574 from the lower memory address is put into the lower numbered
10575 register, and the value from the higher address is put into the
10576 higher numbered register, the load will work regardless of whether
10577 the value being loaded is big-wordian or little-wordian. The
10578 order of the two register loads can matter however, if the address
10579 of the memory location is actually held in one of the registers
10580 being overwritten by the load. */
10582 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10584 output_operand_lossage ("invalid operand for code '%c'", code);
10588 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
10592 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10594 output_operand_lossage ("invalid operand for code '%c'", code);
10598 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
10602 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10604 output_operand_lossage ("invalid operand for code '%c'", code);
10608 asm_fprintf (stream, "%r", REGNO (x) + 1);
10612 asm_fprintf (stream, "%r",
10613 GET_CODE (XEXP (x, 0)) == REG
10614 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
10618 asm_fprintf (stream, "{%r-%r}",
10620 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
10624 /* CONST_TRUE_RTX means always -- that's the default. */
10625 if (x == const_true_rtx)
10628 if (!COMPARISON_P (x))
10630 output_operand_lossage ("invalid operand for code '%c'", code);
10634 fputs (arm_condition_codes[get_arm_condition_code (x)],
10639 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
10640 want to do that. */
10641 if (x == const_true_rtx)
10643 output_operand_lossage ("instruction never exectued");
10646 if (!COMPARISON_P (x))
10648 output_operand_lossage ("invalid operand for code '%c'", code);
10652 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
10653 (get_arm_condition_code (x))],
10657 /* Cirrus registers can be accessed in a variety of ways:
10658 single floating point (f)
10659 double floating point (d)
10661 64bit integer (dx). */
10662 case 'W': /* Cirrus register in F mode. */
10663 case 'X': /* Cirrus register in D mode. */
10664 case 'Y': /* Cirrus register in FX mode. */
10665 case 'Z': /* Cirrus register in DX mode. */
10666 gcc_assert (GET_CODE (x) == REG
10667 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
10669 fprintf (stream, "mv%s%s",
10671 : code == 'X' ? "d"
10672 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
10676 /* Print cirrus register in the mode specified by the register's mode. */
10679 int mode = GET_MODE (x);
10681 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
10683 output_operand_lossage ("invalid operand for code '%c'", code);
10687 fprintf (stream, "mv%s%s",
10688 mode == DFmode ? "d"
10689 : mode == SImode ? "fx"
10690 : mode == DImode ? "dx"
10691 : "f", reg_names[REGNO (x)] + 2);
10697 if (GET_CODE (x) != REG
10698 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
10699 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
10700 /* Bad value for wCG register number. */
10702 output_operand_lossage ("invalid operand for code '%c'", code);
10707 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
10710 /* Print an iWMMXt control register name. */
10712 if (GET_CODE (x) != CONST_INT
10714 || INTVAL (x) >= 16)
10715 /* Bad value for wC register number. */
10717 output_operand_lossage ("invalid operand for code '%c'", code);
10723 static const char * wc_reg_names [16] =
10725 "wCID", "wCon", "wCSSF", "wCASF",
10726 "wC4", "wC5", "wC6", "wC7",
10727 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
10728 "wC12", "wC13", "wC14", "wC15"
10731 fprintf (stream, wc_reg_names [INTVAL (x)]);
10735 /* Print a VFP double precision register name. */
10738 int mode = GET_MODE (x);
10741 if (mode != DImode && mode != DFmode)
10743 output_operand_lossage ("invalid operand for code '%c'", code);
10747 if (GET_CODE (x) != REG
10748 || !IS_VFP_REGNUM (REGNO (x)))
10750 output_operand_lossage ("invalid operand for code '%c'", code);
10754 num = REGNO(x) - FIRST_VFP_REGNUM;
10757 output_operand_lossage ("invalid operand for code '%c'", code);
10761 fprintf (stream, "d%d", num >> 1);
10768 output_operand_lossage ("missing operand");
10772 switch (GET_CODE (x))
10775 asm_fprintf (stream, "%r", REGNO (x));
10779 output_memory_reference_mode = GET_MODE (x);
10780 output_address (XEXP (x, 0));
10784 fprintf (stream, "#%s", fp_immediate_constant (x));
10788 gcc_assert (GET_CODE (x) != NEG);
10789 fputc ('#', stream);
10790 output_addr_const (stream, x);
10796 #ifndef AOF_ASSEMBLER
10797 /* Target hook for assembling integer objects. The ARM version needs to
10798 handle word-sized values specially. */
10800 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
10802 if (size == UNITS_PER_WORD && aligned_p)
10804 fputs ("\t.word\t", asm_out_file);
10805 output_addr_const (asm_out_file, x);
10807 /* Mark symbols as position independent. We only do this in the
10808 .text segment, not in the .data segment. */
10809 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
10810 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
10812 if (GET_CODE (x) == SYMBOL_REF
10813 && (CONSTANT_POOL_ADDRESS_P (x)
10814 || SYMBOL_REF_LOCAL_P (x)))
10815 fputs ("(GOTOFF)", asm_out_file);
10816 else if (GET_CODE (x) == LABEL_REF)
10817 fputs ("(GOTOFF)", asm_out_file);
10819 fputs ("(GOT)", asm_out_file);
10821 fputc ('\n', asm_out_file);
10825 if (arm_vector_mode_supported_p (GET_MODE (x)))
10829 gcc_assert (GET_CODE (x) == CONST_VECTOR);
10831 units = CONST_VECTOR_NUNITS (x);
10833 switch (GET_MODE (x))
10835 case V2SImode: size = 4; break;
10836 case V4HImode: size = 2; break;
10837 case V8QImode: size = 1; break;
10839 gcc_unreachable ();
10842 for (i = 0; i < units; i++)
10846 elt = CONST_VECTOR_ELT (x, i);
10848 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
10854 return default_assemble_integer (x, size, aligned_p);
10858 /* Add a function to the list of static constructors. */
10861 arm_elf_asm_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
10863 if (!TARGET_AAPCS_BASED)
10865 default_named_section_asm_out_constructor (symbol, priority);
10869 /* Put these in the .init_array section, using a special relocation. */
10871 assemble_align (POINTER_SIZE);
10872 fputs ("\t.word\t", asm_out_file);
10873 output_addr_const (asm_out_file, symbol);
10874 fputs ("(target1)\n", asm_out_file);
10878 /* A finite state machine takes care of noticing whether or not instructions
10879 can be conditionally executed, and thus decrease execution time and code
10880 size by deleting branch instructions. The fsm is controlled by
10881 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
10883 /* The state of the fsm controlling condition codes are:
10884 0: normal, do nothing special
10885 1: make ASM_OUTPUT_OPCODE not output this instruction
10886 2: make ASM_OUTPUT_OPCODE not output this instruction
10887 3: make instructions conditional
10888 4: make instructions conditional
10890 State transitions (state->state by whom under condition):
10891 0 -> 1 final_prescan_insn if the `target' is a label
10892 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
10893 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
10894 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
10895 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
10896 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
10897 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
10898 (the target insn is arm_target_insn).
10900 If the jump clobbers the conditions then we use states 2 and 4.
10902 A similar thing can be done with conditional return insns.
10904 XXX In case the `target' is an unconditional branch, this conditionalising
10905 of the instructions always reduces code size, but not always execution
10906 time. But then, I want to reduce the code size to somewhere near what
10907 /bin/cc produces. */
10909 /* Returns the index of the ARM condition code string in
10910 `arm_condition_codes'. COMPARISON should be an rtx like
10911 `(eq (...) (...))'. */
10912 static enum arm_cond_code
10913 get_arm_condition_code (rtx comparison)
10915 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
10917 enum rtx_code comp_code = GET_CODE (comparison);
10919 if (GET_MODE_CLASS (mode) != MODE_CC)
10920 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
10921 XEXP (comparison, 1));
10925 case CC_DNEmode: code = ARM_NE; goto dominance;
10926 case CC_DEQmode: code = ARM_EQ; goto dominance;
10927 case CC_DGEmode: code = ARM_GE; goto dominance;
10928 case CC_DGTmode: code = ARM_GT; goto dominance;
10929 case CC_DLEmode: code = ARM_LE; goto dominance;
10930 case CC_DLTmode: code = ARM_LT; goto dominance;
10931 case CC_DGEUmode: code = ARM_CS; goto dominance;
10932 case CC_DGTUmode: code = ARM_HI; goto dominance;
10933 case CC_DLEUmode: code = ARM_LS; goto dominance;
10934 case CC_DLTUmode: code = ARM_CC;
10937 gcc_assert (comp_code == EQ || comp_code == NE);
10939 if (comp_code == EQ)
10940 return ARM_INVERSE_CONDITION_CODE (code);
10946 case NE: return ARM_NE;
10947 case EQ: return ARM_EQ;
10948 case GE: return ARM_PL;
10949 case LT: return ARM_MI;
10950 default: gcc_unreachable ();
10956 case NE: return ARM_NE;
10957 case EQ: return ARM_EQ;
10958 default: gcc_unreachable ();
10964 case NE: return ARM_MI;
10965 case EQ: return ARM_PL;
10966 default: gcc_unreachable ();
10971 /* These encodings assume that AC=1 in the FPA system control
10972 byte. This allows us to handle all cases except UNEQ and
10976 case GE: return ARM_GE;
10977 case GT: return ARM_GT;
10978 case LE: return ARM_LS;
10979 case LT: return ARM_MI;
10980 case NE: return ARM_NE;
10981 case EQ: return ARM_EQ;
10982 case ORDERED: return ARM_VC;
10983 case UNORDERED: return ARM_VS;
10984 case UNLT: return ARM_LT;
10985 case UNLE: return ARM_LE;
10986 case UNGT: return ARM_HI;
10987 case UNGE: return ARM_PL;
10988 /* UNEQ and LTGT do not have a representation. */
10989 case UNEQ: /* Fall through. */
10990 case LTGT: /* Fall through. */
10991 default: gcc_unreachable ();
10997 case NE: return ARM_NE;
10998 case EQ: return ARM_EQ;
10999 case GE: return ARM_LE;
11000 case GT: return ARM_LT;
11001 case LE: return ARM_GE;
11002 case LT: return ARM_GT;
11003 case GEU: return ARM_LS;
11004 case GTU: return ARM_CC;
11005 case LEU: return ARM_CS;
11006 case LTU: return ARM_HI;
11007 default: gcc_unreachable ();
11013 case LTU: return ARM_CS;
11014 case GEU: return ARM_CC;
11015 default: gcc_unreachable ();
11021 case NE: return ARM_NE;
11022 case EQ: return ARM_EQ;
11023 case GE: return ARM_GE;
11024 case GT: return ARM_GT;
11025 case LE: return ARM_LE;
11026 case LT: return ARM_LT;
11027 case GEU: return ARM_CS;
11028 case GTU: return ARM_HI;
11029 case LEU: return ARM_LS;
11030 case LTU: return ARM_CC;
11031 default: gcc_unreachable ();
11034 default: gcc_unreachable ();
11039 arm_final_prescan_insn (rtx insn)
11041 /* BODY will hold the body of INSN. */
11042 rtx body = PATTERN (insn);
11044 /* This will be 1 if trying to repeat the trick, and things need to be
11045 reversed if it appears to fail. */
11048 /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
11049 taken are clobbered, even if the rtl suggests otherwise. It also
11050 means that we have to grub around within the jump expression to find
11051 out what the conditions are when the jump isn't taken. */
11052 int jump_clobbers = 0;
11054 /* If we start with a return insn, we only succeed if we find another one. */
11055 int seeking_return = 0;
11057 /* START_INSN will hold the insn from where we start looking. This is the
11058 first insn after the following code_label if REVERSE is true. */
11059 rtx start_insn = insn;
11061 /* If in state 4, check if the target branch is reached, in order to
11062 change back to state 0. */
11063 if (arm_ccfsm_state == 4)
11065 if (insn == arm_target_insn)
11067 arm_target_insn = NULL;
11068 arm_ccfsm_state = 0;
11073 /* If in state 3, it is possible to repeat the trick, if this insn is an
11074 unconditional branch to a label, and immediately following this branch
11075 is the previous target label which is only used once, and the label this
11076 branch jumps to is not too far off. */
11077 if (arm_ccfsm_state == 3)
11079 if (simplejump_p (insn))
11081 start_insn = next_nonnote_insn (start_insn);
11082 if (GET_CODE (start_insn) == BARRIER)
11084 /* XXX Isn't this always a barrier? */
11085 start_insn = next_nonnote_insn (start_insn);
11087 if (GET_CODE (start_insn) == CODE_LABEL
11088 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11089 && LABEL_NUSES (start_insn) == 1)
11094 else if (GET_CODE (body) == RETURN)
11096 start_insn = next_nonnote_insn (start_insn);
11097 if (GET_CODE (start_insn) == BARRIER)
11098 start_insn = next_nonnote_insn (start_insn);
11099 if (GET_CODE (start_insn) == CODE_LABEL
11100 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11101 && LABEL_NUSES (start_insn) == 1)
11104 seeking_return = 1;
11113 gcc_assert (!arm_ccfsm_state || reverse);
11114 if (GET_CODE (insn) != JUMP_INSN)
11117 /* This jump might be paralleled with a clobber of the condition codes
11118 the jump should always come first */
11119 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
11120 body = XVECEXP (body, 0, 0);
11123 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
11124 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
11127 int fail = FALSE, succeed = FALSE;
11128 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
11129 int then_not_else = TRUE;
11130 rtx this_insn = start_insn, label = 0;
11132 /* If the jump cannot be done with one instruction, we cannot
11133 conditionally execute the instruction in the inverse case. */
11134 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
11140 /* Register the insn jumped to. */
11143 if (!seeking_return)
11144 label = XEXP (SET_SRC (body), 0);
11146 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
11147 label = XEXP (XEXP (SET_SRC (body), 1), 0);
11148 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
11150 label = XEXP (XEXP (SET_SRC (body), 2), 0);
11151 then_not_else = FALSE;
11153 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
11154 seeking_return = 1;
11155 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
11157 seeking_return = 1;
11158 then_not_else = FALSE;
11161 gcc_unreachable ();
11163 /* See how many insns this branch skips, and what kind of insns. If all
11164 insns are okay, and the label or unconditional branch to the same
11165 label is not too far away, succeed. */
11166 for (insns_skipped = 0;
11167 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
11171 this_insn = next_nonnote_insn (this_insn);
11175 switch (GET_CODE (this_insn))
11178 /* Succeed if it is the target label, otherwise fail since
11179 control falls in from somewhere else. */
11180 if (this_insn == label)
11184 arm_ccfsm_state = 2;
11185 this_insn = next_nonnote_insn (this_insn);
11188 arm_ccfsm_state = 1;
11196 /* Succeed if the following insn is the target label.
11198 If return insns are used then the last insn in a function
11199 will be a barrier. */
11200 this_insn = next_nonnote_insn (this_insn);
11201 if (this_insn && this_insn == label)
11205 arm_ccfsm_state = 2;
11206 this_insn = next_nonnote_insn (this_insn);
11209 arm_ccfsm_state = 1;
11217 /* The AAPCS says that conditional calls should not be
11218 used since they make interworking inefficient (the
11219 linker can't transform BL<cond> into BLX). That's
11220 only a problem if the machine has BLX. */
11227 /* Succeed if the following insn is the target label, or
11228 if the following two insns are a barrier and the
11230 this_insn = next_nonnote_insn (this_insn);
11231 if (this_insn && GET_CODE (this_insn) == BARRIER)
11232 this_insn = next_nonnote_insn (this_insn);
11234 if (this_insn && this_insn == label
11235 && insns_skipped < max_insns_skipped)
11239 arm_ccfsm_state = 2;
11240 this_insn = next_nonnote_insn (this_insn);
11243 arm_ccfsm_state = 1;
11251 /* If this is an unconditional branch to the same label, succeed.
11252 If it is to another label, do nothing. If it is conditional,
11254 /* XXX Probably, the tests for SET and the PC are
11257 scanbody = PATTERN (this_insn);
11258 if (GET_CODE (scanbody) == SET
11259 && GET_CODE (SET_DEST (scanbody)) == PC)
11261 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
11262 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
11264 arm_ccfsm_state = 2;
11267 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
11270 /* Fail if a conditional return is undesirable (e.g. on a
11271 StrongARM), but still allow this if optimizing for size. */
11272 else if (GET_CODE (scanbody) == RETURN
11273 && !use_return_insn (TRUE, NULL)
11276 else if (GET_CODE (scanbody) == RETURN
11279 arm_ccfsm_state = 2;
11282 else if (GET_CODE (scanbody) == PARALLEL)
11284 switch (get_attr_conds (this_insn))
11294 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
11299 /* Instructions using or affecting the condition codes make it
11301 scanbody = PATTERN (this_insn);
11302 if (!(GET_CODE (scanbody) == SET
11303 || GET_CODE (scanbody) == PARALLEL)
11304 || get_attr_conds (this_insn) != CONDS_NOCOND)
11307 /* A conditional cirrus instruction must be followed by
11308 a non Cirrus instruction. However, since we
11309 conditionalize instructions in this function and by
11310 the time we get here we can't add instructions
11311 (nops), because shorten_branches() has already been
11312 called, we will disable conditionalizing Cirrus
11313 instructions to be safe. */
11314 if (GET_CODE (scanbody) != USE
11315 && GET_CODE (scanbody) != CLOBBER
11316 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
11326 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
11327 arm_target_label = CODE_LABEL_NUMBER (label);
11330 gcc_assert (seeking_return || arm_ccfsm_state == 2);
11332 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
11334 this_insn = next_nonnote_insn (this_insn);
11335 gcc_assert (!this_insn
11336 || (GET_CODE (this_insn) != BARRIER
11337 && GET_CODE (this_insn) != CODE_LABEL));
11341 /* Oh, dear! we ran off the end.. give up. */
11342 recog (PATTERN (insn), insn, NULL);
11343 arm_ccfsm_state = 0;
11344 arm_target_insn = NULL;
11347 arm_target_insn = this_insn;
11351 gcc_assert (!reverse);
11353 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
11355 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
11356 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11357 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
11358 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11362 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
11365 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
11369 if (reverse || then_not_else)
11370 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11373 /* Restore recog_data (getting the attributes of other insns can
11374 destroy this array, but final.c assumes that it remains intact
11375 across this call; since the insn has been recognized already we
11376 call recog direct). */
11377 recog (PATTERN (insn), insn, NULL);
11381 /* Returns true if REGNO is a valid register
11382 for holding a quantity of type MODE. */
11384 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11386 if (GET_MODE_CLASS (mode) == MODE_CC)
11387 return regno == CC_REGNUM || regno == VFPCC_REGNUM;
11390 /* For the Thumb we only allow values bigger than SImode in
11391 registers 0 - 6, so that there is always a second low
11392 register available to hold the upper part of the value.
11393 We probably we ought to ensure that the register is the
11394 start of an even numbered register pair. */
11395 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
11397 if (IS_CIRRUS_REGNUM (regno))
11398 /* We have outlawed SI values in Cirrus registers because they
11399 reside in the lower 32 bits, but SF values reside in the
11400 upper 32 bits. This causes gcc all sorts of grief. We can't
11401 even split the registers into pairs because Cirrus SI values
11402 get sign extended to 64bits-- aldyh. */
11403 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
11405 if (IS_VFP_REGNUM (regno))
11407 if (mode == SFmode || mode == SImode)
11410 /* DFmode values are only valid in even register pairs. */
11411 if (mode == DFmode)
11412 return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
11416 if (IS_IWMMXT_GR_REGNUM (regno))
11417 return mode == SImode;
11419 if (IS_IWMMXT_REGNUM (regno))
11420 return VALID_IWMMXT_REG_MODE (mode);
11422 /* We allow any value to be stored in the general registers.
11423 Restrict doubleword quantities to even register pairs so that we can
11425 if (regno <= LAST_ARM_REGNUM)
11426 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
11428 if ( regno == FRAME_POINTER_REGNUM
11429 || regno == ARG_POINTER_REGNUM)
11430 /* We only allow integers in the fake hard registers. */
11431 return GET_MODE_CLASS (mode) == MODE_INT;
11433 /* The only registers left are the FPA registers
11434 which we only allow to hold FP values. */
11435 return GET_MODE_CLASS (mode) == MODE_FLOAT
11436 && regno >= FIRST_FPA_REGNUM
11437 && regno <= LAST_FPA_REGNUM;
11441 arm_regno_class (int regno)
11445 if (regno == STACK_POINTER_REGNUM)
11447 if (regno == CC_REGNUM)
11454 if ( regno <= LAST_ARM_REGNUM
11455 || regno == FRAME_POINTER_REGNUM
11456 || regno == ARG_POINTER_REGNUM)
11457 return GENERAL_REGS;
11459 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
11462 if (IS_CIRRUS_REGNUM (regno))
11463 return CIRRUS_REGS;
11465 if (IS_VFP_REGNUM (regno))
11468 if (IS_IWMMXT_REGNUM (regno))
11469 return IWMMXT_REGS;
11471 if (IS_IWMMXT_GR_REGNUM (regno))
11472 return IWMMXT_GR_REGS;
11477 /* Handle a special case when computing the offset
11478 of an argument from the frame pointer. */
11480 arm_debugger_arg_offset (int value, rtx addr)
11484 /* We are only interested if dbxout_parms() failed to compute the offset. */
11488 /* We can only cope with the case where the address is held in a register. */
11489 if (GET_CODE (addr) != REG)
11492 /* If we are using the frame pointer to point at the argument, then
11493 an offset of 0 is correct. */
11494 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
11497 /* If we are using the stack pointer to point at the
11498 argument, then an offset of 0 is correct. */
11499 if ((TARGET_THUMB || !frame_pointer_needed)
11500 && REGNO (addr) == SP_REGNUM)
11503 /* Oh dear. The argument is pointed to by a register rather
11504 than being held in a register, or being stored at a known
11505 offset from the frame pointer. Since GDB only understands
11506 those two kinds of argument we must translate the address
11507 held in the register into an offset from the frame pointer.
11508 We do this by searching through the insns for the function
11509 looking to see where this register gets its value. If the
11510 register is initialized from the frame pointer plus an offset
11511 then we are in luck and we can continue, otherwise we give up.
11513 This code is exercised by producing debugging information
11514 for a function with arguments like this:
11516 double func (double a, double b, int c, double d) {return d;}
11518 Without this code the stab for parameter 'd' will be set to
11519 an offset of 0 from the frame pointer, rather than 8. */
11521 /* The if() statement says:
11523 If the insn is a normal instruction
11524 and if the insn is setting the value in a register
11525 and if the register being set is the register holding the address of the argument
11526 and if the address is computing by an addition
11527 that involves adding to a register
11528 which is the frame pointer
11533 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11535 if ( GET_CODE (insn) == INSN
11536 && GET_CODE (PATTERN (insn)) == SET
11537 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
11538 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
11539 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
11540 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
11541 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
11544 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
11553 warning (0, "unable to compute real location of stacked parameter");
11554 value = 8; /* XXX magic hack */
11560 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
11563 if ((MASK) & insn_flags) \
11564 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), \
11565 BUILT_IN_MD, NULL, NULL_TREE); \
11569 struct builtin_description
11571 const unsigned int mask;
11572 const enum insn_code icode;
11573 const char * const name;
11574 const enum arm_builtins code;
11575 const enum rtx_code comparison;
11576 const unsigned int flag;
11579 static const struct builtin_description bdesc_2arg[] =
11581 #define IWMMXT_BUILTIN(code, string, builtin) \
11582 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
11583 ARM_BUILTIN_##builtin, 0, 0 },
11585 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
11586 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
11587 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
11588 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
11589 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
11590 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
11591 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
11592 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
11593 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
11594 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
11595 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
11596 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
11597 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
11598 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
11599 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
11600 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
11601 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
11602 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
11603 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
11604 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
11605 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
11606 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
11607 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
11608 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
11609 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
11610 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
11611 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
11612 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
11613 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
11614 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
11615 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
11616 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
11617 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
11618 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
11619 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
11620 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
11621 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
11622 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
11623 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
11624 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
11625 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
11626 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
11627 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
11628 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
11629 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
11630 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
11631 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
11632 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
11633 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
11634 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
11635 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
11636 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
11637 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
11638 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
11639 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
11640 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
11641 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
11642 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
11644 #define IWMMXT_BUILTIN2(code, builtin) \
11645 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
11647 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
11648 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
11649 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
11650 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
11651 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
11652 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
11653 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
11654 IWMMXT_BUILTIN2 (ashlv4hi3, WSLLHI)
11655 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
11656 IWMMXT_BUILTIN2 (ashlv2si3, WSLLWI)
11657 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
11658 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
11659 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
11660 IWMMXT_BUILTIN2 (lshrv4hi3, WSRLHI)
11661 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
11662 IWMMXT_BUILTIN2 (lshrv2si3, WSRLWI)
11663 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
11664 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
11665 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
11666 IWMMXT_BUILTIN2 (ashrv4hi3, WSRAHI)
11667 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
11668 IWMMXT_BUILTIN2 (ashrv2si3, WSRAWI)
11669 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
11670 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
11671 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
11672 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
11673 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
11674 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
11675 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
11676 IWMMXT_BUILTIN2 (rordi3, WRORDI)
11677 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
11678 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
11681 static const struct builtin_description bdesc_1arg[] =
11683 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
11684 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
11685 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
11686 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
11687 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
11688 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
11689 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
11690 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
11691 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
11692 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
11693 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
11694 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
11695 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
11696 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
11697 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
11698 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
11699 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
11700 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
11703 /* Set up all the iWMMXt builtins. This is
11704 not called if TARGET_IWMMXT is zero. */
11707 arm_init_iwmmxt_builtins (void)
11709 const struct builtin_description * d;
11711 tree endlink = void_list_node;
11713 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
11714 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
11715 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
11718 = build_function_type (integer_type_node,
11719 tree_cons (NULL_TREE, integer_type_node, endlink));
11720 tree v8qi_ftype_v8qi_v8qi_int
11721 = build_function_type (V8QI_type_node,
11722 tree_cons (NULL_TREE, V8QI_type_node,
11723 tree_cons (NULL_TREE, V8QI_type_node,
11724 tree_cons (NULL_TREE,
11727 tree v4hi_ftype_v4hi_int
11728 = build_function_type (V4HI_type_node,
11729 tree_cons (NULL_TREE, V4HI_type_node,
11730 tree_cons (NULL_TREE, integer_type_node,
11732 tree v2si_ftype_v2si_int
11733 = build_function_type (V2SI_type_node,
11734 tree_cons (NULL_TREE, V2SI_type_node,
11735 tree_cons (NULL_TREE, integer_type_node,
11737 tree v2si_ftype_di_di
11738 = build_function_type (V2SI_type_node,
11739 tree_cons (NULL_TREE, long_long_integer_type_node,
11740 tree_cons (NULL_TREE, long_long_integer_type_node,
11742 tree di_ftype_di_int
11743 = build_function_type (long_long_integer_type_node,
11744 tree_cons (NULL_TREE, long_long_integer_type_node,
11745 tree_cons (NULL_TREE, integer_type_node,
11747 tree di_ftype_di_int_int
11748 = build_function_type (long_long_integer_type_node,
11749 tree_cons (NULL_TREE, long_long_integer_type_node,
11750 tree_cons (NULL_TREE, integer_type_node,
11751 tree_cons (NULL_TREE,
11754 tree int_ftype_v8qi
11755 = build_function_type (integer_type_node,
11756 tree_cons (NULL_TREE, V8QI_type_node,
11758 tree int_ftype_v4hi
11759 = build_function_type (integer_type_node,
11760 tree_cons (NULL_TREE, V4HI_type_node,
11762 tree int_ftype_v2si
11763 = build_function_type (integer_type_node,
11764 tree_cons (NULL_TREE, V2SI_type_node,
11766 tree int_ftype_v8qi_int
11767 = build_function_type (integer_type_node,
11768 tree_cons (NULL_TREE, V8QI_type_node,
11769 tree_cons (NULL_TREE, integer_type_node,
11771 tree int_ftype_v4hi_int
11772 = build_function_type (integer_type_node,
11773 tree_cons (NULL_TREE, V4HI_type_node,
11774 tree_cons (NULL_TREE, integer_type_node,
11776 tree int_ftype_v2si_int
11777 = build_function_type (integer_type_node,
11778 tree_cons (NULL_TREE, V2SI_type_node,
11779 tree_cons (NULL_TREE, integer_type_node,
11781 tree v8qi_ftype_v8qi_int_int
11782 = build_function_type (V8QI_type_node,
11783 tree_cons (NULL_TREE, V8QI_type_node,
11784 tree_cons (NULL_TREE, integer_type_node,
11785 tree_cons (NULL_TREE,
11788 tree v4hi_ftype_v4hi_int_int
11789 = build_function_type (V4HI_type_node,
11790 tree_cons (NULL_TREE, V4HI_type_node,
11791 tree_cons (NULL_TREE, integer_type_node,
11792 tree_cons (NULL_TREE,
11795 tree v2si_ftype_v2si_int_int
11796 = build_function_type (V2SI_type_node,
11797 tree_cons (NULL_TREE, V2SI_type_node,
11798 tree_cons (NULL_TREE, integer_type_node,
11799 tree_cons (NULL_TREE,
11802 /* Miscellaneous. */
11803 tree v8qi_ftype_v4hi_v4hi
11804 = build_function_type (V8QI_type_node,
11805 tree_cons (NULL_TREE, V4HI_type_node,
11806 tree_cons (NULL_TREE, V4HI_type_node,
11808 tree v4hi_ftype_v2si_v2si
11809 = build_function_type (V4HI_type_node,
11810 tree_cons (NULL_TREE, V2SI_type_node,
11811 tree_cons (NULL_TREE, V2SI_type_node,
11813 tree v2si_ftype_v4hi_v4hi
11814 = build_function_type (V2SI_type_node,
11815 tree_cons (NULL_TREE, V4HI_type_node,
11816 tree_cons (NULL_TREE, V4HI_type_node,
11818 tree v2si_ftype_v8qi_v8qi
11819 = build_function_type (V2SI_type_node,
11820 tree_cons (NULL_TREE, V8QI_type_node,
11821 tree_cons (NULL_TREE, V8QI_type_node,
11823 tree v4hi_ftype_v4hi_di
11824 = build_function_type (V4HI_type_node,
11825 tree_cons (NULL_TREE, V4HI_type_node,
11826 tree_cons (NULL_TREE,
11827 long_long_integer_type_node,
11829 tree v2si_ftype_v2si_di
11830 = build_function_type (V2SI_type_node,
11831 tree_cons (NULL_TREE, V2SI_type_node,
11832 tree_cons (NULL_TREE,
11833 long_long_integer_type_node,
11835 tree void_ftype_int_int
11836 = build_function_type (void_type_node,
11837 tree_cons (NULL_TREE, integer_type_node,
11838 tree_cons (NULL_TREE, integer_type_node,
11841 = build_function_type (long_long_unsigned_type_node, endlink);
11843 = build_function_type (long_long_integer_type_node,
11844 tree_cons (NULL_TREE, V8QI_type_node,
11847 = build_function_type (long_long_integer_type_node,
11848 tree_cons (NULL_TREE, V4HI_type_node,
11851 = build_function_type (long_long_integer_type_node,
11852 tree_cons (NULL_TREE, V2SI_type_node,
11854 tree v2si_ftype_v4hi
11855 = build_function_type (V2SI_type_node,
11856 tree_cons (NULL_TREE, V4HI_type_node,
11858 tree v4hi_ftype_v8qi
11859 = build_function_type (V4HI_type_node,
11860 tree_cons (NULL_TREE, V8QI_type_node,
11863 tree di_ftype_di_v4hi_v4hi
11864 = build_function_type (long_long_unsigned_type_node,
11865 tree_cons (NULL_TREE,
11866 long_long_unsigned_type_node,
11867 tree_cons (NULL_TREE, V4HI_type_node,
11868 tree_cons (NULL_TREE,
11872 tree di_ftype_v4hi_v4hi
11873 = build_function_type (long_long_unsigned_type_node,
11874 tree_cons (NULL_TREE, V4HI_type_node,
11875 tree_cons (NULL_TREE, V4HI_type_node,
11878 /* Normal vector binops. */
11879 tree v8qi_ftype_v8qi_v8qi
11880 = build_function_type (V8QI_type_node,
11881 tree_cons (NULL_TREE, V8QI_type_node,
11882 tree_cons (NULL_TREE, V8QI_type_node,
11884 tree v4hi_ftype_v4hi_v4hi
11885 = build_function_type (V4HI_type_node,
11886 tree_cons (NULL_TREE, V4HI_type_node,
11887 tree_cons (NULL_TREE, V4HI_type_node,
11889 tree v2si_ftype_v2si_v2si
11890 = build_function_type (V2SI_type_node,
11891 tree_cons (NULL_TREE, V2SI_type_node,
11892 tree_cons (NULL_TREE, V2SI_type_node,
11894 tree di_ftype_di_di
11895 = build_function_type (long_long_unsigned_type_node,
11896 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11897 tree_cons (NULL_TREE,
11898 long_long_unsigned_type_node,
11901 /* Add all builtins that are more or less simple operations on two
11903 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11905 /* Use one of the operands; the target can have a different mode for
11906 mask-generating compares. */
11907 enum machine_mode mode;
11913 mode = insn_data[d->icode].operand[1].mode;
11918 type = v8qi_ftype_v8qi_v8qi;
11921 type = v4hi_ftype_v4hi_v4hi;
11924 type = v2si_ftype_v2si_v2si;
11927 type = di_ftype_di_di;
11931 gcc_unreachable ();
11934 def_mbuiltin (d->mask, d->name, type, d->code);
11937 /* Add the remaining MMX insns with somewhat more complicated types. */
11938 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
11939 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
11940 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
11942 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
11943 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
11944 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
11945 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
11946 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
11947 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
11949 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
11950 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
11951 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
11952 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
11953 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
11954 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
11956 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
11957 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
11958 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
11959 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
11960 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
11961 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
11963 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
11964 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
11965 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
11966 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
11967 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
11968 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
11970 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
11972 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
11973 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
11974 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
11975 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
11977 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
11978 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
11979 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
11980 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
11981 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
11982 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
11983 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
11984 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
11985 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
11987 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
11988 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
11989 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
11991 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
11992 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
11993 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
11995 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
11996 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
11997 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
11998 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
11999 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
12000 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
12002 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
12003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
12004 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
12005 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
12006 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
12007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
12008 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
12009 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
12010 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
12011 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
12012 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
12013 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
12015 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
12016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
12017 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
12018 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
12020 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
12021 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
12022 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
12023 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
12024 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
12025 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
12026 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
12030 arm_init_builtins (void)
12032 if (TARGET_REALLY_IWMMXT)
12033 arm_init_iwmmxt_builtins ();
12036 /* Errors in the source file can cause expand_expr to return const0_rtx
12037 where we expect a vector. To avoid crashing, use one of the vector
12038 clear instructions. */
12041 safe_vector_operand (rtx x, enum machine_mode mode)
12043 if (x != const0_rtx)
12045 x = gen_reg_rtx (mode);
12047 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
12048 : gen_rtx_SUBREG (DImode, x, 0)));
12052 /* Subroutine of arm_expand_builtin to take care of binop insns. */
12055 arm_expand_binop_builtin (enum insn_code icode,
12056 tree arglist, rtx target)
12059 tree arg0 = TREE_VALUE (arglist);
12060 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12061 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12062 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12063 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12064 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12065 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12067 if (VECTOR_MODE_P (mode0))
12068 op0 = safe_vector_operand (op0, mode0);
12069 if (VECTOR_MODE_P (mode1))
12070 op1 = safe_vector_operand (op1, mode1);
12073 || GET_MODE (target) != tmode
12074 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12075 target = gen_reg_rtx (tmode);
12077 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
12079 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12080 op0 = copy_to_mode_reg (mode0, op0);
12081 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12082 op1 = copy_to_mode_reg (mode1, op1);
12084 pat = GEN_FCN (icode) (target, op0, op1);
12091 /* Subroutine of arm_expand_builtin to take care of unop insns. */
12094 arm_expand_unop_builtin (enum insn_code icode,
12095 tree arglist, rtx target, int do_load)
12098 tree arg0 = TREE_VALUE (arglist);
12099 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12100 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12101 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12104 || GET_MODE (target) != tmode
12105 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12106 target = gen_reg_rtx (tmode);
12108 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12111 if (VECTOR_MODE_P (mode0))
12112 op0 = safe_vector_operand (op0, mode0);
12114 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12115 op0 = copy_to_mode_reg (mode0, op0);
12118 pat = GEN_FCN (icode) (target, op0);
12125 /* Expand an expression EXP that calls a built-in function,
12126 with result going to TARGET if that's convenient
12127 (and in mode MODE if that's convenient).
12128 SUBTARGET may be used as the target for computing one of EXP's operands.
12129 IGNORE is nonzero if the value is to be ignored. */
12132 arm_expand_builtin (tree exp,
12134 rtx subtarget ATTRIBUTE_UNUSED,
12135 enum machine_mode mode ATTRIBUTE_UNUSED,
12136 int ignore ATTRIBUTE_UNUSED)
12138 const struct builtin_description * d;
12139 enum insn_code icode;
12140 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12141 tree arglist = TREE_OPERAND (exp, 1);
12149 int fcode = DECL_FUNCTION_CODE (fndecl);
12151 enum machine_mode tmode;
12152 enum machine_mode mode0;
12153 enum machine_mode mode1;
12154 enum machine_mode mode2;
12158 case ARM_BUILTIN_TEXTRMSB:
12159 case ARM_BUILTIN_TEXTRMUB:
12160 case ARM_BUILTIN_TEXTRMSH:
12161 case ARM_BUILTIN_TEXTRMUH:
12162 case ARM_BUILTIN_TEXTRMSW:
12163 case ARM_BUILTIN_TEXTRMUW:
12164 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
12165 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
12166 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
12167 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
12168 : CODE_FOR_iwmmxt_textrmw);
12170 arg0 = TREE_VALUE (arglist);
12171 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12172 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12173 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12174 tmode = insn_data[icode].operand[0].mode;
12175 mode0 = insn_data[icode].operand[1].mode;
12176 mode1 = insn_data[icode].operand[2].mode;
12178 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12179 op0 = copy_to_mode_reg (mode0, op0);
12180 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12182 /* @@@ better error message */
12183 error ("selector must be an immediate");
12184 return gen_reg_rtx (tmode);
12187 || GET_MODE (target) != tmode
12188 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12189 target = gen_reg_rtx (tmode);
12190 pat = GEN_FCN (icode) (target, op0, op1);
12196 case ARM_BUILTIN_TINSRB:
12197 case ARM_BUILTIN_TINSRH:
12198 case ARM_BUILTIN_TINSRW:
12199 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
12200 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
12201 : CODE_FOR_iwmmxt_tinsrw);
12202 arg0 = TREE_VALUE (arglist);
12203 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12204 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12205 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12206 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12207 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12208 tmode = insn_data[icode].operand[0].mode;
12209 mode0 = insn_data[icode].operand[1].mode;
12210 mode1 = insn_data[icode].operand[2].mode;
12211 mode2 = insn_data[icode].operand[3].mode;
12213 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12214 op0 = copy_to_mode_reg (mode0, op0);
12215 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12216 op1 = copy_to_mode_reg (mode1, op1);
12217 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12219 /* @@@ better error message */
12220 error ("selector must be an immediate");
12224 || GET_MODE (target) != tmode
12225 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12226 target = gen_reg_rtx (tmode);
12227 pat = GEN_FCN (icode) (target, op0, op1, op2);
12233 case ARM_BUILTIN_SETWCX:
12234 arg0 = TREE_VALUE (arglist);
12235 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12236 op0 = force_reg (SImode, expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12237 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12238 emit_insn (gen_iwmmxt_tmcr (op1, op0));
12241 case ARM_BUILTIN_GETWCX:
12242 arg0 = TREE_VALUE (arglist);
12243 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12244 target = gen_reg_rtx (SImode);
12245 emit_insn (gen_iwmmxt_tmrc (target, op0));
12248 case ARM_BUILTIN_WSHUFH:
12249 icode = CODE_FOR_iwmmxt_wshufh;
12250 arg0 = TREE_VALUE (arglist);
12251 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12252 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12253 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12254 tmode = insn_data[icode].operand[0].mode;
12255 mode1 = insn_data[icode].operand[1].mode;
12256 mode2 = insn_data[icode].operand[2].mode;
12258 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12259 op0 = copy_to_mode_reg (mode1, op0);
12260 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12262 /* @@@ better error message */
12263 error ("mask must be an immediate");
12267 || GET_MODE (target) != tmode
12268 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12269 target = gen_reg_rtx (tmode);
12270 pat = GEN_FCN (icode) (target, op0, op1);
12276 case ARM_BUILTIN_WSADB:
12277 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, arglist, target);
12278 case ARM_BUILTIN_WSADH:
12279 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, arglist, target);
12280 case ARM_BUILTIN_WSADBZ:
12281 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, arglist, target);
12282 case ARM_BUILTIN_WSADHZ:
12283 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, arglist, target);
12285 /* Several three-argument builtins. */
12286 case ARM_BUILTIN_WMACS:
12287 case ARM_BUILTIN_WMACU:
12288 case ARM_BUILTIN_WALIGN:
12289 case ARM_BUILTIN_TMIA:
12290 case ARM_BUILTIN_TMIAPH:
12291 case ARM_BUILTIN_TMIATT:
12292 case ARM_BUILTIN_TMIATB:
12293 case ARM_BUILTIN_TMIABT:
12294 case ARM_BUILTIN_TMIABB:
12295 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
12296 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
12297 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
12298 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
12299 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
12300 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
12301 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
12302 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
12303 : CODE_FOR_iwmmxt_walign);
12304 arg0 = TREE_VALUE (arglist);
12305 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12306 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12307 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12308 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12309 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12310 tmode = insn_data[icode].operand[0].mode;
12311 mode0 = insn_data[icode].operand[1].mode;
12312 mode1 = insn_data[icode].operand[2].mode;
12313 mode2 = insn_data[icode].operand[3].mode;
12315 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12316 op0 = copy_to_mode_reg (mode0, op0);
12317 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12318 op1 = copy_to_mode_reg (mode1, op1);
12319 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12320 op2 = copy_to_mode_reg (mode2, op2);
12322 || GET_MODE (target) != tmode
12323 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12324 target = gen_reg_rtx (tmode);
12325 pat = GEN_FCN (icode) (target, op0, op1, op2);
12331 case ARM_BUILTIN_WZERO:
12332 target = gen_reg_rtx (DImode);
12333 emit_insn (gen_iwmmxt_clrdi (target));
12340 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12341 if (d->code == (const enum arm_builtins) fcode)
12342 return arm_expand_binop_builtin (d->icode, arglist, target);
12344 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12345 if (d->code == (const enum arm_builtins) fcode)
12346 return arm_expand_unop_builtin (d->icode, arglist, target, 0);
12348 /* @@@ Should really do something sensible here. */
12352 /* Return the number (counting from 0) of
12353 the least significant set bit in MASK. */
12356 number_of_first_bit_set (unsigned mask)
12361 (mask & (1 << bit)) == 0;
12368 /* Emit code to push or pop registers to or from the stack. F is the
12369 assembly file. MASK is the registers to push or pop. PUSH is
12370 nonzero if we should push, and zero if we should pop. For debugging
12371 output, if pushing, adjust CFA_OFFSET by the amount of space added
12372 to the stack. REAL_REGS should have the same number of bits set as
12373 MASK, and will be used instead (in the same order) to describe which
12374 registers were saved - this is used to mark the save slots when we
12375 push high registers after moving them to low registers. */
12377 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
12378 unsigned long real_regs)
12381 int lo_mask = mask & 0xFF;
12382 int pushed_words = 0;
12386 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
12388 /* Special case. Do not generate a POP PC statement here, do it in
12390 thumb_exit (f, -1);
12394 fprintf (f, "\t%s\t{", push ? "push" : "pop");
12396 /* Look at the low registers first. */
12397 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
12401 asm_fprintf (f, "%r", regno);
12403 if ((lo_mask & ~1) != 0)
12410 if (push && (mask & (1 << LR_REGNUM)))
12412 /* Catch pushing the LR. */
12416 asm_fprintf (f, "%r", LR_REGNUM);
12420 else if (!push && (mask & (1 << PC_REGNUM)))
12422 /* Catch popping the PC. */
12423 if (TARGET_INTERWORK || TARGET_BACKTRACE
12424 || current_function_calls_eh_return)
12426 /* The PC is never poped directly, instead
12427 it is popped into r3 and then BX is used. */
12428 fprintf (f, "}\n");
12430 thumb_exit (f, -1);
12439 asm_fprintf (f, "%r", PC_REGNUM);
12443 fprintf (f, "}\n");
12445 if (push && pushed_words && dwarf2out_do_frame ())
12447 char *l = dwarf2out_cfi_label ();
12448 int pushed_mask = real_regs;
12450 *cfa_offset += pushed_words * 4;
12451 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
12454 pushed_mask = real_regs;
12455 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
12457 if (pushed_mask & 1)
12458 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
12463 /* Generate code to return from a thumb function.
12464 If 'reg_containing_return_addr' is -1, then the return address is
12465 actually on the stack, at the stack pointer. */
12467 thumb_exit (FILE *f, int reg_containing_return_addr)
12469 unsigned regs_available_for_popping;
12470 unsigned regs_to_pop;
12472 unsigned available;
12476 int restore_a4 = FALSE;
12478 /* Compute the registers we need to pop. */
12482 if (reg_containing_return_addr == -1)
12484 regs_to_pop |= 1 << LR_REGNUM;
12488 if (TARGET_BACKTRACE)
12490 /* Restore the (ARM) frame pointer and stack pointer. */
12491 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
12495 /* If there is nothing to pop then just emit the BX instruction and
12497 if (pops_needed == 0)
12499 if (current_function_calls_eh_return)
12500 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12502 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12505 /* Otherwise if we are not supporting interworking and we have not created
12506 a backtrace structure and the function was not entered in ARM mode then
12507 just pop the return address straight into the PC. */
12508 else if (!TARGET_INTERWORK
12509 && !TARGET_BACKTRACE
12510 && !is_called_in_ARM_mode (current_function_decl)
12511 && !current_function_calls_eh_return)
12513 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
12517 /* Find out how many of the (return) argument registers we can corrupt. */
12518 regs_available_for_popping = 0;
12520 /* If returning via __builtin_eh_return, the bottom three registers
12521 all contain information needed for the return. */
12522 if (current_function_calls_eh_return)
12526 /* If we can deduce the registers used from the function's
12527 return value. This is more reliable that examining
12528 regs_ever_live[] because that will be set if the register is
12529 ever used in the function, not just if the register is used
12530 to hold a return value. */
12532 if (current_function_return_rtx != 0)
12533 mode = GET_MODE (current_function_return_rtx);
12535 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12537 size = GET_MODE_SIZE (mode);
12541 /* In a void function we can use any argument register.
12542 In a function that returns a structure on the stack
12543 we can use the second and third argument registers. */
12544 if (mode == VOIDmode)
12545 regs_available_for_popping =
12546 (1 << ARG_REGISTER (1))
12547 | (1 << ARG_REGISTER (2))
12548 | (1 << ARG_REGISTER (3));
12550 regs_available_for_popping =
12551 (1 << ARG_REGISTER (2))
12552 | (1 << ARG_REGISTER (3));
12554 else if (size <= 4)
12555 regs_available_for_popping =
12556 (1 << ARG_REGISTER (2))
12557 | (1 << ARG_REGISTER (3));
12558 else if (size <= 8)
12559 regs_available_for_popping =
12560 (1 << ARG_REGISTER (3));
12563 /* Match registers to be popped with registers into which we pop them. */
12564 for (available = regs_available_for_popping,
12565 required = regs_to_pop;
12566 required != 0 && available != 0;
12567 available &= ~(available & - available),
12568 required &= ~(required & - required))
12571 /* If we have any popping registers left over, remove them. */
12573 regs_available_for_popping &= ~available;
12575 /* Otherwise if we need another popping register we can use
12576 the fourth argument register. */
12577 else if (pops_needed)
12579 /* If we have not found any free argument registers and
12580 reg a4 contains the return address, we must move it. */
12581 if (regs_available_for_popping == 0
12582 && reg_containing_return_addr == LAST_ARG_REGNUM)
12584 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12585 reg_containing_return_addr = LR_REGNUM;
12587 else if (size > 12)
12589 /* Register a4 is being used to hold part of the return value,
12590 but we have dire need of a free, low register. */
12593 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
12596 if (reg_containing_return_addr != LAST_ARG_REGNUM)
12598 /* The fourth argument register is available. */
12599 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
12605 /* Pop as many registers as we can. */
12606 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12607 regs_available_for_popping);
12609 /* Process the registers we popped. */
12610 if (reg_containing_return_addr == -1)
12612 /* The return address was popped into the lowest numbered register. */
12613 regs_to_pop &= ~(1 << LR_REGNUM);
12615 reg_containing_return_addr =
12616 number_of_first_bit_set (regs_available_for_popping);
12618 /* Remove this register for the mask of available registers, so that
12619 the return address will not be corrupted by further pops. */
12620 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
12623 /* If we popped other registers then handle them here. */
12624 if (regs_available_for_popping)
12628 /* Work out which register currently contains the frame pointer. */
12629 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
12631 /* Move it into the correct place. */
12632 asm_fprintf (f, "\tmov\t%r, %r\n",
12633 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
12635 /* (Temporarily) remove it from the mask of popped registers. */
12636 regs_available_for_popping &= ~(1 << frame_pointer);
12637 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
12639 if (regs_available_for_popping)
12643 /* We popped the stack pointer as well,
12644 find the register that contains it. */
12645 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
12647 /* Move it into the stack register. */
12648 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
12650 /* At this point we have popped all necessary registers, so
12651 do not worry about restoring regs_available_for_popping
12652 to its correct value:
12654 assert (pops_needed == 0)
12655 assert (regs_available_for_popping == (1 << frame_pointer))
12656 assert (regs_to_pop == (1 << STACK_POINTER)) */
12660 /* Since we have just move the popped value into the frame
12661 pointer, the popping register is available for reuse, and
12662 we know that we still have the stack pointer left to pop. */
12663 regs_available_for_popping |= (1 << frame_pointer);
12667 /* If we still have registers left on the stack, but we no longer have
12668 any registers into which we can pop them, then we must move the return
12669 address into the link register and make available the register that
12671 if (regs_available_for_popping == 0 && pops_needed > 0)
12673 regs_available_for_popping |= 1 << reg_containing_return_addr;
12675 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
12676 reg_containing_return_addr);
12678 reg_containing_return_addr = LR_REGNUM;
12681 /* If we have registers left on the stack then pop some more.
12682 We know that at most we will want to pop FP and SP. */
12683 if (pops_needed > 0)
12688 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12689 regs_available_for_popping);
12691 /* We have popped either FP or SP.
12692 Move whichever one it is into the correct register. */
12693 popped_into = number_of_first_bit_set (regs_available_for_popping);
12694 move_to = number_of_first_bit_set (regs_to_pop);
12696 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
12698 regs_to_pop &= ~(1 << move_to);
12703 /* If we still have not popped everything then we must have only
12704 had one register available to us and we are now popping the SP. */
12705 if (pops_needed > 0)
12709 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12710 regs_available_for_popping);
12712 popped_into = number_of_first_bit_set (regs_available_for_popping);
12714 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
12716 assert (regs_to_pop == (1 << STACK_POINTER))
12717 assert (pops_needed == 1)
12721 /* If necessary restore the a4 register. */
12724 if (reg_containing_return_addr != LR_REGNUM)
12726 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12727 reg_containing_return_addr = LR_REGNUM;
12730 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
12733 if (current_function_calls_eh_return)
12734 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12736 /* Return to caller. */
12737 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12742 thumb_final_prescan_insn (rtx insn)
12744 if (flag_print_asm_name)
12745 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
12746 INSN_ADDRESSES (INSN_UID (insn)));
12750 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
12752 unsigned HOST_WIDE_INT mask = 0xff;
12755 if (val == 0) /* XXX */
12758 for (i = 0; i < 25; i++)
12759 if ((val & (mask << i)) == val)
12765 /* Returns nonzero if the current function contains,
12766 or might contain a far jump. */
12768 thumb_far_jump_used_p (void)
12772 /* This test is only important for leaf functions. */
12773 /* assert (!leaf_function_p ()); */
12775 /* If we have already decided that far jumps may be used,
12776 do not bother checking again, and always return true even if
12777 it turns out that they are not being used. Once we have made
12778 the decision that far jumps are present (and that hence the link
12779 register will be pushed onto the stack) we cannot go back on it. */
12780 if (cfun->machine->far_jump_used)
12783 /* If this function is not being called from the prologue/epilogue
12784 generation code then it must be being called from the
12785 INITIAL_ELIMINATION_OFFSET macro. */
12786 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
12788 /* In this case we know that we are being asked about the elimination
12789 of the arg pointer register. If that register is not being used,
12790 then there are no arguments on the stack, and we do not have to
12791 worry that a far jump might force the prologue to push the link
12792 register, changing the stack offsets. In this case we can just
12793 return false, since the presence of far jumps in the function will
12794 not affect stack offsets.
12796 If the arg pointer is live (or if it was live, but has now been
12797 eliminated and so set to dead) then we do have to test to see if
12798 the function might contain a far jump. This test can lead to some
12799 false negatives, since before reload is completed, then length of
12800 branch instructions is not known, so gcc defaults to returning their
12801 longest length, which in turn sets the far jump attribute to true.
12803 A false negative will not result in bad code being generated, but it
12804 will result in a needless push and pop of the link register. We
12805 hope that this does not occur too often.
12807 If we need doubleword stack alignment this could affect the other
12808 elimination offsets so we can't risk getting it wrong. */
12809 if (regs_ever_live [ARG_POINTER_REGNUM])
12810 cfun->machine->arg_pointer_live = 1;
12811 else if (!cfun->machine->arg_pointer_live)
12815 /* Check to see if the function contains a branch
12816 insn with the far jump attribute set. */
12817 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12819 if (GET_CODE (insn) == JUMP_INSN
12820 /* Ignore tablejump patterns. */
12821 && GET_CODE (PATTERN (insn)) != ADDR_VEC
12822 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
12823 && get_attr_far_jump (insn) == FAR_JUMP_YES
12826 /* Record the fact that we have decided that
12827 the function does use far jumps. */
12828 cfun->machine->far_jump_used = 1;
12836 /* Return nonzero if FUNC must be entered in ARM mode. */
12838 is_called_in_ARM_mode (tree func)
12840 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
12842 /* Ignore the problem about functions whose address is taken. */
12843 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
12847 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
12853 /* The bits which aren't usefully expanded as rtl. */
12855 thumb_unexpanded_epilogue (void)
12858 unsigned long live_regs_mask = 0;
12859 int high_regs_pushed = 0;
12860 int had_to_push_lr;
12864 if (return_used_this_function)
12867 if (IS_NAKED (arm_current_func_type ()))
12870 live_regs_mask = thumb_compute_save_reg_mask ();
12871 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
12873 /* If we can deduce the registers used from the function's return value.
12874 This is more reliable that examining regs_ever_live[] because that
12875 will be set if the register is ever used in the function, not just if
12876 the register is used to hold a return value. */
12878 if (current_function_return_rtx != 0)
12879 mode = GET_MODE (current_function_return_rtx);
12881 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12883 size = GET_MODE_SIZE (mode);
12885 /* The prolog may have pushed some high registers to use as
12886 work registers. e.g. the testsuite file:
12887 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
12888 compiles to produce:
12889 push {r4, r5, r6, r7, lr}
12893 as part of the prolog. We have to undo that pushing here. */
12895 if (high_regs_pushed)
12897 unsigned long mask = live_regs_mask & 0xff;
12900 /* The available low registers depend on the size of the value we are
12908 /* Oh dear! We have no low registers into which we can pop
12911 ("no low registers available for popping high registers");
12913 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
12914 if (live_regs_mask & (1 << next_hi_reg))
12917 while (high_regs_pushed)
12919 /* Find lo register(s) into which the high register(s) can
12921 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12923 if (mask & (1 << regno))
12924 high_regs_pushed--;
12925 if (high_regs_pushed == 0)
12929 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
12931 /* Pop the values into the low register(s). */
12932 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
12934 /* Move the value(s) into the high registers. */
12935 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12937 if (mask & (1 << regno))
12939 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
12942 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
12943 if (live_regs_mask & (1 << next_hi_reg))
12948 live_regs_mask &= ~0x0f00;
12951 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
12952 live_regs_mask &= 0xff;
12954 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
12956 /* Pop the return address into the PC. */
12957 if (had_to_push_lr)
12958 live_regs_mask |= 1 << PC_REGNUM;
12960 /* Either no argument registers were pushed or a backtrace
12961 structure was created which includes an adjusted stack
12962 pointer, so just pop everything. */
12963 if (live_regs_mask)
12964 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
12967 /* We have either just popped the return address into the
12968 PC or it is was kept in LR for the entire function. */
12969 if (!had_to_push_lr)
12970 thumb_exit (asm_out_file, LR_REGNUM);
12974 /* Pop everything but the return address. */
12975 if (live_regs_mask)
12976 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
12979 if (had_to_push_lr)
12983 /* We have no free low regs, so save one. */
12984 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
12988 /* Get the return address into a temporary register. */
12989 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
12990 1 << LAST_ARG_REGNUM);
12994 /* Move the return address to lr. */
12995 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
12997 /* Restore the low register. */
12998 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
13003 regno = LAST_ARG_REGNUM;
13008 /* Remove the argument registers that were pushed onto the stack. */
13009 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
13010 SP_REGNUM, SP_REGNUM,
13011 current_function_pretend_args_size);
13013 thumb_exit (asm_out_file, regno);
13019 /* Functions to save and restore machine-specific function data. */
13020 static struct machine_function *
13021 arm_init_machine_status (void)
13023 struct machine_function *machine;
13024 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
13026 #if ARM_FT_UNKNOWN != 0
13027 machine->func_type = ARM_FT_UNKNOWN;
13032 /* Return an RTX indicating where the return address to the
13033 calling function can be found. */
13035 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
13040 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
13043 /* Do anything needed before RTL is emitted for each function. */
13045 arm_init_expanders (void)
13047 /* Arrange to initialize and mark the machine per-function status. */
13048 init_machine_status = arm_init_machine_status;
13050 /* This is to stop the combine pass optimizing away the alignment
13051 adjustment of va_arg. */
13052 /* ??? It is claimed that this should not be necessary. */
13054 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
13058 /* Like arm_compute_initial_elimination offset. Simpler because
13059 THUMB_HARD_FRAME_POINTER isn't actually the ABI specified frame pointer. */
13062 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
13064 arm_stack_offsets *offsets;
13066 offsets = arm_get_frame_offsets ();
13070 case ARG_POINTER_REGNUM:
13073 case STACK_POINTER_REGNUM:
13074 return offsets->outgoing_args - offsets->saved_args;
13076 case FRAME_POINTER_REGNUM:
13077 return offsets->soft_frame - offsets->saved_args;
13079 case THUMB_HARD_FRAME_POINTER_REGNUM:
13080 case ARM_HARD_FRAME_POINTER_REGNUM:
13081 return offsets->saved_regs - offsets->saved_args;
13084 gcc_unreachable ();
13088 case FRAME_POINTER_REGNUM:
13091 case STACK_POINTER_REGNUM:
13092 return offsets->outgoing_args - offsets->soft_frame;
13094 case THUMB_HARD_FRAME_POINTER_REGNUM:
13095 case ARM_HARD_FRAME_POINTER_REGNUM:
13096 return offsets->saved_regs - offsets->soft_frame;
13099 gcc_unreachable ();
13104 gcc_unreachable ();
13109 /* Generate the rest of a function's prologue. */
13111 thumb_expand_prologue (void)
13115 HOST_WIDE_INT amount;
13116 arm_stack_offsets *offsets;
13117 unsigned long func_type;
13119 unsigned long live_regs_mask;
13121 func_type = arm_current_func_type ();
13123 /* Naked functions don't have prologues. */
13124 if (IS_NAKED (func_type))
13127 if (IS_INTERRUPT (func_type))
13129 error ("interrupt Service Routines cannot be coded in Thumb mode");
13133 live_regs_mask = thumb_compute_save_reg_mask ();
13134 /* Load the pic register before setting the frame pointer,
13135 so we can use r7 as a temporary work register. */
13137 arm_load_pic_register (thumb_find_work_register (live_regs_mask));
13139 offsets = arm_get_frame_offsets ();
13141 if (frame_pointer_needed)
13143 insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
13144 stack_pointer_rtx));
13145 RTX_FRAME_RELATED_P (insn) = 1;
13147 else if (CALLER_INTERWORKING_SLOT_SIZE > 0)
13148 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
13149 stack_pointer_rtx);
13151 amount = offsets->outgoing_args - offsets->saved_regs;
13156 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13157 GEN_INT (- amount)));
13158 RTX_FRAME_RELATED_P (insn) = 1;
13164 /* The stack decrement is too big for an immediate value in a single
13165 insn. In theory we could issue multiple subtracts, but after
13166 three of them it becomes more space efficient to place the full
13167 value in the constant pool and load into a register. (Also the
13168 ARM debugger really likes to see only one stack decrement per
13169 function). So instead we look for a scratch register into which
13170 we can load the decrement, and then we subtract this from the
13171 stack pointer. Unfortunately on the thumb the only available
13172 scratch registers are the argument registers, and we cannot use
13173 these as they may hold arguments to the function. Instead we
13174 attempt to locate a call preserved register which is used by this
13175 function. If we can find one, then we know that it will have
13176 been pushed at the start of the prologue and so we can corrupt
13178 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
13179 if (live_regs_mask & (1 << regno)
13180 && !(frame_pointer_needed
13181 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
13184 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
13186 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
13188 /* Choose an arbitrary, non-argument low register. */
13189 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
13191 /* Save it by copying it into a high, scratch register. */
13192 emit_insn (gen_movsi (spare, reg));
13193 /* Add a USE to stop propagate_one_insn() from barfing. */
13194 emit_insn (gen_prologue_use (spare));
13196 /* Decrement the stack. */
13197 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13198 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13199 stack_pointer_rtx, reg));
13200 RTX_FRAME_RELATED_P (insn) = 1;
13201 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13202 plus_constant (stack_pointer_rtx,
13204 RTX_FRAME_RELATED_P (dwarf) = 1;
13206 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13209 /* Restore the low register's original value. */
13210 emit_insn (gen_movsi (reg, spare));
13212 /* Emit a USE of the restored scratch register, so that flow
13213 analysis will not consider the restore redundant. The
13214 register won't be used again in this function and isn't
13215 restored by the epilogue. */
13216 emit_insn (gen_prologue_use (reg));
13220 reg = gen_rtx_REG (SImode, regno);
13222 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13224 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13225 stack_pointer_rtx, reg));
13226 RTX_FRAME_RELATED_P (insn) = 1;
13227 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13228 plus_constant (stack_pointer_rtx,
13230 RTX_FRAME_RELATED_P (dwarf) = 1;
13232 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13236 /* If the frame pointer is needed, emit a special barrier that
13237 will prevent the scheduler from moving stores to the frame
13238 before the stack adjustment. */
13239 if (frame_pointer_needed)
13240 emit_insn (gen_stack_tie (stack_pointer_rtx,
13241 hard_frame_pointer_rtx));
13244 if (current_function_profile || !TARGET_SCHED_PROLOG)
13245 emit_insn (gen_blockage ());
13247 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
13248 if (live_regs_mask & 0xff)
13249 cfun->machine->lr_save_eliminated = 0;
13251 /* If the link register is being kept alive, with the return address in it,
13252 then make sure that it does not get reused by the ce2 pass. */
13253 if (cfun->machine->lr_save_eliminated)
13254 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
13259 thumb_expand_epilogue (void)
13261 HOST_WIDE_INT amount;
13262 arm_stack_offsets *offsets;
13265 /* Naked functions don't have prologues. */
13266 if (IS_NAKED (arm_current_func_type ()))
13269 offsets = arm_get_frame_offsets ();
13270 amount = offsets->outgoing_args - offsets->saved_regs;
13272 if (frame_pointer_needed)
13273 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
13277 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13278 GEN_INT (amount)));
13281 /* r3 is always free in the epilogue. */
13282 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
13284 emit_insn (gen_movsi (reg, GEN_INT (amount)));
13285 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
13289 /* Emit a USE (stack_pointer_rtx), so that
13290 the stack adjustment will not be deleted. */
13291 emit_insn (gen_prologue_use (stack_pointer_rtx));
13293 if (current_function_profile || !TARGET_SCHED_PROLOG)
13294 emit_insn (gen_blockage ());
13296 /* Emit a clobber for each insn that will be restored in the epilogue,
13297 so that flow2 will get register lifetimes correct. */
13298 for (regno = 0; regno < 13; regno++)
13299 if (regs_ever_live[regno] && !call_used_regs[regno])
13300 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
13302 if (! regs_ever_live[LR_REGNUM])
13303 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
13307 thumb_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
13309 unsigned long live_regs_mask = 0;
13310 unsigned long l_mask;
13311 unsigned high_regs_pushed = 0;
13312 int cfa_offset = 0;
13315 if (IS_NAKED (arm_current_func_type ()))
13318 if (is_called_in_ARM_mode (current_function_decl))
13322 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
13323 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
13325 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
13327 /* Generate code sequence to switch us into Thumb mode. */
13328 /* The .code 32 directive has already been emitted by
13329 ASM_DECLARE_FUNCTION_NAME. */
13330 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
13331 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
13333 /* Generate a label, so that the debugger will notice the
13334 change in instruction sets. This label is also used by
13335 the assembler to bypass the ARM code when this function
13336 is called from a Thumb encoded function elsewhere in the
13337 same file. Hence the definition of STUB_NAME here must
13338 agree with the definition in gas/config/tc-arm.c. */
13340 #define STUB_NAME ".real_start_of"
13342 fprintf (f, "\t.code\t16\n");
13344 if (arm_dllexport_name_p (name))
13345 name = arm_strip_name_encoding (name);
13347 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
13348 fprintf (f, "\t.thumb_func\n");
13349 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
13352 if (current_function_pretend_args_size)
13354 if (cfun->machine->uses_anonymous_args)
13358 fprintf (f, "\tpush\t{");
13360 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
13362 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
13363 regno <= LAST_ARG_REGNUM;
13365 asm_fprintf (f, "%r%s", regno,
13366 regno == LAST_ARG_REGNUM ? "" : ", ");
13368 fprintf (f, "}\n");
13371 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
13372 SP_REGNUM, SP_REGNUM,
13373 current_function_pretend_args_size);
13375 /* We don't need to record the stores for unwinding (would it
13376 help the debugger any if we did?), but record the change in
13377 the stack pointer. */
13378 if (dwarf2out_do_frame ())
13380 char *l = dwarf2out_cfi_label ();
13382 cfa_offset = cfa_offset + current_function_pretend_args_size;
13383 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13387 /* Get the registers we are going to push. */
13388 live_regs_mask = thumb_compute_save_reg_mask ();
13389 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
13390 l_mask = live_regs_mask & 0x40ff;
13391 /* Then count how many other high registers will need to be pushed. */
13392 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
13394 if (TARGET_BACKTRACE)
13397 unsigned work_register;
13399 /* We have been asked to create a stack backtrace structure.
13400 The code looks like this:
13404 0 sub SP, #16 Reserve space for 4 registers.
13405 2 push {R7} Push low registers.
13406 4 add R7, SP, #20 Get the stack pointer before the push.
13407 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
13408 8 mov R7, PC Get hold of the start of this code plus 12.
13409 10 str R7, [SP, #16] Store it.
13410 12 mov R7, FP Get hold of the current frame pointer.
13411 14 str R7, [SP, #4] Store it.
13412 16 mov R7, LR Get hold of the current return address.
13413 18 str R7, [SP, #12] Store it.
13414 20 add R7, SP, #16 Point at the start of the backtrace structure.
13415 22 mov FP, R7 Put this value into the frame pointer. */
13417 work_register = thumb_find_work_register (live_regs_mask);
13420 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
13421 SP_REGNUM, SP_REGNUM);
13423 if (dwarf2out_do_frame ())
13425 char *l = dwarf2out_cfi_label ();
13427 cfa_offset = cfa_offset + 16;
13428 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13433 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13434 offset = bit_count (l_mask);
13439 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13440 offset + 16 + current_function_pretend_args_size);
13442 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13445 /* Make sure that the instruction fetching the PC is in the right place
13446 to calculate "start of backtrace creation code + 12". */
13449 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13450 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13452 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13453 ARM_HARD_FRAME_POINTER_REGNUM);
13454 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13459 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13460 ARM_HARD_FRAME_POINTER_REGNUM);
13461 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13463 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13464 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13468 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
13469 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13471 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13473 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
13474 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
13476 /* Optimisation: If we are not pushing any low registers but we are going
13477 to push some high registers then delay our first push. This will just
13478 be a push of LR and we can combine it with the push of the first high
13480 else if ((l_mask & 0xff) != 0
13481 || (high_regs_pushed == 0 && l_mask))
13482 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13484 if (high_regs_pushed)
13486 unsigned pushable_regs;
13487 unsigned next_hi_reg;
13489 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
13490 if (live_regs_mask & (1 << next_hi_reg))
13493 pushable_regs = l_mask & 0xff;
13495 if (pushable_regs == 0)
13496 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
13498 while (high_regs_pushed > 0)
13500 unsigned long real_regs_mask = 0;
13502 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
13504 if (pushable_regs & (1 << regno))
13506 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
13508 high_regs_pushed --;
13509 real_regs_mask |= (1 << next_hi_reg);
13511 if (high_regs_pushed)
13513 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
13515 if (live_regs_mask & (1 << next_hi_reg))
13520 pushable_regs &= ~((1 << regno) - 1);
13526 /* If we had to find a work register and we have not yet
13527 saved the LR then add it to the list of regs to push. */
13528 if (l_mask == (1 << LR_REGNUM))
13530 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
13532 real_regs_mask | (1 << LR_REGNUM));
13536 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
13541 /* Handle the case of a double word load into a low register from
13542 a computed memory address. The computed address may involve a
13543 register which is overwritten by the load. */
13545 thumb_load_double_from_address (rtx *operands)
13553 gcc_assert (GET_CODE (operands[0]) == REG);
13554 gcc_assert (GET_CODE (operands[1]) == MEM);
13556 /* Get the memory address. */
13557 addr = XEXP (operands[1], 0);
13559 /* Work out how the memory address is computed. */
13560 switch (GET_CODE (addr))
13563 operands[2] = gen_rtx_MEM (SImode,
13564 plus_constant (XEXP (operands[1], 0), 4));
13566 if (REGNO (operands[0]) == REGNO (addr))
13568 output_asm_insn ("ldr\t%H0, %2", operands);
13569 output_asm_insn ("ldr\t%0, %1", operands);
13573 output_asm_insn ("ldr\t%0, %1", operands);
13574 output_asm_insn ("ldr\t%H0, %2", operands);
13579 /* Compute <address> + 4 for the high order load. */
13580 operands[2] = gen_rtx_MEM (SImode,
13581 plus_constant (XEXP (operands[1], 0), 4));
13583 output_asm_insn ("ldr\t%0, %1", operands);
13584 output_asm_insn ("ldr\t%H0, %2", operands);
13588 arg1 = XEXP (addr, 0);
13589 arg2 = XEXP (addr, 1);
13591 if (CONSTANT_P (arg1))
13592 base = arg2, offset = arg1;
13594 base = arg1, offset = arg2;
13596 gcc_assert (GET_CODE (base) == REG);
13598 /* Catch the case of <address> = <reg> + <reg> */
13599 if (GET_CODE (offset) == REG)
13601 int reg_offset = REGNO (offset);
13602 int reg_base = REGNO (base);
13603 int reg_dest = REGNO (operands[0]);
13605 /* Add the base and offset registers together into the
13606 higher destination register. */
13607 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
13608 reg_dest + 1, reg_base, reg_offset);
13610 /* Load the lower destination register from the address in
13611 the higher destination register. */
13612 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
13613 reg_dest, reg_dest + 1);
13615 /* Load the higher destination register from its own address
13617 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
13618 reg_dest + 1, reg_dest + 1);
13622 /* Compute <address> + 4 for the high order load. */
13623 operands[2] = gen_rtx_MEM (SImode,
13624 plus_constant (XEXP (operands[1], 0), 4));
13626 /* If the computed address is held in the low order register
13627 then load the high order register first, otherwise always
13628 load the low order register first. */
13629 if (REGNO (operands[0]) == REGNO (base))
13631 output_asm_insn ("ldr\t%H0, %2", operands);
13632 output_asm_insn ("ldr\t%0, %1", operands);
13636 output_asm_insn ("ldr\t%0, %1", operands);
13637 output_asm_insn ("ldr\t%H0, %2", operands);
13643 /* With no registers to worry about we can just load the value
13645 operands[2] = gen_rtx_MEM (SImode,
13646 plus_constant (XEXP (operands[1], 0), 4));
13648 output_asm_insn ("ldr\t%H0, %2", operands);
13649 output_asm_insn ("ldr\t%0, %1", operands);
13653 gcc_unreachable ();
13660 thumb_output_move_mem_multiple (int n, rtx *operands)
13667 if (REGNO (operands[4]) > REGNO (operands[5]))
13670 operands[4] = operands[5];
13673 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
13674 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
13678 if (REGNO (operands[4]) > REGNO (operands[5]))
13681 operands[4] = operands[5];
13684 if (REGNO (operands[5]) > REGNO (operands[6]))
13687 operands[5] = operands[6];
13690 if (REGNO (operands[4]) > REGNO (operands[5]))
13693 operands[4] = operands[5];
13697 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
13698 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
13702 gcc_unreachable ();
13708 /* Output a call-via instruction for thumb state. */
13710 thumb_call_via_reg (rtx reg)
13712 int regno = REGNO (reg);
13715 gcc_assert (regno < LR_REGNUM);
13717 /* If we are in the normal text section we can use a single instance
13718 per compilation unit. If we are doing function sections, then we need
13719 an entry per section, since we can't rely on reachability. */
13720 if (in_text_section ())
13722 thumb_call_reg_needed = 1;
13724 if (thumb_call_via_label[regno] == NULL)
13725 thumb_call_via_label[regno] = gen_label_rtx ();
13726 labelp = thumb_call_via_label + regno;
13730 if (cfun->machine->call_via[regno] == NULL)
13731 cfun->machine->call_via[regno] = gen_label_rtx ();
13732 labelp = cfun->machine->call_via + regno;
13735 output_asm_insn ("bl\t%a0", labelp);
13739 /* Routines for generating rtl. */
13741 thumb_expand_movmemqi (rtx *operands)
13743 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
13744 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
13745 HOST_WIDE_INT len = INTVAL (operands[2]);
13746 HOST_WIDE_INT offset = 0;
13750 emit_insn (gen_movmem12b (out, in, out, in));
13756 emit_insn (gen_movmem8b (out, in, out, in));
13762 rtx reg = gen_reg_rtx (SImode);
13763 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
13764 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
13771 rtx reg = gen_reg_rtx (HImode);
13772 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
13773 plus_constant (in, offset))));
13774 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
13782 rtx reg = gen_reg_rtx (QImode);
13783 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
13784 plus_constant (in, offset))));
13785 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
13791 thumb_reload_out_hi (rtx *operands)
13793 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
13796 /* Handle reading a half-word from memory during reload. */
13798 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
13800 gcc_unreachable ();
13803 /* Return the length of a function name prefix
13804 that starts with the character 'c'. */
13806 arm_get_strip_length (int c)
13810 ARM_NAME_ENCODING_LENGTHS
13815 /* Return a pointer to a function's name with any
13816 and all prefix encodings stripped from it. */
13818 arm_strip_name_encoding (const char *name)
13822 while ((skip = arm_get_strip_length (* name)))
13828 /* If there is a '*' anywhere in the name's prefix, then
13829 emit the stripped name verbatim, otherwise prepend an
13830 underscore if leading underscores are being used. */
13832 arm_asm_output_labelref (FILE *stream, const char *name)
13837 while ((skip = arm_get_strip_length (* name)))
13839 verbatim |= (*name == '*');
13844 fputs (name, stream);
13846 asm_fprintf (stream, "%U%s", name);
13850 arm_file_end (void)
13854 if (! thumb_call_reg_needed)
13858 asm_fprintf (asm_out_file, "\t.code 16\n");
13859 ASM_OUTPUT_ALIGN (asm_out_file, 1);
13861 for (regno = 0; regno < LR_REGNUM; regno++)
13863 rtx label = thumb_call_via_label[regno];
13867 targetm.asm_out.internal_label (asm_out_file, "L",
13868 CODE_LABEL_NUMBER (label));
13869 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13876 #ifdef AOF_ASSEMBLER
13877 /* Special functions only needed when producing AOF syntax assembler. */
13881 struct pic_chain * next;
13882 const char * symname;
13885 static struct pic_chain * aof_pic_chain = NULL;
13888 aof_pic_entry (rtx x)
13890 struct pic_chain ** chainp;
13893 if (aof_pic_label == NULL_RTX)
13895 aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
13898 for (offset = 0, chainp = &aof_pic_chain; *chainp;
13899 offset += 4, chainp = &(*chainp)->next)
13900 if ((*chainp)->symname == XSTR (x, 0))
13901 return plus_constant (aof_pic_label, offset);
13903 *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
13904 (*chainp)->next = NULL;
13905 (*chainp)->symname = XSTR (x, 0);
13906 return plus_constant (aof_pic_label, offset);
13910 aof_dump_pic_table (FILE *f)
13912 struct pic_chain * chain;
13914 if (aof_pic_chain == NULL)
13917 asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
13918 PIC_OFFSET_TABLE_REGNUM,
13919 PIC_OFFSET_TABLE_REGNUM);
13920 fputs ("|x$adcons|\n", f);
13922 for (chain = aof_pic_chain; chain; chain = chain->next)
13924 fputs ("\tDCD\t", f);
13925 assemble_name (f, chain->symname);
13930 int arm_text_section_count = 1;
13933 aof_text_section (void )
13935 static char buf[100];
13936 sprintf (buf, "\tAREA |C$$code%d|, CODE, READONLY",
13937 arm_text_section_count++);
13939 strcat (buf, ", PIC, REENTRANT");
13943 static int arm_data_section_count = 1;
13946 aof_data_section (void)
13948 static char buf[100];
13949 sprintf (buf, "\tAREA |C$$data%d|, DATA", arm_data_section_count++);
13953 /* The AOF assembler is religiously strict about declarations of
13954 imported and exported symbols, so that it is impossible to declare
13955 a function as imported near the beginning of the file, and then to
13956 export it later on. It is, however, possible to delay the decision
13957 until all the functions in the file have been compiled. To get
13958 around this, we maintain a list of the imports and exports, and
13959 delete from it any that are subsequently defined. At the end of
13960 compilation we spit the remainder of the list out before the END
13965 struct import * next;
13969 static struct import * imports_list = NULL;
13972 aof_add_import (const char *name)
13974 struct import * new;
13976 for (new = imports_list; new; new = new->next)
13977 if (new->name == name)
13980 new = (struct import *) xmalloc (sizeof (struct import));
13981 new->next = imports_list;
13982 imports_list = new;
13987 aof_delete_import (const char *name)
13989 struct import ** old;
13991 for (old = &imports_list; *old; old = & (*old)->next)
13993 if ((*old)->name == name)
13995 *old = (*old)->next;
14001 int arm_main_function = 0;
14004 aof_dump_imports (FILE *f)
14006 /* The AOF assembler needs this to cause the startup code to be extracted
14007 from the library. Brining in __main causes the whole thing to work
14009 if (arm_main_function)
14012 fputs ("\tIMPORT __main\n", f);
14013 fputs ("\tDCD __main\n", f);
14016 /* Now dump the remaining imports. */
14017 while (imports_list)
14019 fprintf (f, "\tIMPORT\t");
14020 assemble_name (f, imports_list->name);
14022 imports_list = imports_list->next;
14027 aof_globalize_label (FILE *stream, const char *name)
14029 default_globalize_label (stream, name);
14030 if (! strcmp (name, "main"))
14031 arm_main_function = 1;
14035 aof_file_start (void)
14037 fputs ("__r0\tRN\t0\n", asm_out_file);
14038 fputs ("__a1\tRN\t0\n", asm_out_file);
14039 fputs ("__a2\tRN\t1\n", asm_out_file);
14040 fputs ("__a3\tRN\t2\n", asm_out_file);
14041 fputs ("__a4\tRN\t3\n", asm_out_file);
14042 fputs ("__v1\tRN\t4\n", asm_out_file);
14043 fputs ("__v2\tRN\t5\n", asm_out_file);
14044 fputs ("__v3\tRN\t6\n", asm_out_file);
14045 fputs ("__v4\tRN\t7\n", asm_out_file);
14046 fputs ("__v5\tRN\t8\n", asm_out_file);
14047 fputs ("__v6\tRN\t9\n", asm_out_file);
14048 fputs ("__sl\tRN\t10\n", asm_out_file);
14049 fputs ("__fp\tRN\t11\n", asm_out_file);
14050 fputs ("__ip\tRN\t12\n", asm_out_file);
14051 fputs ("__sp\tRN\t13\n", asm_out_file);
14052 fputs ("__lr\tRN\t14\n", asm_out_file);
14053 fputs ("__pc\tRN\t15\n", asm_out_file);
14054 fputs ("__f0\tFN\t0\n", asm_out_file);
14055 fputs ("__f1\tFN\t1\n", asm_out_file);
14056 fputs ("__f2\tFN\t2\n", asm_out_file);
14057 fputs ("__f3\tFN\t3\n", asm_out_file);
14058 fputs ("__f4\tFN\t4\n", asm_out_file);
14059 fputs ("__f5\tFN\t5\n", asm_out_file);
14060 fputs ("__f6\tFN\t6\n", asm_out_file);
14061 fputs ("__f7\tFN\t7\n", asm_out_file);
14066 aof_file_end (void)
14069 aof_dump_pic_table (asm_out_file);
14071 aof_dump_imports (asm_out_file);
14072 fputs ("\tEND\n", asm_out_file);
14074 #endif /* AOF_ASSEMBLER */
14077 /* Symbols in the text segment can be accessed without indirecting via the
14078 constant pool; it may take an extra binary operation, but this is still
14079 faster than indirecting via memory. Don't do this when not optimizing,
14080 since we won't be calculating al of the offsets necessary to do this
14084 arm_encode_section_info (tree decl, rtx rtl, int first)
14086 /* This doesn't work with AOF syntax, since the string table may be in
14087 a different AREA. */
14088 #ifndef AOF_ASSEMBLER
14089 if (optimize > 0 && TREE_CONSTANT (decl))
14090 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
14093 /* If we are referencing a function that is weak then encode a long call
14094 flag in the function name, otherwise if the function is static or
14095 or known to be defined in this file then encode a short call flag. */
14096 if (first && DECL_P (decl))
14098 if (TREE_CODE (decl) == FUNCTION_DECL && DECL_WEAK (decl))
14099 arm_encode_call_attribute (decl, LONG_CALL_FLAG_CHAR);
14100 else if (! TREE_PUBLIC (decl))
14101 arm_encode_call_attribute (decl, SHORT_CALL_FLAG_CHAR);
14104 #endif /* !ARM_PE */
14107 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
14109 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
14110 && !strcmp (prefix, "L"))
14112 arm_ccfsm_state = 0;
14113 arm_target_insn = NULL;
14115 default_internal_label (stream, prefix, labelno);
14118 /* Output code to add DELTA to the first argument, and then jump
14119 to FUNCTION. Used for C++ multiple inheritance. */
14121 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
14122 HOST_WIDE_INT delta,
14123 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
14126 static int thunk_label = 0;
14128 int mi_delta = delta;
14129 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
14131 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
14134 mi_delta = - mi_delta;
14137 int labelno = thunk_label++;
14138 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
14139 fputs ("\tldr\tr12, ", file);
14140 assemble_name (file, label);
14141 fputc ('\n', file);
14143 while (mi_delta != 0)
14145 if ((mi_delta & (3 << shift)) == 0)
14149 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
14150 mi_op, this_regno, this_regno,
14151 mi_delta & (0xff << shift));
14152 mi_delta &= ~(0xff << shift);
14158 fprintf (file, "\tbx\tr12\n");
14159 ASM_OUTPUT_ALIGN (file, 2);
14160 assemble_name (file, label);
14161 fputs (":\n", file);
14162 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
14166 fputs ("\tb\t", file);
14167 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14168 if (NEED_PLT_RELOC)
14169 fputs ("(PLT)", file);
14170 fputc ('\n', file);
14175 arm_emit_vector_const (FILE *file, rtx x)
14178 const char * pattern;
14180 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14182 switch (GET_MODE (x))
14184 case V2SImode: pattern = "%08x"; break;
14185 case V4HImode: pattern = "%04x"; break;
14186 case V8QImode: pattern = "%02x"; break;
14187 default: gcc_unreachable ();
14190 fprintf (file, "0x");
14191 for (i = CONST_VECTOR_NUNITS (x); i--;)
14195 element = CONST_VECTOR_ELT (x, i);
14196 fprintf (file, pattern, INTVAL (element));
14203 arm_output_load_gr (rtx *operands)
14210 if (GET_CODE (operands [1]) != MEM
14211 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
14212 || GET_CODE (reg = XEXP (sum, 0)) != REG
14213 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
14214 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
14215 return "wldrw%?\t%0, %1";
14217 /* Fix up an out-of-range load of a GR register. */
14218 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
14219 wcgr = operands[0];
14221 output_asm_insn ("ldr%?\t%0, %1", operands);
14223 operands[0] = wcgr;
14225 output_asm_insn ("tmcr%?\t%0, %1", operands);
14226 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
14232 arm_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
14233 int incoming ATTRIBUTE_UNUSED)
14236 /* FIXME: The ARM backend has special code to handle structure
14237 returns, and will reserve its own hidden first argument. So
14238 if this macro is enabled a *second* hidden argument will be
14239 reserved, which will break binary compatibility with old
14240 toolchains and also thunk handling. One day this should be
14244 /* Register in which address to store a structure value
14245 is passed to a function. */
14246 return gen_rtx_REG (Pmode, ARG_REGISTER (1));
14250 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
14252 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
14253 named arg and all anonymous args onto the stack.
14254 XXX I know the prologue shouldn't be pushing registers, but it is faster
14258 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
14259 enum machine_mode mode ATTRIBUTE_UNUSED,
14260 tree type ATTRIBUTE_UNUSED,
14262 int second_time ATTRIBUTE_UNUSED)
14264 cfun->machine->uses_anonymous_args = 1;
14265 if (cum->nregs < NUM_ARG_REGS)
14266 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
14269 /* Return nonzero if the CONSUMER instruction (a store) does not need
14270 PRODUCER's value to calculate the address. */
14273 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
14275 rtx value = PATTERN (producer);
14276 rtx addr = PATTERN (consumer);
14278 if (GET_CODE (value) == COND_EXEC)
14279 value = COND_EXEC_CODE (value);
14280 if (GET_CODE (value) == PARALLEL)
14281 value = XVECEXP (value, 0, 0);
14282 value = XEXP (value, 0);
14283 if (GET_CODE (addr) == COND_EXEC)
14284 addr = COND_EXEC_CODE (addr);
14285 if (GET_CODE (addr) == PARALLEL)
14286 addr = XVECEXP (addr, 0, 0);
14287 addr = XEXP (addr, 0);
14289 return !reg_overlap_mentioned_p (value, addr);
14292 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14293 have an early register shift value or amount dependency on the
14294 result of PRODUCER. */
14297 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
14299 rtx value = PATTERN (producer);
14300 rtx op = PATTERN (consumer);
14303 if (GET_CODE (value) == COND_EXEC)
14304 value = COND_EXEC_CODE (value);
14305 if (GET_CODE (value) == PARALLEL)
14306 value = XVECEXP (value, 0, 0);
14307 value = XEXP (value, 0);
14308 if (GET_CODE (op) == COND_EXEC)
14309 op = COND_EXEC_CODE (op);
14310 if (GET_CODE (op) == PARALLEL)
14311 op = XVECEXP (op, 0, 0);
14314 early_op = XEXP (op, 0);
14315 /* This is either an actual independent shift, or a shift applied to
14316 the first operand of another operation. We want the whole shift
14318 if (GET_CODE (early_op) == REG)
14321 return !reg_overlap_mentioned_p (value, early_op);
14324 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14325 have an early register shift value dependency on the result of
14329 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
14331 rtx value = PATTERN (producer);
14332 rtx op = PATTERN (consumer);
14335 if (GET_CODE (value) == COND_EXEC)
14336 value = COND_EXEC_CODE (value);
14337 if (GET_CODE (value) == PARALLEL)
14338 value = XVECEXP (value, 0, 0);
14339 value = XEXP (value, 0);
14340 if (GET_CODE (op) == COND_EXEC)
14341 op = COND_EXEC_CODE (op);
14342 if (GET_CODE (op) == PARALLEL)
14343 op = XVECEXP (op, 0, 0);
14346 early_op = XEXP (op, 0);
14348 /* This is either an actual independent shift, or a shift applied to
14349 the first operand of another operation. We want the value being
14350 shifted, in either case. */
14351 if (GET_CODE (early_op) != REG)
14352 early_op = XEXP (early_op, 0);
14354 return !reg_overlap_mentioned_p (value, early_op);
14357 /* Return nonzero if the CONSUMER (a mul or mac op) does not
14358 have an early register mult dependency on the result of
14362 arm_no_early_mul_dep (rtx producer, rtx consumer)
14364 rtx value = PATTERN (producer);
14365 rtx op = PATTERN (consumer);
14367 if (GET_CODE (value) == COND_EXEC)
14368 value = COND_EXEC_CODE (value);
14369 if (GET_CODE (value) == PARALLEL)
14370 value = XVECEXP (value, 0, 0);
14371 value = XEXP (value, 0);
14372 if (GET_CODE (op) == COND_EXEC)
14373 op = COND_EXEC_CODE (op);
14374 if (GET_CODE (op) == PARALLEL)
14375 op = XVECEXP (op, 0, 0);
14378 return (GET_CODE (op) == PLUS
14379 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
14383 /* We can't rely on the caller doing the proper promotion when
14384 using APCS or ATPCS. */
14387 arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
14389 return !TARGET_AAPCS_BASED;
14393 /* AAPCS based ABIs use short enums by default. */
14396 arm_default_short_enums (void)
14398 return TARGET_AAPCS_BASED;
14402 /* AAPCS requires that anonymous bitfields affect structure alignment. */
14405 arm_align_anon_bitfield (void)
14407 return TARGET_AAPCS_BASED;
14411 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
14414 arm_cxx_guard_type (void)
14416 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
14420 /* The EABI says test the least significan bit of a guard variable. */
14423 arm_cxx_guard_mask_bit (void)
14425 return TARGET_AAPCS_BASED;
14429 /* The EABI specifies that all array cookies are 8 bytes long. */
14432 arm_get_cookie_size (tree type)
14436 if (!TARGET_AAPCS_BASED)
14437 return default_cxx_get_cookie_size (type);
14439 size = build_int_cst (sizetype, 8);
14444 /* The EABI says that array cookies should also contain the element size. */
14447 arm_cookie_has_size (void)
14449 return TARGET_AAPCS_BASED;
14453 /* The EABI says constructors and destructors should return a pointer to
14454 the object constructed/destroyed. */
14457 arm_cxx_cdtor_returns_this (void)
14459 return TARGET_AAPCS_BASED;
14462 /* The EABI says that an inline function may never be the key
14466 arm_cxx_key_method_may_be_inline (void)
14468 return !TARGET_AAPCS_BASED;
14472 arm_cxx_determine_class_data_visibility (tree decl)
14474 if (!TARGET_AAPCS_BASED)
14477 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
14478 is exported. However, on systems without dynamic vague linkage,
14479 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
14480 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
14481 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
14483 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
14484 DECL_VISIBILITY_SPECIFIED (decl) = 1;
14488 arm_cxx_class_data_always_comdat (void)
14490 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
14491 vague linkage if the class has no key function. */
14492 return !TARGET_AAPCS_BASED;
14496 /* The EABI says __aeabi_atexit should be used to register static
14500 arm_cxx_use_aeabi_atexit (void)
14502 return TARGET_AAPCS_BASED;
14507 arm_set_return_address (rtx source, rtx scratch)
14509 arm_stack_offsets *offsets;
14510 HOST_WIDE_INT delta;
14512 unsigned long saved_regs;
14514 saved_regs = arm_compute_save_reg_mask ();
14516 if ((saved_regs & (1 << LR_REGNUM)) == 0)
14517 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14520 if (frame_pointer_needed)
14521 addr = plus_constant(hard_frame_pointer_rtx, -4);
14524 /* LR will be the first saved register. */
14525 offsets = arm_get_frame_offsets ();
14526 delta = offsets->outgoing_args - (offsets->frame + 4);
14531 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
14532 GEN_INT (delta & ~4095)));
14537 addr = stack_pointer_rtx;
14539 addr = plus_constant (addr, delta);
14541 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
14547 thumb_set_return_address (rtx source, rtx scratch)
14549 arm_stack_offsets *offsets;
14550 HOST_WIDE_INT delta;
14553 unsigned long mask;
14555 emit_insn (gen_rtx_USE (VOIDmode, source));
14557 mask = thumb_compute_save_reg_mask ();
14558 if (mask & (1 << LR_REGNUM))
14560 offsets = arm_get_frame_offsets ();
14562 /* Find the saved regs. */
14563 if (frame_pointer_needed)
14565 delta = offsets->soft_frame - offsets->saved_args;
14566 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
14570 delta = offsets->outgoing_args - offsets->saved_args;
14573 /* Allow for the stack frame. */
14574 if (TARGET_BACKTRACE)
14576 /* The link register is always the first saved register. */
14579 /* Construct the address. */
14580 addr = gen_rtx_REG (SImode, reg);
14581 if ((reg != SP_REGNUM && delta >= 128)
14584 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
14585 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
14589 addr = plus_constant (addr, delta);
14591 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
14594 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14597 /* Implements target hook vector_mode_supported_p. */
14599 arm_vector_mode_supported_p (enum machine_mode mode)
14601 if ((mode == V2SImode)
14602 || (mode == V4HImode)
14603 || (mode == V8QImode))
14609 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
14610 ARM insns and therefore guarantee that the shift count is modulo 256.
14611 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
14612 guarantee no particular behavior for out-of-range counts. */
14614 static unsigned HOST_WIDE_INT
14615 arm_shift_truncation_mask (enum machine_mode mode)
14617 return mode == SImode ? 255 : 0;
14621 /* Map internal gcc register numbers to DWARF2 register numbers. */
14624 arm_dbx_register_number (unsigned int regno)
14629 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
14630 compatibility. The EABI defines them as registers 96-103. */
14631 if (IS_FPA_REGNUM (regno))
14632 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
14634 if (IS_VFP_REGNUM (regno))
14635 return 64 + regno - FIRST_VFP_REGNUM;
14637 if (IS_IWMMXT_GR_REGNUM (regno))
14638 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
14640 if (IS_IWMMXT_REGNUM (regno))
14641 return 112 + regno - FIRST_IWMMXT_REGNUM;
14643 gcc_unreachable ();