#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
+#include "df.h"
/* Forward definitions of types. */
typedef struct minipool_node Mnode;
rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
-static int current_file_function_operand (rtx);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
tree, bool);
#ifdef OBJECT_FORMAT_ELF
-static void arm_elf_asm_constructor (rtx, int);
+static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
+static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static bool arm_handle_option (size_t, const char *, int);
+static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
+static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
\f
/* Initialize the GCC target structure. */
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
+#undef TARGET_HELP
+#define TARGET_HELP arm_target_help
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
-#define TARGET_CANNOT_FORCE_CONST_MEM arm_tls_referenced_p
+#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
+
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE arm_mangle_type
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
+#endif
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
profile. */
#define FL_DIV (1 << 18) /* Hardware divide. */
+#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
+#define FL_NEON (1 << 20) /* Neon instructions. */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
{"fpe2", FPUTYPE_FPA_EMU2},
{"fpe3", FPUTYPE_FPA_EMU2},
{"maverick", FPUTYPE_MAVERICK},
- {"vfp", FPUTYPE_VFP}
+ {"vfp", FPUTYPE_VFP},
+ {"vfp3", FPUTYPE_VFP3},
+ {"neon", FPUTYPE_NEON}
};
ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
- ARM_FP_MODEL_VFP /* FPUTYPE_VFP */
+ ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
+ ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
+ ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
};
}
}
+/* Print NAME as the next entry of a comma-separated list that is wrapped
+   at COLUMNS characters.  FIRST is true for the entry that opens the list.
+   *REMAINING is the room left on the current output line: it is set
+   whenever NAME starts a line and reduced whenever NAME is appended to one.
+   A zero or negative value (COLUMNS too narrow for NAME) is not an error;
+   it merely pushes the next entry onto a line of its own.  */
+
+static void
+arm_help_print_name (const char *name, bool first, int columns,
+                     int *remaining)
+{
+  /* Work in int throughout so that a narrow COLUMNS yields an ordinary
+     negative value instead of relying on size_t wrap-around.  */
+  int len = (int) strlen (name);
+
+  if (first)
+    {
+      printf (" %s", name);
+      *remaining = columns - (len + 4);
+    }
+  else if (*remaining > len + 2)
+    {
+      /* NAME and its ", " separator still fit on this line.  */
+      printf (", %s", name);
+      *remaining -= len + 2;
+    }
+  else
+    {
+      /* Start a new line; only emit the trailing comma if there is
+         still room for it on the line being finished.  */
+      if (*remaining > 0)
+        printf (",");
+      printf ("\n %s", name);
+      *remaining = columns - (len + 4);
+    }
+}
+
+/* Implement TARGET_HELP.  Print the names held in all_cores (for -mcpu=
+   and -mtune=) and in all_architectures (for -march=), wrapping each list
+   to the width given by the COLUMNS environment variable, or to 80
+   columns if COLUMNS is unset or is not a positive number.  Each table is
+   walked from its last real entry down to its first.
+
+   The width comes from the user's environment, so a value that is too
+   small for a name must not be treated as an internal error: the list is
+   then simply printed one name per line.  */
+
+static void
+arm_target_help (void)
+{
+  int i;
+  /* Cached across calls; zero means "not yet determined".  */
+  static int columns = 0;
+  int remaining = 0;
+
+  /* If we have not done so already, obtain the desired maximum width of
+     the output.  Note - this is a duplication of the code at the start of
+     gcc/opts.c:print_specific_help() - the two copies should probably be
+     replaced by a single function.  */
+  if (columns == 0)
+    {
+      const char *p;
+
+      GET_ENVIRONMENT (p, "COLUMNS");
+      if (p != NULL)
+        {
+          int value = atoi (p);
+
+          if (value > 0)
+            columns = value;
+        }
+
+      if (columns == 0)
+        /* Use a reasonable default.  */
+        columns = 80;
+    }
+
+  printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
+
+  /* The - 2 is because we know that the last entry in the array is NULL.  */
+  i = ARRAY_SIZE (all_cores) - 2;
+  gcc_assert (i > 0);
+  arm_help_print_name (all_cores[i].name, true, columns, &remaining);
+  while (i--)
+    arm_help_print_name (all_cores[i].name, false, columns, &remaining);
+
+  printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
+
+  /* As above, skip the terminating NULL entry.  */
+  i = ARRAY_SIZE (all_architectures) - 2;
+  gcc_assert (i > 0);
+  arm_help_print_name (all_architectures[i].name, true, columns, &remaining);
+  while (i--)
+    arm_help_print_name (all_architectures[i].name, false, columns,
+                         &remaining);
+
+  printf ("\n");
+}
+
/* Fix up any incompatible options that the user has specified.
This has now turned into a maze. */
void
ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
}
+ if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
+ {
+ error ("RTP PIC is incompatible with Thumb");
+ flag_pic = 0;
+ }
+
/* If stack checking is disabled, we can use r10 as the PIC register,
which keeps r9 available. The EABI specifies r9 as the PIC register. */
if (flag_pic && TARGET_SINGLE_PIC_BASE)
- arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
+ {
+ if (TARGET_VXWORKS_RTP)
+ warning (0, "RTP PIC is incompatible with -msingle-pic-base");
+ arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
+ }
+
+ if (flag_pic && TARGET_VXWORKS_RTP)
+ arm_pic_register = 9;
if (arm_pic_register_string != NULL)
{
else if (pic_register < 0 || call_used_regs[pic_register]
|| pic_register == HARD_FRAME_POINTER_REGNUM
|| pic_register == STACK_POINTER_REGNUM
- || pic_register >= PC_REGNUM)
+ || pic_register >= PC_REGNUM
+ || (TARGET_VXWORKS_RTP
+ && (unsigned int) pic_register != arm_pic_register))
error ("unable to use '%s' for PIC register", arm_pic_register_string);
else
arm_pic_register = pic_register;
if (flag_pic
&& arm_pic_register != INVALID_REGNUM
- && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
+ && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
return 0;
}
since this also requires an insn. */
if (TARGET_HARD_FLOAT && TARGET_FPA)
for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
- if (regs_ever_live[regno] && !call_used_regs[regno])
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
return 0;
/* Likewise VFP regs. */
if (TARGET_HARD_FLOAT && TARGET_VFP)
for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
- if (regs_ever_live[regno] && !call_used_regs[regno])
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
return 0;
if (TARGET_REALLY_IWMMXT)
for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
- if (regs_ever_live[regno] && ! call_used_regs [regno])
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
return 0;
return 1;
{
HOST_WIDE_INT size;
+ size = int_size_in_bytes (type);
+
+ /* Vector values should be returned using ARM registers, not memory (unless
+ they're over 16 bytes, which will break since we only have four
+ call-clobbered registers to play with). */
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ return (size < 0 || size > (4 * UNITS_PER_WORD));
+
if (!AGGREGATE_TYPE_P (type) &&
- (TREE_CODE (type) != VECTOR_TYPE) &&
!(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
/* All simple types are returned in registers.
For AAPCS, complex types are treated the same as aggregates. */
return 0;
- size = int_size_in_bytes (type);
-
if (arm_abi != ARM_ABI_APCS)
{
/* ATPCS and later return aggregate types in memory only if they are
return (size < 0 || size > UNITS_PER_WORD);
}
- /* To maximize backwards compatibility with previous versions of gcc,
- return vectors up to 4 words in registers. */
- if (TREE_CODE (type) == VECTOR_TYPE)
- return (size < 0 || size > (4 * UNITS_PER_WORD));
-
/* For the arm-wince targets we choose to be compatible with Microsoft's
ARM and Thumb compilers, which always return aggregates in memory. */
#ifndef ARM_WINCE
pcum->iwmmxt_nregs = 0;
pcum->can_split = true;
- pcum->call_cookie = CALL_NORMAL;
-
- if (TARGET_LONG_CALLS)
- pcum->call_cookie = CALL_LONG;
-
- /* Check for long call/short call attributes. The attributes
- override any command line option. */
- if (fntype)
- {
- if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (fntype)))
- pcum->call_cookie = CALL_SHORT;
- else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
- pcum->call_cookie = CALL_LONG;
- }
-
/* Varargs vectors are treated the same as long long.
named_count avoids having to change the way arm handles 'named' */
pcum->named_count = 0;
pcum->nregs++;
if (mode == VOIDmode)
- /* Compute operand 2 of the call insn. */
- return GEN_INT (pcum->call_cookie);
+ /* Pick an arbitrary value for operand 2 of the call insn. */
+ return const0_rtx;
/* Only allow splitting an arg between regs and memory if all preceding
args were allocated to regs. For args passed by reference we only count
{
int nregs = pcum->nregs;
- if (arm_vector_mode_supported_p (mode))
+ if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
return 0;
if (NUM_ARG_REGS > nregs
return 1;
}
-/* Encode long_call or short_call attribute by prefixing
- symbol name in DECL with a special character FLAG. */
-void
-arm_encode_call_attribute (tree decl, int flag)
-{
- const char * str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
- int len = strlen (str);
- char * newstr;
-
- /* Do not allow weak functions to be treated as short call. */
- if (DECL_WEAK (decl) && flag == SHORT_CALL_FLAG_CHAR)
- return;
-
- newstr = alloca (len + 2);
- newstr[0] = flag;
- strcpy (newstr + 1, str);
-
- newstr = (char *) ggc_alloc_string (newstr, len + 1);
- XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
-}
-
/* Assigns default attributes to newly defined type. This is used to
set short_call/long_call attributes for function types of
functions defined inside corresponding #pragma scopes. */
}
}
\f
-/* Return 1 if the operand is a SYMBOL_REF for a function known to be
- defined within the current compilation unit. If this cannot be
- determined, then 0 is returned. */
-static int
-current_file_function_operand (rtx sym_ref)
+/* Return true if DECL is known to be linked into section SECTION.
+   The answer is deliberately conservative: false is returned whenever
+   this cannot be established, i.e. when DECL is not TREE_STATIC, when
+   it does not bind locally, or when it has no explicit section name
+   and might still be given a section of its own. */
+
+static bool
+arm_function_in_section_p (tree decl, section *section)
{
- /* This is a bit of a fib. A function will have a short call flag
- applied to its name if it has the short call attribute, or it has
- already been defined within the current compilation unit. */
- if (ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref, 0)))
- return 1;
+ /* We can only be certain about functions defined in the same
+ compilation unit. */
+ if (!TREE_STATIC (decl))
+ return false;
- /* The current function is always defined within the current compilation
- unit. If it s a weak definition however, then this may not be the real
- definition of the function, and so we have to say no. */
- if (sym_ref == XEXP (DECL_RTL (current_function_decl), 0)
- && !DECL_WEAK (current_function_decl))
- return 1;
+ /* Make sure that DECL always binds to the definition in this
+ compilation unit. */
+ if (!targetm.binds_local_p (decl))
+ return false;
- /* We cannot make the determination - default to returning 0. */
- return 0;
+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. Otherwise
+ check that DECL cannot still end up in a section of its own. */
+ if (!DECL_SECTION_NAME (decl))
+ {
+ /* Only cater for unit-at-a-time mode, where we know that the user
+ cannot later specify a section for DECL. */
+ if (!flag_unit_at_a_time)
+ return false;
+
+ /* Make sure that we will not create a unique section for DECL. */
+ if (flag_function_sections || DECL_ONE_ONLY (decl))
+ return false;
+ }
+
+ return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
- this call. We generate a long_call if the function:
+ a call from the current function to DECL. We generate a long_call
+ if the function:
a. has an __attribute__((long call))
or b. is within the scope of a #pragma long_calls
or c. the -mlong-calls command line switch has been specified
- . and either:
- 1. -ffunction-sections is in effect
- or 2. the current function has __attribute__ ((section))
- or 3. the target function has __attribute__ ((section))
However we do not generate a long call if the function:
d. has an __attribute__ ((short_call))
or e. is inside the scope of a #pragma no_long_calls
- or f. is defined within the current compilation unit.
-
- This function will be called by C fragments contained in the machine
- description file. SYM_REF and CALL_COOKIE correspond to the matched
- rtl operands. CALL_SYMBOL is used to distinguish between
- two different callers of the function. It is set to 1 in the
- "call_symbol" and "call_symbol_value" patterns and to 0 in the "call"
- and "call_value" patterns. This is because of the difference in the
- SYM_REFs passed by these patterns. */
-int
-arm_is_longcall_p (rtx sym_ref, int call_cookie, int call_symbol)
-{
- if (!call_symbol)
- {
- if (GET_CODE (sym_ref) != MEM)
- return 0;
+ or f. is defined in the same section as the current function.
+
+ The tests are made in this order: the short_call attribute, then
+ case f, then the long_call attribute, and finally TARGET_LONG_CALLS.
+ A callee known to share the current function's section therefore
+ gets a short call even if it carries a long_call attribute. DECL
+ may be null, in which case only TARGET_LONG_CALLS is consulted. */
- sym_ref = XEXP (sym_ref, 0);
- }
+bool
+arm_is_long_call_p (tree decl)
+{
+ tree attrs;
- if (GET_CODE (sym_ref) != SYMBOL_REF)
- return 0;
+ /* Without a DECL there are no attributes and no section placement
+ to inspect, so fall back to the command-line default. */
+ if (!decl)
+ return TARGET_LONG_CALLS;
- if (call_cookie & CALL_SHORT)
- return 0;
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("short_call", attrs))
+ return false;
- if (TARGET_LONG_CALLS)
- {
- if (flag_function_sections
- || DECL_SECTION_NAME (current_function_decl))
- /* c.3 is handled by the definition of the
- ARM_DECLARE_FUNCTION_SIZE macro. */
- return 1;
- }
+ /* For case "f" of the list above, be conservative, and only cater
+ for cases in which the whole of the current function is placed in
+ the same section. */
+ if (!flag_reorder_blocks_and_partition
+ && arm_function_in_section_p (decl, current_function_section ()))
+ return false;
- if (current_file_function_operand (sym_ref))
- return 0;
+ if (lookup_attribute ("long_call", attrs))
+ return true;
- return (call_cookie & CALL_LONG)
- || ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
- || TARGET_LONG_CALLS;
+ return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL. */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
- int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;
unsigned long func_type;
if (cfun->machine->sibcall_blocked)
if (decl == NULL || TARGET_THUMB)
return false;
- /* Get the calling method. */
- if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
- call_type = CALL_SHORT;
- else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
- call_type = CALL_LONG;
+ /* The PIC register is live on entry to VxWorks PLT entries, so we
+ must make the call before restoring the PIC register. */
+ if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
+ return false;
/* Cannot tail-call to long calls, since these are out of range of
- a branch instruction. However, if not compiling PIC, we know
- we can reach the symbol if it is in this compilation unit. */
- if (call_type == CALL_LONG && (flag_pic || !TREE_ASM_WRITTEN (decl)))
+ a branch instruction. */
+ if (arm_is_long_call_p (decl))
return false;
/* If we are interworking and the function is not declared static
return 1;
}
+/* Note that the current function needs a PIC register, setting up
+   cfun->machine->pic_reg unless the function is already marked as using
+   the PIC offset table.  */
+
+static void
+require_pic_register (void)
+{
+  /* This routine is also reached from the rtx cost estimation process.
+     Such calls must not disturb any assumptions about the real function,
+     and entry_of_function() cannot be called until the real expansion
+     process has started.  */
+  if (current_function_uses_pic_offset_table)
+    return;
+
+  gcc_assert (can_create_pseudo_p ());
+
+  if (arm_pic_register == INVALID_REGNUM)
+    {
+      rtx load_seq;
+
+      /* No fixed PIC register: use a fresh pseudo.  */
+      cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+
+      /* When called as part of cost estimation, stop here so that the
+         function is not marked as needing pic.  */
+      if (current_ir_type () == IR_GIMPLE)
+        return;
+
+      current_function_uses_pic_offset_table = 1;
+
+      /* Build the load of the PIC register in a sequence of its own and
+         place it after the entry of the function.  */
+      start_sequence ();
+      arm_load_pic_register (0UL);
+      load_seq = get_insns ();
+      end_sequence ();
+      emit_insn_after (load_seq, entry_of_function ());
+    }
+  else
+    {
+      cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
+
+      /* As above, do not mark the function as needing pic when we are
+         only being called as part of cost estimation.  */
+      if (current_ir_type () != IR_GIMPLE)
+        current_function_uses_pic_offset_table = 1;
+    }
+}
+
rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
rtx insn;
int subregs = 0;
- /* If this function doesn't have a pic register, create one now.
- A lot of the logic here is made obscure by the fact that this
- routine gets called as part of the rtx cost estimation
- process. We don't want those calls to affect any assumptions
- about the real function; and further, we can't call
- entry_of_function() until we start the real expansion
- process. */
- if (!current_function_uses_pic_offset_table)
- {
- gcc_assert (!no_new_pseudos);
- if (arm_pic_register != INVALID_REGNUM)
- {
- cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
-
- /* Play games to avoid marking the function as needing pic
- if we are being called as part of the cost-estimation
- process. */
- if (current_ir_type () != IR_GIMPLE)
- current_function_uses_pic_offset_table = 1;
- }
- else
- {
- rtx seq;
-
- cfun->machine->pic_reg = gen_reg_rtx (Pmode);
-
- /* Play games to avoid marking the function as needing pic
- if we are being called as part of the cost-estimation
- process. */
- if (current_ir_type () != IR_GIMPLE)
- {
- current_function_uses_pic_offset_table = 1;
- start_sequence ();
-
- arm_load_pic_register (0UL);
-
- seq = get_insns ();
- end_sequence ();
- emit_insn_after (seq, entry_of_function ());
- }
- }
- }
+ /* If this function doesn't have a pic register, create one now. */
+ require_pic_register ();
if (reg == 0)
{
- gcc_assert (!no_new_pseudos);
+ gcc_assert (can_create_pseudo_p ());
reg = gen_reg_rtx (Pmode);
subregs = 1;
else /* TARGET_THUMB1 */
emit_insn (gen_pic_load_addr_thumb1 (address, orig));
+ /* VxWorks does not impose a fixed gap between segments; the run-time
+ gap can be different from the object-file gap. We therefore can't
+ use GOTOFF unless we are absolutely sure that the symbol is in the
+ same segment as the GOT. Unfortunately, the flexibility of linker
+ scripts means that we can't be sure of that in general, so assume
+ that GOTOFF is never valid on VxWorks. */
if ((GET_CODE (orig) == LABEL_REF
|| (GET_CODE (orig) == SYMBOL_REF &&
SYMBOL_REF_LOCAL_P (orig)))
- && NEED_GOT_RELOC)
+ && NEED_GOT_RELOC
+ && !TARGET_VXWORKS_RTP)
pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
else
{
if (reg == 0)
{
- gcc_assert (!no_new_pseudos);
+ gcc_assert (can_create_pseudo_p ());
reg = gen_reg_rtx (Pmode);
}
test the index for the appropriate mode. */
if (!arm_legitimate_index_p (mode, offset, SET, 0))
{
- gcc_assert (!no_new_pseudos);
+ gcc_assert (can_create_pseudo_p ());
offset = force_reg (Pmode, offset);
}
register allocation order means that sometimes r3 might be used
but earlier argument registers might not, so check them all. */
for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
- if (!regs_ever_live[reg])
+ if (!df_regs_ever_live_p (reg))
return reg;
/* Before going on to check the call-saved registers we can try a couple
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
#ifndef AOF_ASSEMBLER
- rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx;
+ rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
rtx global_offset_table;
if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
gcc_assert (flag_pic);
- /* We use an UNSPEC rather than a LABEL_REF because this label never appears
- in the code stream. */
-
- labelno = GEN_INT (pic_labelno++);
- l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
- l1 = gen_rtx_CONST (VOIDmode, l1);
-
- global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
- /* On the ARM the PC register contains 'dot + 8' at the time of the
- addition, on the Thumb it is 'dot + 4'. */
- pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
- if (GOT_PCREL)
- pic_tmp2 = gen_rtx_CONST (VOIDmode,
- gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx));
- else
- pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
+ pic_reg = cfun->machine->pic_reg;
+ if (TARGET_VXWORKS_RTP)
+ {
+ pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
+ pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
+ emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
- pic_rtx = gen_rtx_CONST (Pmode, gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp));
+ emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
- if (TARGET_ARM)
- {
- emit_insn (gen_pic_load_addr_arm (cfun->machine->pic_reg, pic_rtx));
- emit_insn (gen_pic_add_dot_plus_eight (cfun->machine->pic_reg,
- cfun->machine->pic_reg, labelno));
+ pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
+ emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
}
- else if (TARGET_THUMB2)
+ else
{
- /* Thumb-2 only allows very limited access to the PC. Calculate the
- address in a temporary register. */
- if (arm_pic_register != INVALID_REGNUM)
+ /* We use an UNSPEC rather than a LABEL_REF because this label
+ never appears in the code stream. */
+
+ labelno = GEN_INT (pic_labelno++);
+ l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ l1 = gen_rtx_CONST (VOIDmode, l1);
+
+ global_offset_table
+ = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+ /* On the ARM the PC register contains 'dot + 8' at the time of the
+ addition, on the Thumb it is 'dot + 4'. */
+ pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
+ if (GOT_PCREL)
{
- pic_tmp = gen_rtx_REG (SImode,
- thumb_find_work_register (saved_regs));
+ pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
+ pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
}
else
+ pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
+
+ pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
+ pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
+
+ if (TARGET_ARM)
{
- gcc_assert (!no_new_pseudos);
- pic_tmp = gen_reg_rtx (Pmode);
+ emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
+ emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
}
+ else if (TARGET_THUMB2)
+ {
+ /* Thumb-2 only allows very limited access to the PC. Calculate the
+ address in a temporary register. */
+ if (arm_pic_register != INVALID_REGNUM)
+ {
+ pic_tmp = gen_rtx_REG (SImode,
+ thumb_find_work_register (saved_regs));
+ }
+ else
+ {
+ gcc_assert (can_create_pseudo_p ());
+ pic_tmp = gen_reg_rtx (Pmode);
+ }
- emit_insn (gen_pic_load_addr_thumb2 (cfun->machine->pic_reg, pic_rtx));
- emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
- emit_insn (gen_addsi3(cfun->machine->pic_reg, cfun->machine->pic_reg,
- pic_tmp));
- }
- else /* TARGET_THUMB1 */
- {
- if (arm_pic_register != INVALID_REGNUM
- && REGNO (cfun->machine->pic_reg) > LAST_LO_REGNUM)
+ emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
+ emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
+ emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
+ }
+ else /* TARGET_THUMB1 */
{
- /* We will have pushed the pic register, so we should always be
- able to find a work register. */
- pic_tmp = gen_rtx_REG (SImode,
- thumb_find_work_register (saved_regs));
- emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
- emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
+ if (arm_pic_register != INVALID_REGNUM
+ && REGNO (pic_reg) > LAST_LO_REGNUM)
+ {
+ /* We will have pushed the pic register, so we should always be
+ able to find a work register. */
+ pic_tmp = gen_rtx_REG (SImode,
+ thumb_find_work_register (saved_regs));
+ emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
+ emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
+ }
+ else
+ emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
+ emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
}
- else
- emit_insn (gen_pic_load_addr_thumb1 (cfun->machine->pic_reg, pic_rtx));
- emit_insn (gen_pic_add_dot_plus_four (cfun->machine->pic_reg,
- cfun->machine->pic_reg, labelno));
}
/* Need to emit this whether or not we obey regdecls,
since setjmp/longjmp can cause life info to screw up. */
- emit_insn (gen_rtx_USE (VOIDmode, cfun->machine->pic_reg));
+ emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
#endif /* AOF_ASSEMBLER */
}
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
return 1;
- else if (mode == TImode)
+ else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
return 0;
else if (code == PLUS)
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
return 1;
- else if (mode == TImode)
+ else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
return 0;
else if (code == PLUS)
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
+ if (TARGET_NEON
+ && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
+ return (code == CONST_INT
+ && INTVAL (index) < 1016
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
return (code == CONST_INT
&& INTVAL (index) < 1024
&& (INTVAL (index) & 3) == 0);
}
+ if (TARGET_NEON
+ && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
+ return (code == CONST_INT
+ && INTVAL (index) < 1016
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
if (arm_address_register_rtx_p (index, strict_p)
&& (GET_MODE_SIZE (mode) <= 4))
return 1;
return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
}
+
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  Return true for X if
+   ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P holds and X splits into a
+   SYMBOL_REF plus an offset that offset_within_block_p rejects;
+   otherwise return whatever arm_tls_referenced_p says about X.  */
+
+bool
+arm_cannot_force_const_mem (rtx x)
+{
+  bool offset_escapes_block = false;
+
+  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+    {
+      rtx sym, addend;
+
+      split_const (x, &sym, &addend);
+      offset_escapes_block
+        = (GET_CODE (sym) == SYMBOL_REF
+           && !offset_within_block_p (sym, INTVAL (addend)));
+    }
+
+  /* The TLS check is only reached when the offset test did not already
+     decide the answer.  */
+  return offset_escapes_block || arm_tls_referenced_p (x);
+}
\f
#define REG_OR_SUBREG_REG(X) \
(GET_CODE (X) == REG \
return 6;
case CONST_DOUBLE:
- if (arm_const_double_rtx (x))
+ if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
return outer == SET ? 2 : -1;
else if ((outer == COMPARE || outer == PLUS)
&& neg_const_double_rtx_ok_for_fpa (x))
return 0;
}
-\f
-/* Predicates for `match_operand' and `match_operator'. */
-
-/* Return nonzero if OP is a valid Cirrus memory address pattern. */
-int
-cirrus_memory_offset (rtx op)
-{
- /* Reject eliminable registers. */
- if (! (reload_in_progress || reload_completed)
- && ( reg_mentioned_p (frame_pointer_rtx, op)
- || reg_mentioned_p (arg_pointer_rtx, op)
- || reg_mentioned_p (virtual_incoming_args_rtx, op)
- || reg_mentioned_p (virtual_outgoing_args_rtx, op)
- || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
- || reg_mentioned_p (virtual_stack_vars_rtx, op)))
- return 0;
- if (GET_CODE (op) == MEM)
- {
- rtx ind;
- ind = XEXP (op, 0);
+/* VFPv3 has a fairly wide range of representable immediates, formed from
+ "quarter-precision" floating-point values. These can be evaluated using this
+ formula (with ^ for exponentiation):
- /* Match: (mem (reg)). */
- if (GET_CODE (ind) == REG)
- return 1;
+ -1^s * n * 2^-r
- /* Match:
- (mem (plus (reg)
- (const))). */
- if (GET_CODE (ind) == PLUS
- && GET_CODE (XEXP (ind, 0)) == REG
- && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
- && GET_CODE (XEXP (ind, 1)) == CONST_INT)
- return 1;
- }
+ Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
+ 16 <= n <= 31 and 0 <= r <= 7.
- return 0;
-}
+ These values are mapped onto an 8-bit integer ABCDEFGH s.t.
-/* Return TRUE if OP is a valid coprocessor memory address pattern.
- WB is true if full writeback address modes are allowed and is false
- if limited writeback address modes (POST_INC and PRE_DEC) are
- allowed. */
+ - A (most-significant) is the sign bit.
+ - BCD are the exponent (encoded as r XOR 3).
+ - EFGH are the mantissa (encoded as n - 16).
+*/
-int
-arm_coproc_mem_operand (rtx op, bool wb)
+/* Return an integer index for a VFPv3 immediate operand X suitable for the
+ fconst[sd] instruction, or -1 if X isn't suitable. The index is the 8-bit
+ ABCDEFGH encoding described above: the sign in bit 7, (exponent XOR 3) in
+ bits 4-6 and (mantissa - 16) in bits 0-3. -1 is also returned when
+ TARGET_VFP3 is false or X is not a CONST_DOUBLE. */
+static int
+vfp3_const_double_index (rtx x)
{
- rtx ind;
+ REAL_VALUE_TYPE r, m;
+ int sign, exponent;
+ unsigned HOST_WIDE_INT mantissa, mant_hi;
+ unsigned HOST_WIDE_INT mask;
+ HOST_WIDE_INT m1, m2;
+ int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; /* Bit index of the binary point. */
- /* Reject eliminable registers. */
- if (! (reload_in_progress || reload_completed)
- && ( reg_mentioned_p (frame_pointer_rtx, op)
- || reg_mentioned_p (arg_pointer_rtx, op)
- || reg_mentioned_p (virtual_incoming_args_rtx, op)
- || reg_mentioned_p (virtual_outgoing_args_rtx, op)
- || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
- || reg_mentioned_p (virtual_stack_vars_rtx, op)))
- return FALSE;
+ if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
+ return -1;
- /* Constants are converted into offsets from labels. */
- if (GET_CODE (op) != MEM)
- return FALSE;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
- ind = XEXP (op, 0);
+ /* We can't represent these things, so detect them first. */
+ if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
+ return -1;
- if (reload_completed
- && (GET_CODE (ind) == LABEL_REF
- || (GET_CODE (ind) == CONST
- && GET_CODE (XEXP (ind, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
- && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
- return TRUE;
+ /* Extract sign, exponent and mantissa. */
+ sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
+ r = REAL_VALUE_ABS (r);
+ exponent = REAL_EXP (&r);
+ /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
+ highest (sign) bit, with a fixed binary point at bit point_pos.
+ WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
+ bits for the mantissa, this may fail (low bits would be lost). */
+ real_ldexp (&m, &r, point_pos - exponent);
+ REAL_VALUE_TO_INT (&m1, &m2, m);
+ mantissa = m1;
+ mant_hi = m2;
+
+ /* If there are bits set in the low part of the mantissa, we can't
+ represent this value. */
+ if (mantissa != 0)
+ return -1;
- /* Match: (mem (reg)). */
- if (GET_CODE (ind) == REG)
- return arm_address_register_rtx_p (ind, 0);
+ /* Now make it so that mantissa contains the most-significant bits, and move
+ the point_pos to indicate that the least-significant bits have been
+ discarded. */
+ point_pos -= HOST_BITS_PER_WIDE_INT;
+ mantissa = mant_hi;
- /* Autoincremment addressing modes. POST_INC and PRE_DEC are
- acceptable in any case (subject to verification by
- arm_address_register_rtx_p). We need WB to be true to accept
- PRE_INC and POST_DEC. */
+ /* We can permit four significant bits of mantissa only, plus a high bit
+ which is always 1. */
+ mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+ if ((mantissa & mask) != 0)
+ return -1;
+
+ /* Now we know the mantissa is in range, chop off the unneeded bits. */
+ mantissa >>= point_pos - 5;
+
+ /* The mantissa may be zero. Disallow that case. (It's possible to load the
+ floating-point immediate zero with Neon using an integer-zero load, but
+ that case is handled elsewhere.) */
+ if (mantissa == 0)
+ return -1;
+
+ gcc_assert (mantissa >= 16 && mantissa <= 31);
+
+ /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
+ normalized significands are in the range [1, 2)). (Our mantissa is shifted
+ left 4 places at this point relative to normalized IEEE754 values). GCC
+ internally uses [0.5, 1) (see real.c), so the exponent returned from
+ REAL_EXP must be altered. */
+ exponent = 5 - exponent;
+
+ if (exponent < 0 || exponent > 7)
+ return -1;
+
+ /* Sign, mantissa and exponent are now in the correct form to plug into the
+ formulae described in the comment above. */
+ return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
+}
+
+/* Return nonzero if rtx X is a valid immediate VFPv3 constant, i.e.
+   TARGET_VFP3 is enabled and vfp3_const_double_index finds an encoding
+   for X.  Return zero otherwise.  */
+int
+vfp3_const_double_rtx (rtx x)
+{
+  return TARGET_VFP3 && vfp3_const_double_index (x) != -1;
+}
+
+/* Recognize immediates which can be used in various Neon instructions. Legal
+ immediates are described by the following table (for VMVN variants, the
+ bitwise inverse of the constant shown is recognized. In either case, VMOV
+ is output and the correct instruction to use for a given constant is chosen
+ by the assembler). The constant shown is replicated across all elements of
+ the destination vector.
+
+   insn elems variant constant (binary)
+   ---- ----- ------- -----------------
+   vmov  i32     0    00000000 00000000 00000000 abcdefgh
+   vmov  i32     1    00000000 00000000 abcdefgh 00000000
+   vmov  i32     2    00000000 abcdefgh 00000000 00000000
+   vmov  i32     3    abcdefgh 00000000 00000000 00000000
+   vmov  i16     4    00000000 abcdefgh
+   vmov  i16     5    abcdefgh 00000000
+   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
+   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
+   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
+   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
+   vmvn  i16    10    00000000 abcdefgh
+   vmvn  i16    11    abcdefgh 00000000
+   vmov  i32    12    00000000 00000000 abcdefgh 11111111
+   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
+   vmov  i32    14    00000000 abcdefgh 11111111 11111111
+   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
+   vmov   i8    16    abcdefgh
+   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
+                      eeeeeeee ffffffff gggggggg hhhhhhhh
+   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
+
+ For case 18, B = !b. Representable values are exactly those accepted by
+ vfp3_const_double_index, but are output as floating-point numbers rather
+ than indices.
+
+ Variants 0-5 (inclusive) may also be used as immediates for the second
+ operand of VORR/VBIC instructions.
+
+ The INVERSE argument causes the bitwise inverse of the given operand to be
+ recognized instead (used for recognizing legal immediates for the VAND/VORN
+ pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
+ *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
+ output, rather than the real insns vbic/vorr).
+
+ INVERSE makes no difference to the recognition of float vectors.
+
+ If ELEMENTWIDTH is non-null, *ELEMENTWIDTH receives the element width in
+ bits of the matched variant (0 for float vectors). If MODCONST is non-null,
+ *MODCONST receives the constant to output: the first vector element for
+ float vectors; for variant 17 a CONST_INT built from the first 8 bytes with
+ each nonzero byte widened to 0xff; otherwise a CONST_INT built from the
+ first ELEMENTWIDTH / BITS_PER_UNIT bytes of the vector.
+
+ The return value is the variant of immediate as shown in the above table, or
+ -1 if the given value doesn't match any of the listed patterns.
+*/
+static int
+neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
+ rtx *modconst, int *elementwidth)
+{
+#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
+ matches = 1; \
+ for (i = 0; i < idx; i += (STRIDE)) \
+ if (!(TEST)) \
+ matches = 0; \
+ if (matches) \
+ { \
+ immtype = (CLASS); \
+ elsize = (ELSIZE); \
+ break; \
+ }
+
+ unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
+ unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+ unsigned char bytes[16]; /* Constant flattened to bytes, low byte of each element first. */
+ int immtype = -1, matches;
+ unsigned int invmask = inverse ? 0xff : 0; /* XORed into every byte when INVERSE is set. */
+
+ /* Vectors of float constants. */
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ rtx el0 = CONST_VECTOR_ELT (op, 0);
+ REAL_VALUE_TYPE r0;
+
+ if (!vfp3_const_double_rtx (el0))
+ return -1;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
+
+ for (i = 1; i < n_elts; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, i);
+ REAL_VALUE_TYPE re;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
+
+ if (!REAL_VALUES_EQUAL (r0, re))
+ return -1;
+ }
+
+ if (modconst)
+ *modconst = CONST_VECTOR_ELT (op, 0);
+
+ if (elementwidth)
+ *elementwidth = 0;
+
+ return 18;
+ }
+
+ /* Splat vector constant out into a byte vector. */
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx el = CONST_VECTOR_ELT (op, i);
+ unsigned HOST_WIDE_INT elpart;
+ unsigned int part, parts;
+
+ if (GET_CODE (el) == CONST_INT)
+ {
+ elpart = INTVAL (el);
+ parts = 1;
+ }
+ else if (GET_CODE (el) == CONST_DOUBLE)
+ {
+ elpart = CONST_DOUBLE_LOW (el);
+ parts = 2;
+ }
+ else
+ gcc_unreachable ();
+
+ for (part = 0; part < parts; part++)
+ {
+ unsigned int byte;
+ for (byte = 0; byte < innersize; byte++)
+ {
+ bytes[idx++] = (elpart & 0xff) ^ invmask;
+ elpart >>= BITS_PER_UNIT;
+ }
+ if (GET_CODE (el) == CONST_DOUBLE)
+ elpart = CONST_DOUBLE_HIGH (el);
+ }
+ }
+
+ /* Sanity check. */
+ gcc_assert (idx == GET_MODE_SIZE (mode));
+
+ do
+ {
+ CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
+
+ CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
+
+ CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
+
+ CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
+
+ CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
+
+ CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
+
+ CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
+
+ CHECK (1, 8, 16, bytes[i] == bytes[0]);
+
+ CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+ && bytes[i] == bytes[(i + 8) % idx]);
+ }
+ while (0);
+
+ if (immtype == -1)
+ return -1;
+
+ if (elementwidth)
+ *elementwidth = elsize;
+
+ if (modconst)
+ {
+ unsigned HOST_WIDE_INT imm = 0;
+
+ /* Un-invert bytes of recognized vector, if necessary. */
+ if (invmask != 0)
+ for (i = 0; i < idx; i++)
+ bytes[i] ^= invmask;
+
+ if (immtype == 17)
+ {
+ /* FIXME: Broken on 32-bit H_W_I hosts. */
+ gcc_assert (sizeof (HOST_WIDE_INT) == 8);
+
+ for (i = 0; i < 8; i++)
+ imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
+ << (i * BITS_PER_UNIT);
+
+ *modconst = GEN_INT (imm);
+ }
+ else
+ {
+ unsigned HOST_WIDE_INT imm = 0;
+
+ for (i = 0; i < elsize / BITS_PER_UNIT; i++)
+ imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
+
+ *modconst = GEN_INT (imm);
+ }
+ }
+
+ return immtype;
+#undef CHECK
+}
+
+/* Return 1 if rtx OP (a constant vector of mode MODE) is legal for use as a
+   Neon VMOV (or, implicitly, VMVN) immediate, and 0 otherwise.  On success,
+   if ELEMENTWIDTH is non-null the width per element (or zero for float
+   elements) is written to *ELEMENTWIDTH, and if MODCONST is non-null the
+   modified constant (whatever should be output for a VMOV) is written to
+   *MODCONST.  Neither is touched on failure.  */
+
+int
+neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
+                               rtx *modconst, int *elementwidth)
+{
+  rtx out_const;
+  int out_width;
+
+  if (neon_valid_immediate (op, mode, 0, &out_const, &out_width) == -1)
+    return 0;
+
+  if (modconst != NULL)
+    *modconst = out_const;
+  if (elementwidth != NULL)
+    *elementwidth = out_width;
+
+  return 1;
+}
+
+/* Return 1 if rtx OP is legal for use in a VORR or VBIC instruction, i.e.
+   neon_valid_immediate classifies it as one of variants 0-5, and 0
+   otherwise.  On success, a constant suitable for using as an operand to
+   VORR/VBIC/VAND/VORN is written to *MODCONST and the corresponding element
+   width to *ELEMENTWIDTH (each only when the pointer is non-null).  See
+   neon_valid_immediate for description of INVERSE.  */
+
+int
+neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
+                                rtx *modconst, int *elementwidth)
+{
+  rtx out_const;
+  int out_width;
+  int variant = neon_valid_immediate (op, mode, inverse, &out_const,
+                                      &out_width);
+
+  /* Only the first six table entries are usable by the logic insns.  */
+  if (!(variant >= 0 && variant <= 5))
+    return 0;
+
+  if (modconst != NULL)
+    *modconst = out_const;
+  if (elementwidth != NULL)
+    *elementwidth = out_width;
+
+  return 1;
+}
+
+/* Return an output template for the Neon immediate logic operation MNEM.
+   *OP2 must be a valid logic immediate for MODE (see
+   neon_immediate_valid_for_logic, which also receives INVERSE); it is
+   replaced in place by the constant to print.  QUAD selects the %q0
+   destination form instead of %P0.  The result lives in a static buffer
+   that is overwritten by the next call.  */
+
+char *
+neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
+                             int inverse, int quad)
+{
+  static char templ[40];
+  int width;
+  int ok = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
+
+  gcc_assert (ok != 0);
+
+  sprintf (templ, quad ? "%s.i%d\t%%q0, %%2" : "%s.i%d\t%%P0, %%2",
+           mnem, width);
+
+  return templ;
+}
+
+/* Emit a sequence of pairwise operations implementing a reduction of OP1
+   (a vector of mode MODE) into OP0.  REDUC generates one pairwise insn from
+   (destination, operand, operand).  The step count starts at half the
+   element count and halves each time; every step but the last writes a
+   fresh pseudo, the last writes OP0.
+   NOTE: We do "too much work" here, because pairwise operations work on two
+   registers-worth of operands in one go.  Although all vector elements of
+   the result but the first are ignored, the same result is calculated in
+   each of the elements.  An alternative such as initially loading a vector
+   with zero to use as each of the second operands would use up an
+   additional register and take an extra instruction, for no particular
+   gain.  */
+
+void
+neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
+                      rtx (*reduc) (rtx, rtx, rtx))
+{
+  unsigned int n_elts
+    = GET_MODE_SIZE (mode) / GET_MODE_SIZE (GET_MODE_INNER (mode));
+  unsigned int remaining = n_elts / 2;
+  rtx acc = op1;
+
+  while (remaining >= 1)
+    {
+      rtx dest = (remaining == 1) ? op0 : gen_reg_rtx (mode);
+
+      emit_insn (reduc (dest, acc, acc));
+      acc = dest;
+      remaining /= 2;
+    }
+}
+
+/* Initialise vector register TARGET from the elements of VALS, which need
+   not be constant.  Each element is stored into a stack temporary of
+   TARGET's mode and the whole vector is then loaded from it.  FIXME: We can
+   do better than building a vector on the stack and then loading it in
+   many cases.  See rs6000.c.  */
+
+void
+neon_expand_vector_init (rtx target, rtx vals)
+{
+  enum machine_mode mode = GET_MODE (target);
+  enum machine_mode elt_mode = GET_MODE_INNER (mode);
+  unsigned int n_elts = GET_MODE_NUNITS (mode);
+  unsigned int idx;
+  rtx slot;
+
+  gcc_assert (VECTOR_MODE_P (mode));
+
+  slot = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
+
+  for (idx = 0; idx < n_elts; idx++)
+    {
+      rtx elt_mem
+        = adjust_address_nv (slot, elt_mode, idx * GET_MODE_SIZE (elt_mode));
+
+      emit_move_insn (elt_mem, XVECEXP (vals, 0, idx));
+    }
+
+  emit_move_insn (target, slot);
+}
+
+/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
+   ERR if it doesn't.  OPERAND must be a CONST_INT (asserted).  ERR is
+   reported as plain text, not as a format string.  FIXME: NEON bounds
+   checks occur late in compilation, so reported source locations are
+   bogus.  */
+
+static void
+bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
+              const char *err)
+{
+  HOST_WIDE_INT lane;
+
+  gcc_assert (GET_CODE (operand) == CONST_INT);
+
+  lane = INTVAL (operand);
+
+  /* Pass ERR as an argument rather than as the format itself, so that a
+     '%' in a caller-supplied message can never be taken as a directive.  */
+  if (lane < low || lane >= high)
+    error ("%s", err);
+}
+
+/* Report "lane out of range" unless the CONST_INT OPERAND satisfies
+   LOW <= OPERAND < HIGH.  */
+
+void
+neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
+{
+  static const char msg[] = "lane out of range";
+
+  bounds_check (operand, low, high, msg);
+}
+
+/* Report "constant out of range" unless the CONST_INT OPERAND satisfies
+   LOW <= OPERAND < HIGH.  */
+
+void
+neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
+{
+  static const char msg[] = "constant out of range";
+
+  bounds_check (operand, low, high, msg);
+}
+
+/* Return the size in bits of one element of MODE.  DImode is treated as a
+   single element, so its own bit size is returned; for any other mode the
+   bit size of its inner mode is returned.  */
+HOST_WIDE_INT
+neon_element_bits (enum machine_mode mode)
+{
+  enum machine_mode elt_mode = (mode == DImode) ? mode : GET_MODE_INNER (mode);
+
+  return GET_MODE_BITSIZE (elt_mode);
+}
+
+\f
+/* Predicates for `match_operand' and `match_operator'. */
+
+/* Return nonzero if OP is a valid Cirrus memory address pattern: a MEM
+   whose address is either a register, or a base register plus a CONST_INT.
+   Until reload has started, anything mentioning an eliminable register is
+   rejected.  */
+int
+cirrus_memory_offset (rtx op)
+{
+  rtx addr;
+
+  /* Reject eliminable registers.  */
+  if (!reload_in_progress
+      && !reload_completed
+      && (reg_mentioned_p (frame_pointer_rtx, op)
+          || reg_mentioned_p (arg_pointer_rtx, op)
+          || reg_mentioned_p (virtual_incoming_args_rtx, op)
+          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+    return 0;
+
+  if (GET_CODE (op) != MEM)
+    return 0;
+
+  addr = XEXP (op, 0);
+
+  switch (GET_CODE (addr))
+    {
+    case REG:
+      /* Match: (mem (reg)).  */
+      return 1;
+
+    case PLUS:
+      /* Match:
+         (mem (plus (reg)
+                    (const))).  */
+      if (GET_CODE (XEXP (addr, 0)) == REG
+          && REG_MODE_OK_FOR_BASE_P (XEXP (addr, 0), VOIDmode)
+          && GET_CODE (XEXP (addr, 1)) == CONST_INT)
+        return 1;
+      return 0;
+
+    default:
+      return 0;
+    }
+}
+
+/* Return TRUE if OP is a valid coprocessor memory address pattern.
+ WB is true if full writeback address modes are allowed and is false
+ if limited writeback address modes (POST_INC and PRE_DEC) are
+ allowed. */
+
+int
+arm_coproc_mem_operand (rtx op, bool wb)
+{
+ rtx ind;
+
+ /* Reject eliminable registers. */
+ if (! (reload_in_progress || reload_completed)
+ && ( reg_mentioned_p (frame_pointer_rtx, op)
+ || reg_mentioned_p (arg_pointer_rtx, op)
+ || reg_mentioned_p (virtual_incoming_args_rtx, op)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+ || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+ return FALSE;
+
+ /* Constants are converted into offsets from labels. */
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ ind = XEXP (op, 0);
+
+ if (reload_completed
+ && (GET_CODE (ind) == LABEL_REF
+ || (GET_CODE (ind) == CONST
+ && GET_CODE (XEXP (ind, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
+ return TRUE;
+
+ /* Match: (mem (reg)). */
+ if (GET_CODE (ind) == REG)
+ return arm_address_register_rtx_p (ind, 0);
+
+ /* Autoincrement addressing modes. POST_INC and PRE_DEC are
+ acceptable in any case (subject to verification by
+ arm_address_register_rtx_p). We need WB to be true to accept
+ PRE_INC and POST_DEC. */
if (GET_CODE (ind) == POST_INC
|| GET_CODE (ind) == PRE_DEC
|| (wb
return FALSE;
}
+/* Return TRUE if OP is a memory operand which we can load or store a vector
+ to/from. If CORE is true, we're moving from ARM registers not Neon
+ registers. Accepted addresses are: a base register; once reload has
+ completed, a LABEL_REF or (const (plus (label_ref) (const_int))); and, only
+ when CORE is false, a POST_INC of a base register or a base register plus
+ a CONST_INT offset that is a multiple of 4 strictly between -1024 and
+ 1016. Anything else, including any non-MEM, yields FALSE. */
+int
+neon_vector_mem_operand (rtx op, bool core)
+{
+ rtx ind;
+
+ /* Reject eliminable registers. */
+ if (! (reload_in_progress || reload_completed)
+ && ( reg_mentioned_p (frame_pointer_rtx, op)
+ || reg_mentioned_p (arg_pointer_rtx, op)
+ || reg_mentioned_p (virtual_incoming_args_rtx, op)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+ || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+ return FALSE;
+
+ /* Constants are converted into offsets from labels. */
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ ind = XEXP (op, 0);
+
+ if (reload_completed
+ && (GET_CODE (ind) == LABEL_REF
+ || (GET_CODE (ind) == CONST
+ && GET_CODE (XEXP (ind, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
+ return TRUE;
+
+ /* Match: (mem (reg)). */
+ if (GET_CODE (ind) == REG)
+ return arm_address_register_rtx_p (ind, 0);
+
+ /* Allow post-increment with Neon registers. */
+ if (!core && GET_CODE (ind) == POST_INC)
+ return arm_address_register_rtx_p (XEXP (ind, 0), 0);
+
+#if 0
+ /* FIXME: We can support this too if we use VLD1/VST1. When enabled, this
+ rewrites IND to the PLUS inside the POST_MODIFY so that the range check
+ below applies to it. */
+ if (!core
+ && GET_CODE (ind) == POST_MODIFY
+ && arm_address_register_rtx_p (XEXP (ind, 0), 0)
+ && GET_CODE (XEXP (ind, 1)) == PLUS
+ && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
+ ind = XEXP (ind, 1);
+#endif
+
+ /* Match:
+ (plus (reg)
+ (const)). */
+ if (!core
+ && GET_CODE (ind) == PLUS
+ && GET_CODE (XEXP (ind, 0)) == REG
+ && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
+ && GET_CODE (XEXP (ind, 1)) == CONST_INT
+ && INTVAL (XEXP (ind, 1)) > -1024
+ && INTVAL (XEXP (ind, 1)) < 1016
+ && (INTVAL (XEXP (ind, 1)) & 3) == 0)
+ return TRUE;
+
+ return FALSE;
+}
+
+/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
+   type.  The address must be a base register accepted by
+   arm_address_register_rtx_p or, once reload has completed, a LABEL_REF
+   or (const (plus (label_ref) (const_int))).  */
+int
+neon_struct_mem_operand (rtx op)
+{
+  rtx addr;
+
+  /* Reject eliminable registers.  */
+  if (!reload_in_progress
+      && !reload_completed
+      && (reg_mentioned_p (frame_pointer_rtx, op)
+          || reg_mentioned_p (arg_pointer_rtx, op)
+          || reg_mentioned_p (virtual_incoming_args_rtx, op)
+          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+    return FALSE;
+
+  /* Constants are converted into offsets from labels.  */
+  if (GET_CODE (op) != MEM)
+    return FALSE;
+
+  addr = XEXP (op, 0);
+
+  /* Match: (mem (reg)).  */
+  if (GET_CODE (addr) == REG)
+    return arm_address_register_rtx_p (addr, 0);
+
+  /* Label-relative addresses are only acceptable after reload.  */
+  if (!reload_completed)
+    return FALSE;
+
+  if (GET_CODE (addr) == LABEL_REF)
+    return TRUE;
+
+  if (GET_CODE (addr) == CONST
+      && GET_CODE (XEXP (addr, 0)) == PLUS
+      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF
+      && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
+    return TRUE;
+
+  return FALSE;
+}
+
/* Return true if X is a register that will be eliminated later on. */
int
arm_eliminable_register (rtx x)
enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
+ if (TARGET_NEON
+ && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ && neon_vector_mem_operand (x, FALSE))
+ return NO_REGS;
+
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
return NO_REGS;
{
rtx pat = PATTERN (insn);
- if (GET_CODE (pat) == PARALLEL
- && GET_CODE (XVECEXP (pat, 0, 0)) == SET)
+ if (GET_CODE (pat) == SET)
{
- rtx rhs = SET_SRC (XVECEXP (pat, 0, 0));
+ rtx rhs = SET_SRC (pat);
if (GET_CODE (rhs) == UNSPEC
&& XINT (rhs, 1) == UNSPEC_PIC_BASE)
placed at the start of the pool. */
if (ARM_DOUBLEWORD_ALIGN
&& max_mp == NULL
- && fix->fix_size == 8
- && mp->fix_size != 8)
+ && fix->fix_size >= 8
+ && mp->fix_size < 8)
{
max_mp = mp;
max_address = mp->max_address;
/* For now, we do not allow the insertion of 8-byte alignment
requiring nodes anywhere but at the start of the pool. */
if (ARM_DOUBLEWORD_ALIGN
- && fix->fix_size == 8 && mp->fix_size != 8)
+ && fix->fix_size >= 8 && mp->fix_size < 8)
return NULL;
else
min_mp = mp;
placed at the start of the pool. */
else if (ARM_DOUBLEWORD_ALIGN
&& min_mp == NULL
- && fix->fix_size == 8
+ && fix->fix_size >= 8
&& mp->fix_size < 8)
{
min_mp = mp;
if (ARM_DOUBLEWORD_ALIGN)
for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
- if (mp->refcount > 0 && mp->fix_size == 8)
+ if (mp->refcount > 0 && mp->fix_size >= 8)
{
align64 = 1;
break;
break;
#endif
+#ifdef HAVE_consttable_16
+ case 16:
+ scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
+ break;
+
+#endif
default:
gcc_unreachable ();
}
/* If an entry requires 8-byte alignment then assume all constant pools
require 4 bytes of padding. Trying to do this later on a per-pool
basis is awkward because existing pool entries have to be modified. */
- if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
+ if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
minipool_pad = 4;
if (dump_file)
count++;
}
+ /* FLDMD may not load more than 16 doubleword registers at a time. Split the
+ load into multiple parts if we have to handle more than 16 registers. */
+ if (count > 16)
+ {
+ vfp_output_fldmd (stream, base, reg, 16);
+ vfp_output_fldmd (stream, base, reg + 16, count - 16);
+ return;
+ }
+
fputc ('\t', stream);
asm_fprintf (stream, "fldmfdd\t%r!, {", base);
count++;
}
+ /* FSTMD may not store more than 16 doubleword registers at once. Split
+ larger stores into multiple parts (up to a maximum of two, in
+ practice). */
+ if (count > 16)
+ {
+ int saved;
+ /* NOTE: base_reg is an internal register number, so each D register
+ counts as 2. */
+ saved = vfp_emit_fstmd (base_reg + 32, count - 16);
+ saved += vfp_emit_fstmd (base_reg, 16);
+ return saved;
+ }
+
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
return count * 8;
}
+/* Emit a call instruction with pattern PAT.  ADDR is the address of
+   the call target.  For VxWorks RTP PIC code, a call to a SYMBOL_REF that
+   is not known to bind locally additionally records a use of the PIC
+   register on the emitted call.  */
+
+void
+arm_emit_call_insn (rtx pat, rtx addr)
+{
+  rtx call = emit_call_insn (pat);
+  bool may_use_plt;
+
+  if (!TARGET_VXWORKS_RTP || !flag_pic || GET_CODE (addr) != SYMBOL_REF)
+    return;
+
+  /* Prefer the decl's binding information when a decl is attached to the
+     symbol; otherwise fall back to the symbol's own local flag.  */
+  if (SYMBOL_REF_DECL (addr))
+    may_use_plt = !targetm.binds_local_p (SYMBOL_REF_DECL (addr));
+  else
+    may_use_plt = !SYMBOL_REF_LOCAL_P (addr);
+
+  /* The PIC register is live on entry to VxWorks PIC PLT entries.
+     If the call might use such an entry, add a use of the PIC register
+     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
+  if (may_use_plt)
+    {
+      require_pic_register ();
+      use_reg (&CALL_INSN_FUNCTION_USAGE (call), cfun->machine->pic_reg);
+    }
+}
/* Output a 'call' insn. */
const char *
return "";
}
+/* Output a move, load or store for quad-word vectors in ARM registers.  Only
+   handles MEMs accepted by neon_vector_mem_operand with CORE=true.
+   OPERANDS[0] is the destination and OPERANDS[1] the source.  The assembly
+   is emitted directly through output_asm_insn, so the returned template is
+   always the empty string.  */
+
+const char *
+output_move_quad (rtx *operands)
+{
+  rtx ops[2];
+  int dest, src, first, step, n, i;
+
+  if (!REG_P (operands[0]))
+    {
+      /* Store: only a plain register address is supported.  */
+      gcc_assert (MEM_P (operands[0]));
+      gcc_assert (REG_P (operands[1]));
+      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
+
+      if (GET_CODE (XEXP (operands[0], 0)) != REG)
+        gcc_unreachable ();
+
+      output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
+      return "";
+    }
+
+  if (MEM_P (operands[1]))
+    {
+      /* Load.  */
+      switch (GET_CODE (XEXP (operands[1], 0)))
+        {
+        case REG:
+          output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
+          break;
+
+        case LABEL_REF:
+        case CONST:
+          /* Form the address in the first destination register, then load
+             through it.  */
+          output_asm_insn ("adr%?\t%0, %1", operands);
+          output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
+          break;
+
+        default:
+          gcc_unreachable ();
+        }
+
+      return "";
+    }
+
+  /* Register to register move, one word at a time.  This seems pretty dumb,
+     but hopefully GCC won't try to do it very often.  */
+  gcc_assert (REG_P (operands[1]));
+
+  dest = REGNO (operands[0]);
+  src = REGNO (operands[1]);
+
+  /* Copy upwards when the destination is below the source and downwards
+     otherwise -- presumably so that overlapping register ranges are copied
+     safely.  */
+  if (dest < src)
+    {
+      first = 0;
+      step = 1;
+    }
+  else
+    {
+      first = 3;
+      step = -1;
+    }
+
+  for (n = 0, i = first; n < 4; n++, i += step)
+    {
+      ops[0] = gen_rtx_REG (SImode, dest + i);
+      ops[1] = gen_rtx_REG (SImode, src + i);
+      output_asm_insn ("mov%?\t%0, %1", ops);
+    }
+
+  return "";
+}
+
/* Output a VFP load or store instruction. */
const char *
int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
const char *template;
char buff[50];
+ enum machine_mode mode;
reg = operands[!load];
mem = operands[load];
+ mode = GET_MODE (reg);
+
gcc_assert (REG_P (reg));
gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
- gcc_assert (GET_MODE (reg) == SFmode
- || GET_MODE (reg) == DFmode
- || GET_MODE (reg) == SImode
- || GET_MODE (reg) == DImode);
+ gcc_assert (mode == SFmode
+ || mode == DFmode
+ || mode == SImode
+ || mode == DImode
+ || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
gcc_assert (MEM_P (mem));
addr = XEXP (mem, 0);
return "";
}
+/* Output a Neon quad-word load or store, or a load or store for
+ larger structure modes. We could also support post-modify forms using
+ VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
+ yet.
+ WARNING: The ordering of elements in memory is weird in big-endian mode,
+ because we use VSTM instead of VST1, to make it easy to make vector stores
+ via ARM registers write values in the same order as stores direct from Neon
+ registers. For example, the byte ordering of a quadword vector with 16-bit
+ elements like this:
+
+ [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
+
+ will be (with lowest address first, h = most-significant byte,
+ l = least-significant byte of element):
+
+ [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
+ e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
+
+ When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
+ rN in the order:
+
+ dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
+
+ So that STM/LDM can be used on vectors in ARM registers, and the same memory
+ layout will result as if VSTM/VLDM were used.
+
+ OPERANDS[0] is the destination and OPERANDS[1] the source; exactly one of
+ them is the register and the other the MEM. A POST_INC address is output
+ as vldmia/vstmia with base writeback; LABEL_REF and PLUS addresses are
+ output as one vldr/vstr per doubleword register; any other address is
+ output as vldmia/vstmia without writeback. The assembly is emitted here,
+ so the returned template is always the empty string. */
+
+const char *
+output_move_neon (rtx *operands)
+{
+ rtx reg, mem, addr, ops[2];
+ int regno, load = REG_P (operands[0]);
+ const char *template;
+ char buff[50];
+ enum machine_mode mode;
+
+ reg = operands[!load];
+ mem = operands[load];
+
+ mode = GET_MODE (reg);
+
+ gcc_assert (REG_P (reg));
+ regno = REGNO (reg);
+ gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
+ || NEON_REGNO_OK_FOR_QUAD (regno));
+ gcc_assert (VALID_NEON_DREG_MODE (mode)
+ || VALID_NEON_QREG_MODE (mode)
+ || VALID_NEON_STRUCT_MODE (mode));
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ /* Strip off const from addresses like (const (plus (...))). */
+ if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
+ addr = XEXP (addr, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case POST_INC:
+ template = "v%smia%%?\t%%0!, %%h1";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
+ case POST_MODIFY:
+ /* FIXME: Not currently enabled in neon_vector_mem_operand. */
+ gcc_unreachable ();
+
+ case LABEL_REF:
+ case PLUS:
+ {
+ int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
+ int i;
+ int overlap = -1; /* Index of the doubleword whose register overlaps MEM, if any. */
+ for (i = 0; i < nregs; i++)
+ {
+ /* We're only using DImode here because it's a convenient size. */
+ ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
+ ops[1] = adjust_address (mem, SImode, 8 * i);
+ if (reg_overlap_mentioned_p (ops[0], mem))
+ {
+ gcc_assert (overlap == -1);
+ overlap = i; /* Defer this transfer until after the loop. */
+ }
+ else
+ {
+ sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+ }
+ }
+ if (overlap != -1) /* Emit the deferred transfer last. */
+ {
+ ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
+ ops[1] = adjust_address (mem, SImode, 8 * overlap);
+ sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+ }
+
+ return "";
+ }
+
+ default:
+ template = "v%smia%%?\t%%m0, %%h1";
+ ops[0] = mem;
+ ops[1] = reg;
+ }
+
+ sprintf (buff, template, load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+
+ return "";
+}
+
/* Output an ADD r, s, #n where n may be too big for one instruction.
If adding zero to one register, output nothing. */
const char *
max_reg = 12;
for (reg = 0; reg <= max_reg; reg++)
- if (regs_ever_live[reg]
- || (! current_function_is_leaf && call_used_regs [reg]))
+ if (df_regs_ever_live_p (reg)
+ || (! current_function_is_leaf && call_used_regs[reg]))
save_reg_mask |= (1 << reg);
/* Also save the pic base register if necessary. */
/* In the normal case we only need to save those registers
which are call saved and which are used by this function. */
for (reg = 0; reg <= last_reg; reg++)
- if (regs_ever_live[reg] && ! call_used_regs [reg])
+ if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
save_reg_mask |= (1 << reg);
/* Handle the frame pointer as a special case. */
- if (TARGET_THUMB2 && frame_pointer_needed)
+ if (! TARGET_APCS_FRAME
+ && ! frame_pointer_needed
+ && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
+ && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
else if (! TARGET_APCS_FRAME
&& ! frame_pointer_needed
- && regs_ever_live[HARD_FRAME_POINTER_REGNUM]
+ && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
&& ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
if (flag_pic
&& !TARGET_SINGLE_PIC_BASE
&& arm_pic_register != INVALID_REGNUM
- && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
+ && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
|| current_function_uses_pic_offset_table))
save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
now and then popping it back into the PC. This incurs extra memory
accesses though, so we only do it when optimizing for size, and only
if we know that we will not need a fancy return sequence. */
- if (regs_ever_live [LR_REGNUM]
- || (save_reg_mask
- && optimize_size
- && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
- && !current_function_calls_eh_return))
+ if (df_regs_ever_live_p (LR_REGNUM)
+ || (save_reg_mask
+ && optimize_size
+ && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+ && !current_function_calls_eh_return))
save_reg_mask |= 1 << LR_REGNUM;
if (cfun->machine->lr_save_eliminated)
mask = 0;
for (reg = 0; reg < 12; reg ++)
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
mask |= 1 << reg;
if (flag_pic
have to push it. Use LAST_LO_REGNUM as our fallback
choice for the register to select. */
reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
+ /* Make sure the register returned by thumb_find_work_register is
+ not part of the return value. */
+ if (reg * UNITS_PER_WORD <= arm_size_return_regs ())
+ reg = LAST_LO_REGNUM;
if (! call_used_regs[reg])
mask |= 1 << reg;
regno < LAST_VFP_REGNUM;
regno += 2)
{
- if ((!regs_ever_live[regno] || call_used_regs[regno])
- && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
+ if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
+ && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
{
if (count > 0)
{
if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
{
floats_offset += 12;
asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
{
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
{
floats_offset += 12;
start_reg = FIRST_VFP_REGNUM;
for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
{
- if ((!regs_ever_live[reg] || call_used_regs[reg])
- && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
+ if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+ && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
{
if (start_reg != reg)
vfp_output_fldmd (f, IP_REGNUM,
lrm_count += (lrm_count % 2 ? 2 : 1);
for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
{
asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
reg, FP_REGNUM, lrm_count * 4);
if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
{
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
reg, SP_REGNUM);
}
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
{
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
{
if (reg - start_reg == 3)
{
start_reg = FIRST_VFP_REGNUM;
for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
{
- if ((!regs_ever_live[reg] || call_used_regs[reg])
- && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
+ if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+ && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
{
if (start_reg != reg)
vfp_output_fldmd (f, SP_REGNUM,
}
if (TARGET_IWMMXT)
for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
/* If we can, restore the LR into the PC. */
return !cfun->machine->lr_save_eliminated
&& (!leaf_function_p ()
|| thumb_far_jump_used_p ()
- || regs_ever_live [LR_REGNUM]);
+ || df_regs_ever_live_p (LR_REGNUM));
}
for (regno = FIRST_IWMMXT_REGNUM;
regno <= LAST_IWMMXT_REGNUM;
regno++)
- if (regs_ever_live [regno] && ! call_used_regs [regno])
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
saved += 8;
}
{
/* Space for saved FPA registers. */
for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
- if (regs_ever_live[regno] && ! call_used_regs[regno])
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
saved += 12;
/* Space for saved VFP registers. */
if (leaf && frame_size == 0)
{
offsets->outgoing_args = offsets->soft_frame;
+ offsets->locals_base = offsets->soft_frame;
return offsets;
}
rtx insn;
for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
- if (regs_ever_live[reg] && ! call_used_regs [reg])
+ if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
{
insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
insn = gen_rtx_MEM (V2SImode, insn);
if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
{
insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
insn = gen_rtx_MEM (XFmode, insn);
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
{
- if (regs_ever_live[reg] && !call_used_regs[reg])
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
{
if (start_reg - reg == 3)
{
for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
{
- if ((!regs_ever_live[reg] || call_used_regs[reg])
- && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
+ if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+ && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
{
if (start_reg != reg)
saved_size += vfp_emit_fstmd (start_reg,
doesn't need to be unwound, as it doesn't contain a value
inherited from the caller. */
- if (regs_ever_live[3] == 0)
+ if (df_regs_ever_live_p (3) == false)
insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
else if (args_to_push == 0)
{
if (IS_NESTED (func_type))
{
/* Recover the static chain register. */
- if (regs_ever_live [3] == 0
+ if (!df_regs_ever_live_p (3)
|| saved_pretend_args)
insn = gen_rtx_REG (SImode, 3);
else /* if (current_function_pretend_args_size == 0) */
/* If the link register is being kept alive, with the return address in it,
then make sure that it does not get reused by the ce2 pass. */
if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
- {
- emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
- cfun->machine->lr_save_eliminated = 1;
- }
+ cfun->machine->lr_save_eliminated = 1;
}
\f
/* Print condition code to STREAM. Helper function for arm_print_operand. */
fputc('s', stream);
break;
+ /* %# is a "break" sequence. It doesn't output anything, but is used to
+ separate e.g. operand numbers from following text, if that text consists
+ of further digits which we don't want to be part of the operand
+ number. */
+ case '#':
+ return;
+
case 'N':
{
REAL_VALUE_TYPE r;
}
return;
+ /* An integer without a preceding # sign. */
+ case 'c':
+ gcc_assert (GET_CODE (x) == CONST_INT);
+ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ return;
+
case 'B':
if (GET_CODE (x) == CONST_INT)
{
asm_fprintf (stream, "%r", REGNO (x) + 1);
return;
+ case 'J':
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
+ return;
+
+ case 'K':
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
+ return;
+
case 'm':
asm_fprintf (stream, "%r",
GET_CODE (XEXP (x, 0)) == REG
REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
return;
+ /* Like 'M', but writing doubleword vector registers, for use by Neon
+ insns. */
+ case 'h':
+ {
+ int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
+ int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
+ if (numregs == 1)
+ asm_fprintf (stream, "{d%d}", regno);
+ else
+ asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
+ }
+ return;
+
case 'd':
/* CONST_TRUE_RTX means always -- that's the default. */
if (x == const_true_rtx)
want to do that. */
if (x == const_true_rtx)
{
- output_operand_lossage ("instruction never exectued");
+ output_operand_lossage ("instruction never executed");
return;
}
if (!COMPARISON_P (x))
}
return;
- /* Print a VFP double precision register name. */
+ /* Print a VFP/Neon double precision or quad precision register name. */
case 'P':
+ case 'q':
{
int mode = GET_MODE (x);
- int num;
+ int is_quad = (code == 'q');
+ int regno;
- if (mode != DImode && mode != DFmode)
+ if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
{
output_operand_lossage ("invalid operand for code '%c'", code);
return;
return;
}
- num = REGNO(x) - FIRST_VFP_REGNUM;
- if (num & 1)
+ regno = REGNO (x);
+ if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
+ || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
{
output_operand_lossage ("invalid operand for code '%c'", code);
return;
}
- fprintf (stream, "d%d", num >> 1);
+ fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
+ (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
+ }
+ return;
+
+ /* These two codes print the low/high doubleword register of a Neon quad
+ register, respectively. For pair-structure types, can also print
+ low/high quadword registers. */
+ case 'e':
+ case 'f':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if ((GET_MODE_SIZE (mode) != 16
+ && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!NEON_REGNO_OK_FOR_QUAD (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ if (GET_MODE_SIZE (mode) == 16)
+ fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
+ + (code == 'f' ? 1 : 0));
+ else
+ fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
+ + (code == 'f' ? 1 : 0));
+ }
+ return;
+
+ /* Print a VFPv3 floating-point constant, represented as an integer
+ index. */
+ case 'G':
+ {
+ int index = vfp3_const_double_index (x);
+ gcc_assert (index != -1);
+ fprintf (stream, "%d", index);
+ }
+ return;
+
+ /* Print bits representing opcode features for Neon.
+
+ Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
+ and polynomials as unsigned.
+
+ Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
+
+ Bit 2 is 1 for rounding functions, 0 otherwise. */
+
+ /* Identify the type as 's', 'u', 'p' or 'f'. */
+ case 'T':
+ {
+ HOST_WIDE_INT bits = INTVAL (x);
+ fputc ("uspf"[bits & 3], stream);
+ }
+ return;
+
+ /* Likewise, but signed and unsigned integers are both 'i'. */
+ case 'F':
+ {
+ HOST_WIDE_INT bits = INTVAL (x);
+ fputc ("iipf"[bits & 3], stream);
+ }
+ return;
+
+ /* As for 'T', but emit 'u' instead of 'p'. */
+ case 't':
+ {
+ HOST_WIDE_INT bits = INTVAL (x);
+ fputc ("usuf"[bits & 3], stream);
+ }
+ return;
+
+ /* Bit 2: rounding (vs none). */
+ case 'O':
+ {
+ HOST_WIDE_INT bits = INTVAL (x);
+ fputs ((bits & 4) != 0 ? "r" : "", stream);
}
return;
break;
case CONST_DOUBLE:
- fprintf (stream, "#%s", fp_immediate_constant (x));
+ if (TARGET_NEON)
+ {
+ char fpstr[20];
+ real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
+ sizeof (fpstr), 0, 1);
+ fprintf (stream, "#%s", fpstr);
+ }
+ else
+ fprintf (stream, "#%s", fp_immediate_constant (x));
break;
default:
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
+ enum machine_mode mode; /* Mode of X; only read after the word-sized case. */
+
if (size == UNITS_PER_WORD && aligned_p)
{
fputs ("\t.word\t", asm_out_file);
if (NEED_GOT_RELOC && flag_pic && making_const_table &&
(GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
{
- if (GET_CODE (x) == SYMBOL_REF
- && (CONSTANT_POOL_ADDRESS_P (x)
- || SYMBOL_REF_LOCAL_P (x)))
- fputs ("(GOTOFF)", asm_out_file);
- else if (GET_CODE (x) == LABEL_REF)
- fputs ("(GOTOFF)", asm_out_file);
- else
+ /* See legitimize_pic_address for an explanation of the
+ TARGET_VXWORKS_RTP check. Outside VxWorks RTP only a SYMBOL_REF that
+ is not local is tagged (GOT); local symbols and LABEL_REFs are tagged
+ (GOTOFF). */
+ if (TARGET_VXWORKS_RTP
+ || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
fputs ("(GOT)", asm_out_file);
+ else
+ fputs ("(GOTOFF)", asm_out_file);
}
fputc ('\n', asm_out_file);
return true;
}
- if (arm_vector_mode_supported_p (GET_MODE (x)))
+ mode = GET_MODE (x);
+
+ if (arm_vector_mode_supported_p (mode))
{
int i, units;
+ unsigned int invmask = 0, parts_per_word;
gcc_assert (GET_CODE (x) == CONST_VECTOR);
units = CONST_VECTOR_NUNITS (x);
+ size = GET_MODE_SIZE (GET_MODE_INNER (mode)); /* Size of one element; replaces the per-mode switch. */
- switch (GET_MODE (x))
- {
- case V2SImode: size = 4; break;
- case V4HImode: size = 2; break;
- case V8QImode: size = 1; break;
- default:
- gcc_unreachable ();
- }
+ /* For big-endian Neon vectors, we must permute the vector to the form
+ which, when loaded by a VLDR or VLDM instruction, will give a vector
+ with the elements in the right order. */
+ if (TARGET_NEON && WORDS_BIG_ENDIAN)
+ {
+ parts_per_word = UNITS_PER_WORD / size;
+ /* FIXME: This might be wrong for 64-bit vector elements, but we don't
+ support those anywhere yet. */
+ invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1; /* parts_per_word is 0 when an element is wider than a word. */
+ }
- for (i = 0; i < units; i++)
- {
- rtx elt;
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ for (i = 0; i < units; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (x, i ^ invmask); /* invmask stays 0 unless TARGET_NEON && WORDS_BIG_ENDIAN. */
+ assemble_integer
+ (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
+ }
+ else
+ for (i = 0; i < units; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (x, i); /* NOTE(review): no invmask permutation here, unlike the integer loop -- confirm this is intended for big-endian Neon. */
+ REAL_VALUE_TYPE rval;
- elt = CONST_VECTOR_ELT (x, i);
- assemble_integer
- (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
- }
+ REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
+
+ assemble_real
+ (rval, GET_MODE_INNER (mode),
+ i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
+ }
return true;
}
return default_assemble_integer (x, size, aligned_p);
}
-
-/* Add a function to the list of static constructors. */
-
static void
-arm_elf_asm_constructor (rtx symbol, int priority)
+arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor) /* Shared worker
+ for arm_elf_asm_constructor (IS_CTOR true) and arm_elf_asm_destructor
+ (IS_CTOR false): picks the output section for SYMBOL from IS_CTOR and
+ PRIORITY and emits SYMBOL there followed by "(target1)". */
{
section *s;
if (!TARGET_AAPCS_BASED)
{
- default_named_section_asm_out_constructor (symbol, priority);
+ (is_ctor ?
+ default_named_section_asm_out_constructor
+ : default_named_section_asm_out_destructor) (symbol, priority); /* Defer to the generic named-section hook of the matching kind. */
return;
}
if (priority != DEFAULT_INIT_PRIORITY)
{
char buf[18];
- sprintf (buf, ".init_array.%.5u", priority);
+ sprintf (buf, "%s.%.5u",
+ is_ctor ? ".init_array" : ".fini_array",
+ priority); /* 11-character section name + '.' + 5 digits + NUL fills buf[18] exactly; assumes PRIORITY needs at most 5 decimal digits. */
s = get_section (buf, SECTION_WRITE, NULL_TREE);
}
- else
+ else if (is_ctor) /* Default priority: use ctors_section or dtors_section directly. */
s = ctors_section;
+ else
+ s = dtors_section;
switch_to_section (s);
assemble_align (POINTER_SIZE);
output_addr_const (asm_out_file, symbol);
fputs ("(target1)\n", asm_out_file);
}
+
+/* Add a function to the list of static constructors. SYMBOL and PRIORITY
+ are passed through unchanged to arm_elf_asm_cdtor, with IS_CTOR set to true
+ to select the constructor variant of that shared worker. */
+
+static void
+arm_elf_asm_constructor (rtx symbol, int priority)
+{
+ arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
+}
+
+/* Add a function to the list of static destructors. SYMBOL and PRIORITY
+ are passed through unchanged to arm_elf_asm_cdtor, with IS_CTOR set to false
+ to select the destructor variant of that shared worker. */
+
+static void
+arm_elf_asm_destructor (rtx symbol, int priority)
+{
+ arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
+}
#endif
\f
/* A finite state machine takes care of noticing whether or not instructions
&& IS_VFP_REGNUM (regno))
{
if (mode == SFmode || mode == SImode)
- return TRUE;
+ return VFP_REGNO_OK_FOR_SINGLE (regno);
- /* DFmode values are only valid in even register pairs. */
if (mode == DFmode)
- return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
+ return VFP_REGNO_OK_FOR_DOUBLE (regno);
+
+ if (TARGET_NEON)
+ return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
+ || (VALID_NEON_QREG_MODE (mode)
+ && NEON_REGNO_OK_FOR_QUAD (regno))
+ || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
+ || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
+ || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
+ || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
+ || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
+
return FALSE;
}
/* We allow any value to be stored in the general registers.
Restrict doubleword quantities to even register pairs so that we can
- use ldrd. */
+ use ldrd. Do not allow Neon structure opaque modes in general registers;
+ they would use too many. */
if (regno <= LAST_ARM_REGNUM)
- return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
+ return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
+ && !VALID_NEON_STRUCT_MODE (mode);
if (regno == FRAME_POINTER_REGNUM
|| regno == ARG_POINTER_REGNUM)
return CIRRUS_REGS;
if (IS_VFP_REGNUM (regno))
- return VFP_REGS;
+ {
+ if (regno <= D7_VFP_REGNUM)
+ return VFP_D0_D7_REGS;
+ else if (regno <= LAST_LO_VFP_REGNUM)
+ return VFP_LO_REGS;
+ else
+ return VFP_HI_REGS;
+ }
if (IS_IWMMXT_REGNUM (regno))
return IWMMXT_REGS;
IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
- IWMMXT_BUILTIN2 (ashlv4hi3, WSLLHI)
+ IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
- IWMMXT_BUILTIN2 (ashlv2si3, WSLLWI)
+ IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
- IWMMXT_BUILTIN2 (lshrv4hi3, WSRLHI)
+ IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
- IWMMXT_BUILTIN2 (lshrv2si3, WSRLWI)
+ IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
- IWMMXT_BUILTIN2 (ashrv4hi3, WSRAHI)
+ IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
- IWMMXT_BUILTIN2 (ashrv2si3, WSRAWI)
+ IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
NULL, const_nothrow);
}
+typedef enum { /* Bit flags, one per mode a Neon builtin variant can be keyed
+ on. Each value is a distinct power of two so that several can be OR-ed
+ into one mask; the <mode>_UP macros map lowercase mode names onto them. */
+ T_V8QI = 0x0001,
+ T_V4HI = 0x0002,
+ T_V2SI = 0x0004,
+ T_V2SF = 0x0008,
+ T_DI = 0x0010,
+ T_V16QI = 0x0020,
+ T_V8HI = 0x0040,
+ T_V4SI = 0x0080,
+ T_V4SF = 0x0100,
+ T_V2DI = 0x0200,
+ T_TI = 0x0400,
+ T_EI = 0x0800,
+ T_OI = 0x1000
+} neon_builtin_type_bits;
+
+#define v8qi_UP T_V8QI /* <mode>_UP: the neon_builtin_type_bits flag for <mode>. */
+#define v4hi_UP T_V4HI
+#define v2si_UP T_V2SI
+#define v2sf_UP T_V2SF
+#define di_UP T_DI
+#define v16qi_UP T_V16QI
+#define v8hi_UP T_V8HI
+#define v4si_UP T_V4SI
+#define v4sf_UP T_V4SF
+#define v2di_UP T_V2DI
+#define ti_UP T_TI
+#define ei_UP T_EI
+#define oi_UP T_OI
+
+#define UP(X) X##_UP /* Paste _UP onto X, e.g. UP (v8qi) -> v8qi_UP -> T_V8QI. */
+
+#define T_MAX 13 /* Number of enumerators in neon_builtin_type_bits; also the
+ length of the codes[] array in neon_builtin_datum. */
+
+typedef enum { /* Kinds of Neon builtin. The VARn table macros form these
+ names by pasting NEON_ onto their first argument, and the result is stored
+ in the itype field of neon_builtin_datum. */
+ NEON_BINOP,
+ NEON_TERNOP,
+ NEON_UNOP,
+ NEON_GETLANE,
+ NEON_SETLANE,
+ NEON_CREATE,
+ NEON_DUP,
+ NEON_DUPLANE,
+ NEON_COMBINE,
+ NEON_SPLIT,
+ NEON_LANEMUL,
+ NEON_LANEMULL,
+ NEON_LANEMULH,
+ NEON_LANEMAC,
+ NEON_SCALARMUL,
+ NEON_SCALARMULL,
+ NEON_SCALARMULH,
+ NEON_SCALARMAC,
+ NEON_CONVERT,
+ NEON_FIXCONV,
+ NEON_SELECT,
+ NEON_RESULTPAIR,
+ NEON_REINTERP,
+ NEON_VTBL,
+ NEON_VTBX,
+ NEON_LOAD1,
+ NEON_LOAD1LANE,
+ NEON_STORE1,
+ NEON_STORE1LANE,
+ NEON_LOADSTRUCT,
+ NEON_LOADSTRUCTLANE,
+ NEON_STORESTRUCT,
+ NEON_STORESTRUCTLANE,
+ NEON_LOGICBINOP,
+ NEON_SHIFTINSERT,
+ NEON_SHIFTIMM,
+ NEON_SHIFTACC
+} neon_itype;
+
+typedef struct { /* One row of the neon_builtin_data table. */
+ const char *name; /* Stringified builtin name (#N in the VARn macros). */
+ const neon_itype itype; /* Builtin kind, NEON_##T in the VARn macros. */
+ const neon_builtin_type_bits bits; /* OR of the T_* flags of every variant. */
+ const enum insn_code codes[T_MAX]; /* CODE_FOR_neon_<name><mode>, one per variant, in the order listed. */
+ const unsigned int num_vars; /* Number of variants, i.e. initialized entries of codes[]. */
+ unsigned int base_fcode; /* Set to 0 by the VARn macros; not const, so presumably assigned during builtin setup -- TODO confirm. */
+} neon_builtin_datum;
+
+#define CF(N,X) CODE_FOR_neon_##N##X /* Insn code of builtin N for mode X.
+ The VARn macros below each expand to the initializer list of one
+ neon_builtin_datum with n variants: the name #N, the kind NEON_##T, the OR
+ of the variants' UP bits, their CF insn codes, the count n, and a zero
+ base_fcode. */
+
+#define VAR1(T, N, A) \
+ #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
+#define VAR2(T, N, A, B) \
+ #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
+#define VAR3(T, N, A, B, C) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C), \
+ { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
+#define VAR4(T, N, A, B, C, D) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
+#define VAR5(T, N, A, B, C, D, E) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
+#define VAR6(T, N, A, B, C, D, E, F) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
+#define VAR7(T, N, A, B, C, D, E, F, G) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
+ CF (N, G) }, 7, 0
+#define VAR8(T, N, A, B, C, D, E, F, G, H) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
+ | UP (H), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
+ CF (N, G), CF (N, H) }, 8, 0
+#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
+ | UP (H) | UP (I), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
+ CF (N, G), CF (N, H), CF (N, I) }, 9, 0
+#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
+ | UP (H) | UP (I) | UP (J), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
+ CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
+
+/* The mode entries in the following table correspond to the "key" type of the
+ instruction variant, i.e. equivalent to that which would be specified after
+ the assembler mnemonic, which usually refers to the last vector operand.
+ (Signed, unsigned and polynomial types are not distinguished, though; they
+ are all mapped onto the same mode for a given element size.) The modes
+ listed per instruction should be the same as those defined for that
+ instruction's pattern in neon.md.
+ Each row is written with a VARn macro, which stores the insn codes in the
+ order the modes are given and ORs the matching T_* bits together.
+ WARNING: Variants should be listed in the same increasing order as
+ neon_builtin_type_bits. */
+
+static neon_builtin_datum neon_builtin_data[] =
+{
+ { VAR10 (BINOP, vadd,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
+ { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
+ { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
+ { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
+ { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
+ { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
+ { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
+ { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
+ { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
+ { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
+ { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
+ { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
+ { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
+ { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
+ { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
+ { VAR2 (BINOP, vqdmull, v4hi, v2si) },
+ { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
+ { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
+ { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
+ { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR10 (BINOP, vsub,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
+ { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
+ { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
+ { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR2 (BINOP, vcage, v2sf, v4sf) },
+ { VAR2 (BINOP, vcagt, v2sf, v4sf) },
+ { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
+ { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
+ { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
+ { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
+ { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
+ { VAR2 (BINOP, vrecps, v2sf, v4sf) },
+ { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
+ { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR2 (UNOP, vcnt, v8qi, v16qi) },
+ { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
+ { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
+ { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ /* FIXME: vget_lane supports more variants than this! */
+ { VAR10 (GETLANE, vget_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (SETLANE, vset_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR10 (DUP, vdup_n,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (DUPLANE, vdup_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
+ { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
+ { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
+ { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
+ { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
+ { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
+ { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
+ { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
+ { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
+ { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
+ { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
+ { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
+ { VAR10 (BINOP, vext,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
+ { VAR2 (UNOP, vrev16, v8qi, v16qi) },
+ { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
+ { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
+ { VAR10 (SELECT, vbsl,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR1 (VTBL, vtbl1, v8qi) },
+ { VAR1 (VTBL, vtbl2, v8qi) },
+ { VAR1 (VTBL, vtbl3, v8qi) },
+ { VAR1 (VTBL, vtbl4, v8qi) },
+ { VAR1 (VTBX, vtbx1, v8qi) },
+ { VAR1 (VTBX, vtbx2, v8qi) },
+ { VAR1 (VTBX, vtbx3, v8qi) },
+ { VAR1 (VTBX, vtbx4, v8qi) },
+ { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOAD1, vld1,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOAD1LANE, vld1_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOAD1, vld1_dup,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (STORE1, vst1,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (STORE1LANE, vst1_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR9 (LOADSTRUCT,
+ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (LOADSTRUCTLANE, vld2_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR9 (STORESTRUCT, vst2,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (STORESTRUCTLANE, vst2_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR9 (LOADSTRUCT,
+ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (LOADSTRUCTLANE, vld3_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR9 (STORESTRUCT, vst3,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (STORESTRUCTLANE, vst3_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR9 (LOADSTRUCT, vld4,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (LOADSTRUCTLANE, vld4_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR9 (STORESTRUCT, vst4,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (STORESTRUCTLANE, vst4_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR10 (LOGICBINOP, vand,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOGICBINOP, vorr,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (BINOP, veor,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOGICBINOP, vbic,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOGICBINOP, vorn,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
+};
+
+#undef CF
+#undef VAR1
+#undef VAR2
+#undef VAR3
+#undef VAR4
+#undef VAR5
+#undef VAR6
+#undef VAR7
+#undef VAR8
+#undef VAR9
+#undef VAR10
+/* Set up the NEON builtins. Create and register the NEON element,
+ unsigned and opaque structure types, build the vector, pointer and
+ function types derived from them, then walk neon_builtin_data and
+ declare one function named __builtin_neon_<name><mode> for every mode
+ variant whose bit is set in an entry. Function codes are allocated
+ consecutively from ARM_BUILTIN_NEON_BASE, and the first code given to
+ an entry is stored in its base_fcode field. */
+static void
+arm_init_neon_builtins (void)
+{
+ unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
+
+ /* Create distinguished type nodes for NEON vector element types,
+ and pointers to values of such types, so we can detect them later. */
+ tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
+ tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
+ tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
+ tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
+ tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
+ tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
+ tree neon_float_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
+ layout_type (neon_float_type_node);
+
+ /* Define typedefs which exactly correspond to the modes we are basing vector
+ types on. If you change these names you'll need to change
+ the table used by arm_mangle_type too. */
+ (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
+ "__builtin_neon_qi");
+ (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
+ "__builtin_neon_hi");
+ (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
+ "__builtin_neon_si");
+ (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
+ "__builtin_neon_sf");
+ (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
+ "__builtin_neon_di");
+
+ (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
+ "__builtin_neon_poly8");
+ (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
+ "__builtin_neon_poly16");
+
+ tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
+ tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
+ tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
+ tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
+ tree float_pointer_node = build_pointer_type (neon_float_type_node);
+
+ /* Next create constant-qualified versions of the above types. */
+ tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
+ TYPE_QUAL_CONST);
+ tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
+ TYPE_QUAL_CONST);
+ tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
+ TYPE_QUAL_CONST);
+ tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
+ TYPE_QUAL_CONST);
+ tree const_float_node = build_qualified_type (neon_float_type_node,
+ TYPE_QUAL_CONST);
+
+ tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
+ tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
+ tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
+ tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
+ tree const_float_pointer_node = build_pointer_type (const_float_node);
+
+ /* Now create vector types based on our NEON element types. */
+ /* 64-bit vectors. */
+ tree V8QI_type_node =
+ build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
+ tree V4HI_type_node =
+ build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
+ tree V2SI_type_node =
+ build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
+ tree V2SF_type_node =
+ build_vector_type_for_mode (neon_float_type_node, V2SFmode);
+ /* 128-bit vectors. */
+ tree V16QI_type_node =
+ build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
+ tree V8HI_type_node =
+ build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
+ tree V4SI_type_node =
+ build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
+ tree V4SF_type_node =
+ build_vector_type_for_mode (neon_float_type_node, V4SFmode);
+ tree V2DI_type_node =
+ build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
+
+ /* Unsigned integer types for various mode sizes. */
+ tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
+ tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
+ tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
+ tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
+
+ (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
+ "__builtin_neon_uqi");
+ (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
+ "__builtin_neon_uhi");
+ (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
+ "__builtin_neon_usi");
+ (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
+ "__builtin_neon_udi");
+
+ /* Opaque integer types for structures of vectors. */
+ tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
+ tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
+ tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
+ tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
+
+ (*lang_hooks.types.register_builtin_type) (intTI_type_node,
+ "__builtin_neon_ti");
+ (*lang_hooks.types.register_builtin_type) (intEI_type_node,
+ "__builtin_neon_ei");
+ (*lang_hooks.types.register_builtin_type) (intOI_type_node,
+ "__builtin_neon_oi");
+ (*lang_hooks.types.register_builtin_type) (intCI_type_node,
+ "__builtin_neon_ci");
+ (*lang_hooks.types.register_builtin_type) (intXI_type_node,
+ "__builtin_neon_xi");
+
+ /* Pointers to vector types. */
+ tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
+ tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
+ tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
+ tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
+ tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
+ tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
+ tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
+ tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
+ tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
+
+ /* Operations which return results as pairs. */
+ tree void_ftype_pv8qi_v8qi_v8qi =
+ build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
+ V8QI_type_node, NULL);
+ tree void_ftype_pv4hi_v4hi_v4hi =
+ build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
+ V4HI_type_node, NULL);
+ tree void_ftype_pv2si_v2si_v2si =
+ build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
+ V2SI_type_node, NULL);
+ tree void_ftype_pv2sf_v2sf_v2sf =
+ build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
+ V2SF_type_node, NULL);
+ tree void_ftype_pdi_di_di =
+ build_function_type_list (void_type_node, intDI_pointer_node,
+ neon_intDI_type_node, neon_intDI_type_node, NULL);
+ tree void_ftype_pv16qi_v16qi_v16qi =
+ build_function_type_list (void_type_node, V16QI_pointer_node,
+ V16QI_type_node, V16QI_type_node, NULL);
+ tree void_ftype_pv8hi_v8hi_v8hi =
+ build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
+ V8HI_type_node, NULL);
+ tree void_ftype_pv4si_v4si_v4si =
+ build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
+ V4SI_type_node, NULL);
+ tree void_ftype_pv4sf_v4sf_v4sf =
+ build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
+ V4SF_type_node, NULL);
+ tree void_ftype_pv2di_v2di_v2di =
+ build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
+ V2DI_type_node, NULL);
+
+ tree reinterp_ftype_dreg[5][5];
+ tree reinterp_ftype_qreg[5][5];
+ tree dreg_types[5], qreg_types[5];
+
+ dreg_types[0] = V8QI_type_node;
+ dreg_types[1] = V4HI_type_node;
+ dreg_types[2] = V2SI_type_node;
+ dreg_types[3] = V2SF_type_node;
+ dreg_types[4] = neon_intDI_type_node;
+
+ qreg_types[0] = V16QI_type_node;
+ qreg_types[1] = V8HI_type_node;
+ qreg_types[2] = V4SI_type_node;
+ qreg_types[3] = V4SF_type_node;
+ qreg_types[4] = V2DI_type_node;
+
+ for (i = 0; i < 5; i++)
+ {
+ int j;
+ for (j = 0; j < 5; j++)
+ {
+ reinterp_ftype_dreg[i][j]
+ = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
+ reinterp_ftype_qreg[i][j]
+ = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
+ }
+ }
+ /* Declare the builtins. For each table entry, J walks the T_MAX mode
+ variants and CODEIDX walks the entry's codes array, which is consumed
+ only for the variants whose bit is set in the entry's bits field. */
+ for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
+ {
+ neon_builtin_datum *d = &neon_builtin_data[i];
+ unsigned int j, codeidx = 0;
+
+ d->base_fcode = fcode;
+
+ for (j = 0; j < T_MAX; j++)
+ {
+ const char* const modenames[] = {
+ "v8qi", "v4hi", "v2si", "v2sf", "di",
+ "v16qi", "v8hi", "v4si", "v4sf", "v2di"
+ };
+ char namebuf[60];
+ tree ftype = NULL;
+ enum insn_code icode;
+ int is_load = 0, is_store = 0;
+
+ if ((d->bits & (1 << j)) == 0)
+ continue;
+
+ icode = d->codes[codeidx++];
+
+ switch (d->itype)
+ {
+ case NEON_LOAD1:
+ case NEON_LOAD1LANE:
+ case NEON_LOADSTRUCT:
+ case NEON_LOADSTRUCTLANE:
+ is_load = 1;
+ /* Fall through. */
+ case NEON_STORE1:
+ case NEON_STORE1LANE:
+ case NEON_STORESTRUCT:
+ case NEON_STORESTRUCTLANE:
+ if (!is_load)
+ is_store = 1; /* Entered at a store case, not by falling through from a load. */
+ /* Fall through. */
+ case NEON_UNOP:
+ case NEON_BINOP:
+ case NEON_LOGICBINOP:
+ case NEON_SHIFTINSERT:
+ case NEON_TERNOP:
+ case NEON_GETLANE:
+ case NEON_SETLANE:
+ case NEON_CREATE:
+ case NEON_DUP:
+ case NEON_DUPLANE:
+ case NEON_SHIFTIMM:
+ case NEON_SHIFTACC:
+ case NEON_COMBINE:
+ case NEON_SPLIT:
+ case NEON_CONVERT:
+ case NEON_FIXCONV:
+ case NEON_LANEMUL:
+ case NEON_LANEMULL:
+ case NEON_LANEMULH:
+ case NEON_LANEMAC:
+ case NEON_SCALARMUL:
+ case NEON_SCALARMULL:
+ case NEON_SCALARMULH:
+ case NEON_SCALARMAC:
+ case NEON_SELECT:
+ case NEON_VTBL:
+ case NEON_VTBX:
+ {
+ int k;
+ tree return_type = void_type_node, args = void_list_node;
+
+ /* Build a function type directly from the insn_data for this
+ builtin. The build_function_type() function takes care of
+ removing duplicates for us. */
+ for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
+ {
+ tree eltype;
+
+ if (is_load && k == 1)
+ {
+ /* Neon load patterns always have the memory operand
+ (a SImode pointer) in the operand 1 position. We
+ want a const pointer to the element type in that
+ position. */
+ gcc_assert (insn_data[icode].operand[k].mode == SImode);
+
+ switch (1 << j)
+ {
+ case T_V8QI:
+ case T_V16QI:
+ eltype = const_intQI_pointer_node;
+ break;
+
+ case T_V4HI:
+ case T_V8HI:
+ eltype = const_intHI_pointer_node;
+ break;
+
+ case T_V2SI:
+ case T_V4SI:
+ eltype = const_intSI_pointer_node;
+ break;
+
+ case T_V2SF:
+ case T_V4SF:
+ eltype = const_float_pointer_node;
+ break;
+
+ case T_DI:
+ case T_V2DI:
+ eltype = const_intDI_pointer_node;
+ break;
+
+ default: gcc_unreachable ();
+ }
+ }
+ else if (is_store && k == 0)
+ {
+ /* Similarly, Neon store patterns use operand 0 as
+ the memory location to store to (a SImode pointer).
+ Use a pointer to the element type of the store in
+ that position. */
+ gcc_assert (insn_data[icode].operand[k].mode == SImode);
+
+ switch (1 << j)
+ {
+ case T_V8QI:
+ case T_V16QI:
+ eltype = intQI_pointer_node;
+ break;
+
+ case T_V4HI:
+ case T_V8HI:
+ eltype = intHI_pointer_node;
+ break;
+
+ case T_V2SI:
+ case T_V4SI:
+ eltype = intSI_pointer_node;
+ break;
+
+ case T_V2SF:
+ case T_V4SF:
+ eltype = float_pointer_node;
+ break;
+
+ case T_DI:
+ case T_V2DI:
+ eltype = intDI_pointer_node;
+ break;
+
+ default: gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (insn_data[icode].operand[k].mode)
+ {
+ case VOIDmode: eltype = void_type_node; break;
+ /* Scalars. */
+ case QImode: eltype = neon_intQI_type_node; break;
+ case HImode: eltype = neon_intHI_type_node; break;
+ case SImode: eltype = neon_intSI_type_node; break;
+ case SFmode: eltype = neon_float_type_node; break;
+ case DImode: eltype = neon_intDI_type_node; break;
+ case TImode: eltype = intTI_type_node; break;
+ case EImode: eltype = intEI_type_node; break;
+ case OImode: eltype = intOI_type_node; break;
+ case CImode: eltype = intCI_type_node; break;
+ case XImode: eltype = intXI_type_node; break;
+ /* 64-bit vectors. */
+ case V8QImode: eltype = V8QI_type_node; break;
+ case V4HImode: eltype = V4HI_type_node; break;
+ case V2SImode: eltype = V2SI_type_node; break;
+ case V2SFmode: eltype = V2SF_type_node; break;
+ /* 128-bit vectors. */
+ case V16QImode: eltype = V16QI_type_node; break;
+ case V8HImode: eltype = V8HI_type_node; break;
+ case V4SImode: eltype = V4SI_type_node; break;
+ case V4SFmode: eltype = V4SF_type_node; break;
+ case V2DImode: eltype = V2DI_type_node; break;
+ default: gcc_unreachable ();
+ }
+ }
+ /* Operand 0 supplies the return type, except for stores, where it
+ is the address and becomes an argument. Operands are visited
+ last to first, so consing onto ARGS leaves them in operand
+ order. */
+ if (k == 0 && !is_store)
+ return_type = eltype;
+ else
+ args = tree_cons (NULL_TREE, eltype, args);
+ }
+
+ ftype = build_function_type (return_type, args);
+ }
+ break;
+
+ case NEON_RESULTPAIR:
+ {
+ switch (insn_data[icode].operand[1].mode)
+ {
+ case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
+ case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
+ case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
+ case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
+ case DImode: ftype = void_ftype_pdi_di_di; break;
+ case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
+ case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
+ case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
+ case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
+ case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
+ default: gcc_unreachable ();
+ }
+ }
+ break;
+
+ case NEON_REINTERP:
+ {
+ /* We iterate over 5 doubleword types, then 5 quadword
+ types. */
+ int rhs = j % 5;
+ switch (insn_data[icode].operand[0].mode)
+ {
+ case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
+ case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
+ case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
+ case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
+ case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
+ case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
+ case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
+ case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
+ case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
+ case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
+ default: gcc_unreachable ();
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (ftype != NULL);
+
+ sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
+
+ add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ }
+ }
+}
+
static void
arm_init_builtins (void)
{
if (TARGET_REALLY_IWMMXT)
arm_init_iwmmxt_builtins ();
+ /* Likewise, the NEON builtins are only set up when TARGET_NEON holds. */
+ if (TARGET_NEON)
+ arm_init_neon_builtins ();
}
/* Errors in the source file can cause expand_expr to return const0_rtx
return target;
}
+/* Comparison callback used when searching neon_builtin_data.  A is a
+   key whose base_fcode field holds the function code being sought and
+   B is a table entry.  An entry matches when the sought code lies
+   within the num_vars codes that start at the entry's base_fcode;
+   otherwise the result says on which side of the entry the code
+   lies.  */
+static int
+neon_builtin_compare (const void *a, const void *b)
+{
+  const neon_builtin_datum *sought = a;
+  const neon_builtin_datum *entry = b;
+  unsigned int code = sought->base_fcode;
+
+  if (code < entry->base_fcode)
+    return -1;
+
+  if (code >= entry->base_fcode + entry->num_vars)
+    return 1;
+
+  return 0;
+}
+
+/* Return the insn code for the Neon builtin whose function code is
+   FCODE, by searching neon_builtin_data for the entry whose range of
+   function codes contains it.  If ITYPE is non-null, the entry's
+   instruction type is stored through it as well.  */
+static enum insn_code
+locate_neon_builtin_icode (int fcode, neon_itype *itype)
+{
+  neon_builtin_datum probe;
+  neon_builtin_datum *entry;
+  int variant;
+
+  /* neon_builtin_compare reads nothing but base_fcode from the key.  */
+  probe.base_fcode = fcode;
+  entry = bsearch (&probe, neon_builtin_data, ARRAY_SIZE (neon_builtin_data),
+                   sizeof (neon_builtin_data[0]), neon_builtin_compare);
+  gcc_assert (entry);
+
+  /* Position of FCODE among the variants of the entry found.  */
+  variant = fcode - (int) entry->base_fcode;
+  gcc_assert (variant >= 0 && variant < T_MAX
+              && variant < (int)entry->num_vars);
+
+  if (itype != NULL)
+    *itype = entry->itype;
+
+  return entry->codes[variant];
+}
+/* How arm_expand_neon_args is to treat one argument of a builtin call. */
+typedef enum {
+ NEON_ARG_COPY_TO_REG, /* Copy to a register if the operand predicate rejects it. */
+ NEON_ARG_CONSTANT, /* Report an error if the operand predicate rejects it. */
+ NEON_ARG_STOP /* Marks the end of the argument descriptor list. */
+} builtin_arg;
+/* Size of the per-argument arrays in arm_expand_neon_args. */
+#define NEON_MAX_BUILTIN_ARGS 5
+
+/* Expand the arguments of the Neon builtin call EXP and emit insn
+   ICODE for them.  HAVE_RETVAL is 1 if operand 0 of the insn is the
+   result, which goes to TARGET (or to a fresh register if TARGET is
+   missing, has the wrong mode, or fails the operand predicate), and 0
+   if the insn has no result operand.  The variable arguments are
+   builtin_arg codes, one per call argument, terminated by
+   NEON_ARG_STOP; at most NEON_MAX_BUILTIN_ARGS arguments may be
+   described.  Returns TARGET, or 0 if the generator function produced
+   no pattern.  */
+static rtx
+arm_expand_neon_args (rtx target, int icode, int have_retval,
+                      tree exp, ...)
+{
+  va_list ap;
+  rtx pat;
+  tree arg[NEON_MAX_BUILTIN_ARGS];
+  rtx op[NEON_MAX_BUILTIN_ARGS];
+  enum machine_mode tmode = insn_data[icode].operand[0].mode;
+  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
+  int argc = 0;
+
+  if (have_retval
+      && (!target
+          || GET_MODE (target) != tmode
+          || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
+    target = gen_reg_rtx (tmode);
+
+  va_start (ap, exp);
+
+  for (;;)
+    {
+      builtin_arg thisarg = va_arg (ap, int);
+
+      if (thisarg == NEON_ARG_STOP)
+        break;
+
+      /* The arrays above are fixed-size; a descriptor list that is too
+         long (or is missing its NEON_ARG_STOP) must not be allowed to
+         run off the end of them.  */
+      gcc_assert (argc < NEON_MAX_BUILTIN_ARGS);
+
+      /* Insn operands are offset by one from the call arguments when
+         operand 0 is the result.  */
+      arg[argc] = CALL_EXPR_ARG (exp, argc);
+      op[argc] = expand_normal (arg[argc]);
+      mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
+
+      switch (thisarg)
+        {
+        case NEON_ARG_COPY_TO_REG:
+          if (!(*insn_data[icode].operand[argc + have_retval].predicate)
+                 (op[argc], mode[argc]))
+            op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
+          break;
+
+        case NEON_ARG_CONSTANT:
+          /* FIXME: This error message is somewhat unhelpful.  */
+          if (!(*insn_data[icode].operand[argc + have_retval].predicate)
+                 (op[argc], mode[argc]))
+            error ("argument must be a constant");
+          break;
+
+        case NEON_ARG_STOP:
+          gcc_unreachable ();
+        }
+
+      argc++;
+    }
+
+  va_end (ap);
+
+  if (have_retval)
+    switch (argc)
+      {
+      case 1:
+        pat = GEN_FCN (icode) (target, op[0]);
+        break;
+
+      case 2:
+        pat = GEN_FCN (icode) (target, op[0], op[1]);
+        break;
+
+      case 3:
+        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+        break;
+
+      case 4:
+        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+        break;
+
+      case 5:
+        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+        break;
+
+      default:
+        gcc_unreachable ();
+      }
+  else
+    switch (argc)
+      {
+      case 1:
+        pat = GEN_FCN (icode) (op[0]);
+        break;
+
+      case 2:
+        pat = GEN_FCN (icode) (op[0], op[1]);
+        break;
+
+      case 3:
+        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+        break;
+
+      case 4:
+        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+        break;
+
+      case 5:
+        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+        break;
+
+      default:
+        gcc_unreachable ();
+      }
+
+  if (!pat)
+    return 0;
+
+  emit_insn (pat);
+
+  return target;
+}
+
+/* Expand the call EXP to the Neon builtin with function code FCODE,
+   with the result going to TARGET when the builtin has one.  Neon
+   builtins are "special" in that no symbolic constant exists per
+   instruction or per instruction variant; the insn code and the
+   instruction type are looked up in neon_builtin_data instead.  The
+   instruction type alone selects the argument descriptor list handed
+   to arm_expand_neon_args, so the cases below are grouped by the shape
+   of that list.  */
+static rtx
+arm_expand_neon_builtin (int fcode, tree exp, rtx target)
+{
+  neon_itype kind;
+  enum insn_code insn = locate_neon_builtin_icode (fcode, &kind);
+
+  switch (kind)
+    {
+      /* Builtins producing a value.  */
+
+    case NEON_CREATE:
+    case NEON_DUP:
+    case NEON_SPLIT:
+    case NEON_REINTERP:
+    case NEON_LOAD1:
+    case NEON_LOADSTRUCT:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_STOP);
+
+    case NEON_UNOP:
+    case NEON_CONVERT:
+    case NEON_DUPLANE:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_STOP);
+
+    case NEON_GETLANE:
+    case NEON_FIXCONV:
+    case NEON_SHIFTIMM:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_STOP);
+
+    case NEON_COMBINE:
+    case NEON_VTBL:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_STOP);
+
+    case NEON_BINOP:
+    case NEON_SETLANE:
+    case NEON_SCALARMUL:
+    case NEON_SCALARMULL:
+    case NEON_SCALARMULH:
+    case NEON_SHIFTINSERT:
+    case NEON_LOGICBINOP:
+    case NEON_LOAD1LANE:
+    case NEON_LOADSTRUCTLANE:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_STOP);
+
+    case NEON_LANEMUL:
+    case NEON_LANEMULL:
+    case NEON_LANEMULH:
+    case NEON_SHIFTACC:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_STOP);
+
+    case NEON_SELECT:
+    case NEON_VTBX:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_STOP);
+
+    case NEON_TERNOP:
+    case NEON_SCALARMAC:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_STOP);
+
+    case NEON_LANEMAC:
+      return arm_expand_neon_args (target, insn, 1, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_STOP);
+
+      /* Builtins with no result operand.  */
+
+    case NEON_STORE1:
+    case NEON_STORESTRUCT:
+      return arm_expand_neon_args (target, insn, 0, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_STOP);
+
+    case NEON_STORE1LANE:
+    case NEON_STORESTRUCTLANE:
+      return arm_expand_neon_args (target, insn, 0, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_CONSTANT,
+                                   NEON_ARG_STOP);
+
+    case NEON_RESULTPAIR:
+      return arm_expand_neon_args (target, insn, 0, exp,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_COPY_TO_REG,
+                                   NEON_ARG_STOP);
+    }
+
+  gcc_unreachable ();
+}
+
+/* Emit a move of SRC into DEST, with SRC viewed in DEST's mode so that
+   one Neon type is reinterpreted as another without altering bits.  */
+void
+neon_reinterpret (rtx dest, rtx src)
+{
+  rtx src_in_dest_mode = gen_lowpart (GET_MODE (dest), src);
+
+  emit_move_insn (dest, src_in_dest_mode);
+}
+
+/* Emit the insn generated by INTFN, which is called with two fresh
+   MODE registers as its first and third arguments and OP1 and OP2 as
+   its second and fourth, then store those two registers in memory: the
+   first at address DESTADDR and the second GET_MODE_SIZE (MODE) bytes
+   after it.  */
+void
+neon_emit_pair_result_insn (enum machine_mode mode,
+                            rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
+                            rtx op1, rtx op2)
+{
+  rtx first_slot = gen_rtx_MEM (mode, destaddr);
+  rtx first_result = gen_reg_rtx (mode);
+  rtx second_result = gen_reg_rtx (mode);
+  rtx second_slot;
+
+  emit_insn (intfn (first_result, op1, second_result, op2));
+
+  emit_move_insn (first_slot, first_result);
+
+  /* The second slot is derived only after the first store has been
+     emitted, so that the emission order is unchanged.  */
+  second_slot = adjust_address (first_slot, mode, GET_MODE_SIZE (mode));
+  emit_move_insn (second_slot, second_result);
+}
+
+/* Set up operands for a register copy from src to dest, taking care not to
+   clobber registers in the process.  SRC and DEST each hold COUNT
+   registers, with SRC[i] to be copied to DEST[i].  On return OPERANDS
+   holds COUNT (destination, source) pairs, in an order in which the
+   copies can be performed without overwriting a source that has yet to
+   be read.  The copies must not form a cycle (e.g. a swap), since that
+   cannot be ordered without a scratch register; this is checked.
+   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
+   be called with a large N, so that should be OK.  */
+
+void
+neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
+{
+  /* COPIED has bit I set once copy I has been placed in OPERANDS.  */
+  unsigned int copied = 0, opctr = 0;
+  unsigned int done = (1u << count) - 1;
+  unsigned int i, j;
+
+  while (copied != done)
+    {
+      unsigned int copied_before = copied;
+
+      for (i = 0; i < count; i++)
+        {
+          int good = 1;
+
+          /* A copy placed by an earlier pass must not be placed again,
+             or OPERANDS would receive more than COUNT pairs.  */
+          if (copied & (1u << i))
+            continue;
+
+          /* DEST[i] may be written only when no copy still pending
+             needs to read it.  */
+          for (j = 0; good && j < count; j++)
+            if (i != j && (copied & (1u << j)) == 0
+                && reg_overlap_mentioned_p (src[j], dest[i]))
+              good = 0;
+
+          if (good)
+            {
+              operands[opctr++] = dest[i];
+              operands[opctr++] = src[i];
+              copied |= 1u << i;
+            }
+        }
+
+      /* A pass that places nothing means the remaining copies depend on
+         each other in a cycle; fail rather than loop for ever.  */
+      gcc_assert (copied != copied_before);
+    }
+
+  gcc_assert (opctr == count * 2);
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
enum machine_mode mode1;
enum machine_mode mode2;
+ if (fcode >= ARM_BUILTIN_NEON_BASE)
+ return arm_expand_neon_builtin (fcode, exp, target);
+
switch (fcode)
{
case ARM_BUILTIN_TEXTRMSB:
{
/* If we can deduce the registers used from the function's
return value. This is more reliable that examining
- regs_ever_live[] because that will be set if the register is
+ df_regs_ever_live_p () because that will be set if the register is
ever used in the function, not just if the register is used
to hold a return value. */
If we need doubleword stack alignment this could affect the other
elimination offsets so we can't risk getting it wrong. */
- if (regs_ever_live [ARG_POINTER_REGNUM])
+ if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
cfun->machine->arg_pointer_live = 1;
else if (!cfun->machine->arg_pointer_live)
return 0;
high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
/* If we can deduce the registers used from the function's return value.
- This is more reliable that examining regs_ever_live[] because that
+ This is more reliable than examining df_regs_ever_live_p () because that
will be set if the register is ever used in the function, not just if
the register is used to hold a return value. */
size = arm_size_return_regs ();
cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
if (live_regs_mask & 0xff)
cfun->machine->lr_save_eliminated = 0;
-
- /* If the link register is being kept alive, with the return address in it,
- then make sure that it does not get reused by the ce2 pass. */
- if (cfun->machine->lr_save_eliminated)
- emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
}
amount = offsets->locals_base - offsets->saved_regs;
}
+ gcc_assert (amount >= 0);
if (amount)
{
if (amount < 512)
/* Emit a clobber for each insn that will be restored in the epilogue,
so that flow2 will get register lifetimes correct. */
for (regno = 0; regno < 13; regno++)
- if (regs_ever_live[regno] && !call_used_regs[regno])
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
- if (! regs_ever_live[LR_REGNUM])
+ if (! df_regs_ever_live_p (LR_REGNUM))
emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
}
}
else
{
+ int set_float_abi_attributes = 0;
switch (arm_fpu_arch)
{
case FPUTYPE_FPA:
fpu_name = "maverick";
break;
case FPUTYPE_VFP:
- if (TARGET_HARD_FLOAT)
- asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
- if (TARGET_HARD_FLOAT_ABI)
- asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
fpu_name = "vfp";
+ set_float_abi_attributes = 1;
+ break;
+ case FPUTYPE_VFP3:
+ fpu_name = "vfp3";
+ set_float_abi_attributes = 1;
+ break;
+ case FPUTYPE_NEON:
+ fpu_name = "neon";
+ set_float_abi_attributes = 1;
break;
default:
abort();
}
+ if (set_float_abi_attributes)
+ {
+ if (TARGET_HARD_FLOAT)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
+ if (TARGET_HARD_FLOAT_ABI)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
+ }
}
asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
#endif
- /* If we are referencing a function that is weak then encode a long call
- flag in the function name, otherwise if the function is static or
- or known to be defined in this file then encode a short call flag. */
- if (first && DECL_P (decl))
- {
- if (TREE_CODE (decl) == FUNCTION_DECL && DECL_WEAK (decl))
- arm_encode_call_attribute (decl, LONG_CALL_FLAG_CHAR);
- else if (! TREE_PUBLIC (decl))
- arm_encode_call_attribute (decl, SHORT_CALL_FLAG_CHAR);
- }
-
default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
&& !reg_overlap_mentioned_p (value, XEXP (op, 0)));
}
-
/* We can't rely on the caller doing the proper promotion when
using APCS or ATPCS. */
return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
+/* Return non-zero if CONSUMER, a multiply-accumulate instruction, uses
+   the result of PRODUCER, a multiplication instruction, as its
+   accumulator and in neither of its multiplication operands.  Return
+   zero as well when the two patterns, after looking inside any
+   COND_EXEC, do not have the expected (set ... (mult ...)) and
+   (set ... (plus (mult ...) ...)) forms.  */
+int
+arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
+{
+  rtx mul_pat = PATTERN (producer);
+  rtx mac_pat = PATTERN (consumer);
+  rtx mac_sum, mac_product, mul_dest;
+
+  /* Look at the conditionally executed body, not the wrapper.  */
+  if (GET_CODE (mul_pat) == COND_EXEC)
+    mul_pat = COND_EXEC_CODE (mul_pat);
+  if (GET_CODE (mac_pat) == COND_EXEC)
+    mac_pat = COND_EXEC_CODE (mac_pat);
+
+  /* The producer must be (set (...) (mult ...)).  */
+  if (GET_CODE (mul_pat) != SET || GET_CODE (XEXP (mul_pat, 1)) != MULT)
+    return 0;
+
+  /* The consumer must be (set (...) (plus (mult ...) (...))).  */
+  if (GET_CODE (mac_pat) != SET)
+    return 0;
+  mac_sum = XEXP (mac_pat, 1);
+  if (GET_CODE (mac_sum) != PLUS)
+    return 0;
+  mac_product = XEXP (mac_sum, 0);
+  if (GET_CODE (mac_product) != MULT)
+    return 0;
+
+  mul_dest = XEXP (mul_pat, 0);
+
+  /* The accumulator must depend on the multiply result...  */
+  if (!reg_overlap_mentioned_p (mul_dest, XEXP (mac_sum, 1)))
+    return 0;
+
+  /* ...and neither multiplication operand may.  */
+  if (reg_overlap_mentioned_p (mul_dest, XEXP (mac_product, 0)))
+    return 0;
+
+  return !reg_overlap_mentioned_p (mul_dest, XEXP (mac_product, 1));
+}
+
/* The EABI says test the least significant bit of a guard variable. */
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
+ /* Neon also supports V2SImode, etc. listed in the clause below. */
+ if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
+ || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
+ return true;
+
if ((mode == V2SImode)
|| (mode == V4HImode)
|| (mode == V8QImode))
if (IS_FPA_REGNUM (regno))
return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
+ /* FIXME: VFPv3 register numbering. */
if (IS_VFP_REGNUM (regno))
return 64 + regno - FIRST_VFP_REGNUM;
return TRUE;
}
+/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  Write to FILE
+   a .word directive whose value is the address constant X followed by
+   the "(tlsldo)" suffix.  SIZE must be 4.  */
+
+static void
+arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+  gcc_assert (size == 4);
+
+  fprintf (file, "\t.word\t");
+  output_addr_const (file, x);
+  fprintf (file, "(tlsldo)");
+}
+
bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
}
}
+/* A table and a function to perform ARM-specific name mangling for
+ NEON vector types in order to conform to the AAPCS (see "Procedure
+ Call Standard for the ARM Architecture", Appendix A). To qualify
+ for emission with the mangled names defined in that document, a
+ vector type must not only be of the correct mode but also be
+ composed of NEON vector element types (e.g. __builtin_neon_qi).
+ The table ends with an entry whose mode is VOIDmode, and each
+ aapcs_name begins with the decimal length of the identifier that
+ follows it. */
+typedef struct
+{
+ enum machine_mode mode; /* Mode of the vector type. */
+ const char *element_type_name; /* Name of the vector's element type. */
+ const char *aapcs_name; /* Mangled name returned for that pair. */
+} arm_mangle_map_entry;
+
+static arm_mangle_map_entry arm_mangle_map[] = {
+ /* 64-bit containerized types. */
+ { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
+ { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
+ { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
+ { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
+ { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
+ { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
+ { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
+ { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
+ { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
+ /* 128-bit containerized types. */
+ { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
+ { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
+ { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
+ { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
+ { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
+ { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
+ { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
+ { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
+ { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
+ { VOIDmode, NULL, NULL }
+};
+
+/* Return the AAPCS mangled name for TYPE if it is a vector type whose
+   mode and element type name both match an entry of arm_mangle_map.
+   Return NULL, meaning that the default mangling is to be used, for
+   every other type, including vectors whose element type is unnamed
+   or is not named through a TYPE_DECL.  */
+const char *
+arm_mangle_type (tree type)
+{
+  arm_mangle_map_entry *pos;
+  tree elt_decl;
+  const char *elt_name;
+
+  if (TREE_CODE (type) != VECTOR_TYPE)
+    return NULL;
+
+  /* Only an element type named through a TYPE_DECL can match the
+     table.  An unnamed element type has a null TYPE_NAME, which must
+     be rejected here rather than handed to TREE_CODE.  */
+  elt_decl = TYPE_NAME (TREE_TYPE (type));
+  if (elt_decl == NULL_TREE
+      || TREE_CODE (elt_decl) != TYPE_DECL
+      || DECL_NAME (elt_decl) == NULL_TREE)
+    return NULL;
+
+  elt_name = IDENTIFIER_POINTER (DECL_NAME (elt_decl));
+
+  /* Check the mode of the vector type, and the name of the vector
+     element type, against the table.  */
+  for (pos = arm_mangle_map; pos->mode != VOIDmode; pos++)
+    if (pos->mode == TYPE_MODE (type)
+        && !strcmp (elt_name, pos->element_type_name))
+      return pos->aapcs_name;
+
+  /* Use the default mangling for unrecognized (possibly user-defined)
+     vector types.  */
+  return NULL;
+}
+
#include "gt-arm.h"