-/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
-#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "output.h"
#include "basic-block.h"
#include "integrate.h"
-#include "toplev.h"
+#include "diagnostic-core.h"
#include "ggc.h"
#include "hashtab.h"
#include "tm_p.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "params.h"
-#include "assert.h"
#include "machmode.h"
#include "gimple.h"
#include "tm-constrs.h"
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs. */
+static void spu_option_override (void);
+static void spu_option_init_struct (struct gcc_options *opts);
+static void spu_option_default_params (void);
static void spu_init_builtins (void);
static tree spu_builtin_decl (unsigned, bool);
-static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
-static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
+static bool spu_scalar_mode_supported_p (enum machine_mode mode);
+static bool spu_vector_mode_supported_p (enum machine_mode mode);
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
+ bool, addr_space_t);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
int flags,
- unsigned char *no_add_attrs);
+ bool *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
int flags,
- unsigned char *no_add_attrs);
+ bool *no_add_attrs);
static int spu_naked_function_p (tree func);
-static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- const_tree type, unsigned char named);
+static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named);
+static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named);
+static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
static int reg_aligned_for_addr (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
-static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
- int *total, bool speed);
-static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
+static bool spu_rtx_costs (rtx x, int code, int outer_code,
+ int *total, bool speed);
+static bool spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
+ addr_space_t);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
-static int spu_builtin_vectorization_cost (bool);
+static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
+static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
+static enum machine_mode spu_addr_space_address_mode (addr_space_t);
+static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
+static rtx spu_addr_space_convert (rtx, tree, tree);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);
+static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
+static void spu_unique_section (tree, int);
static rtx spu_expand_load (rtx, rtx, rtx, int);
static void spu_trampoline_init (rtx, tree, rtx);
-
-extern const char *reg_names[];
+static void spu_conditional_register_usage (void);
/* Which instruction set architecture to use. */
int spu_arch;
static enum machine_mode spu_libgcc_shift_count_mode (void);
+
+/* Pointer mode for __ea references. */
+#define EAmode (spu_ea_model != 32 ? DImode : SImode)
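+
+/* For example, with -mea64 (spu_ea_model == 64) a pointer declared as
+ "__ea char *p" is DImode and 8 bytes wide; with -mea32 it is SImode
+ and matches the local-store Pmode. */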
+
\f
/* Table of machine attributes. */
static const struct attribute_spec spu_attribute_table[] =
\f
/* TARGET overrides. */
+#undef TARGET_ADDR_SPACE_POINTER_MODE
+#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
+
+#undef TARGET_ADDR_SPACE_ADDRESS_MODE
+#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
+
+#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
+#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
+ spu_addr_space_legitimate_address_p
+
+#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
+#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
+
+#undef TARGET_ADDR_SPACE_SUBSET_P
+#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
+
+#undef TARGET_ADDR_SPACE_CONVERT
+#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
+/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
+ and .quad for the debugger. When it is known that the assembler is fixed,
+ these can be removed. */
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
+
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+
/* The .8byte directive doesn't seem to work well for a 32 bit
architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG spu_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
+
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
-#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
-#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION spu_select_section
+
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
+
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE spu_option_override
+
+#undef TARGET_OPTION_INIT_STRUCT
+#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct
+
+#undef TARGET_OPTION_DEFAULT_PARAMS
+#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
+
struct gcc_target targetm = TARGET_INITIALIZER;
-void
-spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
+static void
+spu_option_init_struct (struct gcc_options *opts)
+{
+ /* With so many registers this is better on by default. */
+ opts->x_flag_rename_registers = 1;
+}
+
+/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
+static void
+spu_option_default_params (void)
{
/* Override some of the default param values. With so many registers
larger values are better for these params. */
- MAX_PENDING_LIST_LENGTH = 128;
-
- /* With so many registers this is better on by default. */
- flag_rename_registers = 1;
+ set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
}
-/* Sometimes certain combinations of command options do not make sense
- on a particular target machine. You can define a macro
- OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
- executed once just after all the command options have been parsed. */
-void
-spu_override_options (void)
+/* Implement TARGET_OPTION_OVERRIDE. */
+static void
+spu_option_override (void)
{
/* Small loops will be completely peeled at -O3. For SPU it is more
important to keep code small by default. */
- if (!flag_unroll_loops && !flag_peel_loops
- && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
- PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
+ if (!flag_unroll_loops && !flag_peel_loops)
+ maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 1,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
flag_omit_frame_pointer = 1;
else if (strcmp (&spu_arch_string[0], "celledp") == 0)
spu_arch = PROCESSOR_CELLEDP;
else
- error ("Unknown architecture '%s'", &spu_arch_string[0]);
+ error ("unknown architecture %qs", &spu_arch_string[0]);
}
/* Determine processor to tune for. */
else if (strcmp (&spu_tune_string[0], "celledp") == 0)
spu_tune = PROCESSOR_CELLEDP;
else
- error ("Unknown architecture '%s'", &spu_tune_string[0]);
+ error ("unknown architecture %qs", &spu_tune_string[0]);
}
/* Change defaults according to the processor architecture. */
HOST_WIDE_INT width = INTVAL (ops[1]);
HOST_WIDE_INT start = INTVAL (ops[2]);
HOST_WIDE_INT maskbits;
- enum machine_mode dst_mode, src_mode;
+ enum machine_mode dst_mode;
rtx dst = ops[0], src = ops[3];
- int dst_size, src_size;
+ int dst_size;
rtx mask;
rtx shift_reg;
int shift;
src = force_reg (m, convert_to_mode (m, src, 0));
}
src = adjust_operand (src, 0);
- src_mode = GET_MODE (src);
- src_size = GET_MODE_BITSIZE (GET_MODE (src));
mask = gen_reg_rtx (dst_mode);
shift_reg = gen_reg_rtx (dst_mode);
if (eq_rtx == 0)
abort ();
emit_insn (eq_rtx);
- ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
+ ior_code = optab_handler (ior_optab, comp_mode);
gcc_assert (ior_code != CODE_FOR_nothing);
emit_insn (GEN_FCN (ior_code)
(compare_result, compare_result, eq_result));
gcc_unreachable ();
}
-extern char call_used_regs[];
-
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
caller saved register. For leaf functions it is more efficient to
use a volatile register because we won't need to save and restore the
rtx pic_reg = pic_offset_table_rtx;
if (!reload_completed && !reload_in_progress)
abort ();
+ if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
+ pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
return pic_reg;
}
rtx scratch_reg_0, scratch_reg_1;
rtx insn, real;
- /* A NOTE_INSN_DELETED is supposed to be at the start and end of
- the "toplevel" insn chain. */
- emit_note (NOTE_INSN_DELETED);
-
if (flag_pic && optimize == 0)
crtl->uses_pic_offset_table = 1;
}
}
- emit_note (NOTE_INSN_DELETED);
}
void
rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
rtx jump, scratch_reg_0;
- /* A NOTE_INSN_DELETED is supposed to be at the start and end of
- the "toplevel" insn chain. */
- emit_note (NOTE_INSN_DELETED);
-
if (spu_naked_function_p (current_function_decl))
return;
emit_barrier_after (jump);
}
- emit_note (NOTE_INSN_DELETED);
}
rtx
return;
/* If BEFORE is a basic block note, advance past it so the hint is
emitted after the note. */
- if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
+ if (NOTE_INSN_BASIC_BLOCK_P (before))
before = NEXT_INSN (before);
branch_label = gen_label_rtx ();
return FALSE;
}
+/* A for_each_rtx callback. Return true if *PX is a SYMBOL_REF
+ (possibly with a CONST_INT offset) referring to an __ea qualified
+ variable. */
+
+static int
+ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+ tree decl;
+
+ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ rtx plus = XEXP (x, 0);
+ rtx op0 = XEXP (plus, 0);
+ rtx op1 = XEXP (plus, 1);
+ if (GET_CODE (op1) == CONST_INT)
+ x = op0;
+ }
+
+ return (GET_CODE (x) == SYMBOL_REF
+ && (decl = SYMBOL_REF_DECL (x)) != 0
+ && TREE_CODE (decl) == VAR_DECL
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
+}
+
/* We accept:
- any 32-bit constant (SImode, SFmode)
- any constant that can be generated with fsmbi (any mode)
{
if (GET_CODE (x) == HIGH)
x = XEXP (x, 0);
+
+ /* Reject any __ea qualified reference. These can't appear in
+ instructions but must be forced to the constant pool. */
+ if (for_each_rtx (&x, ea_symbol_ref, 0))
+ return 0;
+
/* V4SI with all identical symbols is valid. */
if (!flag_pic
&& GET_MODE (x) == V4SImode
switch (GET_CODE (x))
{
case LABEL_REF:
+ return !TARGET_LARGE_MEM;
+
case SYMBOL_REF:
case CONST:
+ /* Keep __ea references until reload so that spu_expand_mov can see them
+ in MEMs. */
+ if (ea_symbol_ref (&x, 0))
+ return !reload_in_progress && !reload_completed;
return !TARGET_LARGE_MEM;
case CONST_INT:
return FALSE;
}
+/* Like spu_legitimate_address_p, except with named addresses. */
+static bool
+spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
+ bool reg_ok_strict, addr_space_t as)
+{
+ if (as == ADDR_SPACE_EA)
+ return (REG_P (x) && (GET_MODE (x) == EAmode));
+
+ else if (as != ADDR_SPACE_GENERIC)
+ gcc_unreachable ();
+
+ return spu_legitimate_address_p (mode, x, reg_ok_strict);
+}
+
/* When the address is reg + const_int, force the const_int into a
register. */
rtx
return x;
}
+/* Like spu_legitimate_address, except with named address support. */
+static rtx
+spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
+ addr_space_t as)
+{
+ if (as != ADDR_SPACE_GENERIC)
+ return x;
+
+ return spu_legitimize_address (x, oldx, mode);
+}
+
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
struct attribute_spec.handler. */
static tree
return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
}
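+/* Implement TARGET_FUNCTION_ARG. *CUM counts the 16-byte argument
+ registers already used. Return 0 (pass on the stack) when the
+ argument does not fit entirely within the first MAX_REGISTER_ARGS
+ registers. */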
-rtx
-spu_function_arg (CUMULATIVE_ARGS cum,
+static rtx
+spu_function_arg (CUMULATIVE_ARGS *cum,
enum machine_mode mode,
- tree type, int named ATTRIBUTE_UNUSED)
+ const_tree type, bool named ATTRIBUTE_UNUSED)
{
int byte_size;
- if (cum >= MAX_REGISTER_ARGS)
+ if (*cum >= MAX_REGISTER_ARGS)
return 0;
byte_size = ((mode == BLKmode)
/* The ABI does not allow parameters to be passed partially in
reg and partially in stack. */
- if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
+ if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
return 0;
/* Make sure small structs are left justified in a register. */
byte_size = 4;
smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
+ gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
const0_rtx);
return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
}
else
- return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
+ return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
+}
+
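+/* Implement TARGET_FUNCTION_ARG_ADVANCE. Every argument, however
+ small, occupies at least one 16-byte register slot; e.g. a 24-byte
+ BLKmode struct advances *CUM by (24 + 15) / 16 == 2. */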
+static void
+spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+ ? 1
+ : mode == BLKmode
+ ? ((int_size_in_bytes (type) + 15) / 16)
+ : mode == VOIDmode
+ ? 1
+ : HARD_REGNO_NREGS (FIRST_ARG_REGNUM + *cum, mode));
}
/* Variable sized types are passed by reference. */
DECL_ALIGN (f_skip) = 128;
DECL_USER_ALIGN (f_skip) = 1;
- TREE_CHAIN (record) = type_decl;
+ TYPE_STUB_DECL (record) = type_decl;
TYPE_NAME (record) = type_decl;
TYPE_FIELDS (record) = f_args;
- TREE_CHAIN (f_args) = f_skip;
+ DECL_CHAIN (f_args) = f_skip;
/* We know this is being padded and we want the padding. It is an
internal type so hide the warnings from the user. */
tree args, skip, t;
f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
- f_skip = TREE_CHAIN (f_args);
+ f_skip = DECL_CHAIN (f_args);
- valist = build_va_arg_indirect_ref (valist);
+ valist = build_simple_mem_ref (valist);
args =
build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
skip =
bool pass_by_reference_p;
f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
- f_skip = TREE_CHAIN (f_args);
+ f_skip = DECL_CHAIN (f_args);
- valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
+ valist = build_simple_mem_ref (valist);
args =
build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
skip =
/* CUM currently points to the last named argument; we want to
start at the next argument. */
- FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
+ spu_function_arg_advance (&ncum, mode, type, true);
offset = -STACK_POINTER_OFFSET;
for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
}
}
\f
-void
+static void
spu_conditional_register_usage (void)
{
if (flag_pic)
return 0;
}
+static GTY(()) rtx cache_fetch; /* __cache_fetch function */
+static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
+static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
+
+/* MEM is known to be an __ea qualified memory access. Emit a call to
+ fetch the PPU memory into local store; the call returns the
+ local-store address of the copy in DATA_ADDR. */
+
+static void
+ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
+{
+ if (is_store)
+ {
+ rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
+ if (!cache_fetch_dirty)
+ cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
+ emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
+ 2, ea_addr, EAmode, ndirty, SImode);
+ }
+ else
+ {
+ if (!cache_fetch)
+ cache_fetch = init_one_libfunc ("__cache_fetch");
+ emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
+ 1, ea_addr, EAmode);
+ }
+}
+
+/* Like ea_load_store, but do the cache tag comparison and, for stores,
+ dirty bit marking, inline.
+
+ The cache control data structure is an array of
+
+ struct __cache_tag_array
+ {
+ unsigned int tag_lo[4];
+ unsigned int tag_hi[4];
+ void *data_pointer[4];
+ int reserved[4];
+ vector unsigned short dirty_bits[4];
+ } */
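+
+/* In C terms, the inline lookup expanded below is roughly (a sketch
+ only, assuming the layout above, and that __cache_tag_array_size is
+ a linker-provided absolute symbol giving the array size in bytes):
+
+ set = (struct __cache_tag_array *)
+ ((char *) __cache_tag_array + (ea & (__cache_tag_array_size - 128)));
+ way = the slot whose tag_lo[way] (and tag_hi[way] for 64-bit __ea)
+ equals ea & -128;
+ data_addr = (char *) set->data_pointer[way] + (ea & 127);
+
+ A miss falls back to the __cache_fetch call emitted by
+ ea_load_store. */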
+
+static void
+ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
+{
+ rtx ea_addr_si;
+ HOST_WIDE_INT v;
+ rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
+ rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
+ rtx index_mask = gen_reg_rtx (SImode);
+ rtx tag_arr = gen_reg_rtx (Pmode);
+ rtx splat_mask = gen_reg_rtx (TImode);
+ rtx splat = gen_reg_rtx (V4SImode);
+ rtx splat_hi = NULL_RTX;
+ rtx tag_index = gen_reg_rtx (Pmode);
+ rtx block_off = gen_reg_rtx (SImode);
+ rtx tag_addr = gen_reg_rtx (Pmode);
+ rtx tag = gen_reg_rtx (V4SImode);
+ rtx cache_tag = gen_reg_rtx (V4SImode);
+ rtx cache_tag_hi = NULL_RTX;
+ rtx cache_ptrs = gen_reg_rtx (TImode);
+ rtx cache_ptrs_si = gen_reg_rtx (SImode);
+ rtx tag_equal = gen_reg_rtx (V4SImode);
+ rtx tag_equal_hi = NULL_RTX;
+ rtx tag_eq_pack = gen_reg_rtx (V4SImode);
+ rtx tag_eq_pack_si = gen_reg_rtx (SImode);
+ rtx eq_index = gen_reg_rtx (SImode);
+ rtx bcomp, hit_label, hit_ref, cont_label, insn;
+
+ if (spu_ea_model != 32)
+ {
+ splat_hi = gen_reg_rtx (V4SImode);
+ cache_tag_hi = gen_reg_rtx (V4SImode);
+ tag_equal_hi = gen_reg_rtx (V4SImode);
+ }
+
+ emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
+ emit_move_insn (tag_arr, tag_arr_sym);
+ v = 0x0001020300010203LL;
+ emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
+ ea_addr_si = ea_addr;
+ if (spu_ea_model != 32)
+ ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
+
+ /* tag_index = ea_addr & (tag_array_size - 128) */
+ emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
+
+ /* splat ea_addr to all 4 slots. */
+ emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
+ /* Similarly for high 32 bits of ea_addr. */
+ if (spu_ea_model != 32)
+ emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
+
+ /* block_off = ea_addr & 127 */
+ emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
+
+ /* tag_addr = tag_arr + tag_index */
+ emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
+
+ /* Read cache tags. */
+ emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
+ if (spu_ea_model != 32)
+ emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
+ plus_constant (tag_addr, 16)));
+
+ /* tag = ea_addr & -128 */
+ emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
+
+ /* Read all four cache data pointers. */
+ emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
+ plus_constant (tag_addr, 32)));
+
+ /* Compare tags. */
+ emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
+ if (spu_ea_model != 32)
+ {
+ emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
+ emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
+ }
+
+ /* At most one of the tags compares equal, so tag_equal has one
+ 32-bit slot set to all 1's, with the other slots all zero.
+ gbb picks off the low bit from each byte in the 128-bit registers,
+ so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0 or 0x000f, assuming
+ we have a hit. */
+ emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
+ emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
+
+ /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
+ emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
+
+ /* This allows us to rotate the corresponding cache data pointer to
+ slot 0 (the rotate amount is eq_index mod 16 bytes). */
+ emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
+ emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
+
+ /* Add block offset to form final data address. */
+ emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
+
+ /* Check that we did hit. */
+ hit_label = gen_label_rtx ();
+ hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
+ bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ hit_ref, pc_rtx)));
+ /* Say that this branch is very likely to happen. */
+ v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
+ add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
+
+ ea_load_store (mem, is_store, ea_addr, data_addr);
+ cont_label = gen_label_rtx ();
+ emit_jump_insn (gen_jump (cont_label));
+ emit_barrier ();
+
+ emit_label (hit_label);
+
+ if (is_store)
+ {
+ HOST_WIDE_INT v_hi;
+ rtx dirty_bits = gen_reg_rtx (TImode);
+ rtx dirty_off = gen_reg_rtx (SImode);
+ rtx dirty_128 = gen_reg_rtx (TImode);
+ rtx neg_block_off = gen_reg_rtx (SImode);
+
+ /* Set up mask with one dirty bit per byte of the mem we are
+ writing, starting from top bit. */
+ v_hi = v = -1;
+ v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
+ if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
+ {
+ v_hi = v;
+ v = 0;
+ }
+ emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
+
+ /* Form index into cache dirty_bits. eq_index is one of
+ 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
+ 0x40, 0x50, 0x60 or 0x70 which just happens to be the
+ offset to each of the four dirty_bits elements. */
+ emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
+
+ emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
+
+ /* Rotate bit mask to proper bit. */
+ emit_insn (gen_negsi2 (neg_block_off, block_off));
+ emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
+ emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
+
+ /* Or in the new dirty bits. */
+ emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
+
+ /* Store. */
+ emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
+ }
+
+ emit_label (cont_label);
+}
+
+static rtx
+expand_ea_mem (rtx mem, bool is_store)
+{
+ rtx ea_addr;
+ rtx data_addr = gen_reg_rtx (Pmode);
+ rtx new_mem;
+
+ ea_addr = force_reg (EAmode, XEXP (mem, 0));
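+
+ /* Use the out-of-line __cache_fetch call when optimizing for size or
+ not optimizing at all; otherwise expand the cache-tag lookup
+ inline. */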
+ if (optimize_size || optimize == 0)
+ ea_load_store (mem, is_store, ea_addr, data_addr);
+ else
+ ea_load_store_inline (mem, is_store, ea_addr, data_addr);
+
+ if (ea_alias_set == -1)
+ ea_alias_set = new_alias_set ();
+
+ /* We generate a new MEM RTX to refer to the copy of the data
+ in the cache. We do not copy memory attributes (except the
+ alignment) from the original MEM, as they may no longer apply
+ to the cache copy. */
+ new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
+ set_mem_alias_set (new_mem, ea_alias_set);
+ set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
+
+ return new_mem;
+}
+
int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
{
- enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
+ enum insn_code icode = convert_optab_handler (trunc_optab,
+ mode, imode);
emit_insn (GEN_FCN (icode) (ops[0], from));
}
else
}
}
if (MEM_P (ops[0]))
- return spu_split_store (ops);
+ {
+ if (MEM_ADDR_SPACE (ops[0]))
+ ops[0] = expand_ea_mem (ops[0], true);
+ return spu_split_store (ops);
+ }
if (MEM_P (ops[1]))
- return spu_split_load (ops);
+ {
+ if (MEM_ADDR_SPACE (ops[1]))
+ ops[1] = expand_ea_mem (ops[1], false);
+ return spu_split_load (ops);
+ }
return 0;
}
}
}
+ gcc_assert (aform == 0 || aform == 1);
reg = gen_reg_rtx (TImode);
scalar = store_with_one_insn_p (ops[0]);
struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
- {fcode, icode, name, type, params, NULL_TREE},
+ {fcode, icode, name, type, params},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};
-/* Returns the rs6000 builtin decl for CODE. */
+static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
+
+/* Return the SPU builtin decl for CODE. */
static tree
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
if (code >= NUM_SPU_BUILTINS)
return error_mark_node;
- return spu_builtins[code].fndecl;
+ return spu_builtin_decls[code];
}
p = build_function_type (spu_builtin_types[d->parm[0]], p);
sprintf (name, "__builtin_%s", d->name);
- d->fndecl =
- add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
- NULL, NULL_TREE);
+ spu_builtin_decls[i] =
+ add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
if (d->fcode == SPU_MASK_FOR_LOAD)
- TREE_READONLY (d->fndecl) = 1;
+ TREE_READONLY (spu_builtin_decls[i]) = 1;
/* These builtins don't throw. */
- TREE_NOTHROW (d->fndecl) = 1;
+ TREE_NOTHROW (spu_builtin_decls[i]) = 1;
}
}
{
enum insn_code nor_code;
rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
- nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
+ nor_code = optab_handler (one_cmpl_optab, dest_mode);
gcc_assert (nor_code != CODE_FOR_nothing);
emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
if (dmode != dest_mode)
c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
- ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
+ ior_code = optab_handler (ior_optab, dest_mode);
gcc_assert (ior_code != CODE_FOR_nothing);
emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
if (dmode != dest_mode)
int range = p - SPU_BTI_7;
if (!CONSTANT_P (op))
- error ("%s expects an integer literal in the range [%d, %d].",
+ error ("%s expects an integer literal in the range [%d, %d]",
d->name,
spu_builtin_range[range].low, spu_builtin_range[range].high);
/* The default for v is 0 which is valid in every range. */
if (v < spu_builtin_range[range].low
|| v > spu_builtin_range[range].high)
- error ("%s expects an integer literal in the range [%d, %d]. ("
- HOST_WIDE_INT_PRINT_DEC ")",
+ error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
d->name,
spu_builtin_range[range].low, spu_builtin_range[range].high,
v);
|| (GET_CODE (op) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (op))
|| (v & ((1 << lsbits) - 1)) != 0)
- warning (0, "%d least significant bits of %s are ignored.", lsbits,
+ warning (0, "%d least significant bits of %s are ignored", lsbits,
d->name);
}
}
/* get addr */
arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
addr = memory_address (mode, op);
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
struct spu_builtin_description *d;
if (fcode < NUM_SPU_BUILTINS)
{
case V8HImode:
if (TYPE_UNSIGNED (type))
- return spu_builtins[SPU_MULE_0].fndecl;
+ return spu_builtin_decls[SPU_MULE_0];
else
- return spu_builtins[SPU_MULE_1].fndecl;
+ return spu_builtin_decls[SPU_MULE_1];
break;
default:
return NULL_TREE;
{
case V8HImode:
if (TYPE_UNSIGNED (type))
- return spu_builtins[SPU_MULO_1].fndecl;
+ return spu_builtin_decls[SPU_MULO_1];
else
- return spu_builtins[SPU_MULO_0].fndecl;
+ return spu_builtin_decls[SPU_MULO_0];
break;
default:
return NULL_TREE;
static tree
spu_builtin_mask_for_load (void)
{
- struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
- gcc_assert (d);
- return d->fndecl;
+ return spu_builtin_decls[SPU_MASK_FOR_LOAD];
}
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
-spu_builtin_vectorization_cost (bool runtime_test)
-{
- /* If the branch of the runtime test is taken - i.e. - the vectorized
- version is skipped - this incurs a misprediction cost (because the
- vectorized version is expected to be the fall-through). So we subtract
- the latency of a mispredicted branch from the costs that are incurred
- when the vectorized version is executed. */
- if (runtime_test)
- return -19;
- else
- return 0;
+spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
+{
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ case vector_stmt:
+ case vector_load:
+ case vector_store:
+ case vec_to_scalar:
+ case scalar_to_vec:
+ case cond_branch_not_taken:
+ case vec_perm:
+ return 1;
+
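+ /* A scalar store needs a load-quadword, insert and store-quadword
+ sequence, since the SPU only has 16-byte memory accesses, which
+ presumably accounts for the much higher cost. */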
+ case scalar_store:
+ return 10;
+
+ case scalar_load:
+ /* Load + rotate. */
+ return 2;
+
+ case unaligned_load:
+ return 2;
+
+ case cond_branch_taken:
+ return 6;
+
+ default:
+ gcc_unreachable ();
+ }
}
/* Return true iff a data reference of TYPE can reach vector alignment (16)
tree
spu_builtin_vec_perm (tree type, tree *mask_element_type)
{
- struct spu_builtin_description *d;
-
*mask_element_type = unsigned_char_type_node;
switch (TYPE_MODE (type))
{
case V16QImode:
if (TYPE_UNSIGNED (type))
- d = &spu_builtins[SPU_SHUFFLE_0];
+ return spu_builtin_decls[SPU_SHUFFLE_0];
else
- d = &spu_builtins[SPU_SHUFFLE_1];
- break;
+ return spu_builtin_decls[SPU_SHUFFLE_1];
case V8HImode:
if (TYPE_UNSIGNED (type))
- d = &spu_builtins[SPU_SHUFFLE_2];
+ return spu_builtin_decls[SPU_SHUFFLE_2];
else
- d = &spu_builtins[SPU_SHUFFLE_3];
- break;
+ return spu_builtin_decls[SPU_SHUFFLE_3];
case V4SImode:
if (TYPE_UNSIGNED (type))
- d = &spu_builtins[SPU_SHUFFLE_4];
+ return spu_builtin_decls[SPU_SHUFFLE_4];
else
- d = &spu_builtins[SPU_SHUFFLE_5];
- break;
+ return spu_builtin_decls[SPU_SHUFFLE_5];
case V2DImode:
if (TYPE_UNSIGNED (type))
- d = &spu_builtins[SPU_SHUFFLE_6];
+ return spu_builtin_decls[SPU_SHUFFLE_6];
else
- d = &spu_builtins[SPU_SHUFFLE_7];
- break;
+ return spu_builtin_decls[SPU_SHUFFLE_7];
case V4SFmode:
- d = &spu_builtins[SPU_SHUFFLE_8];
- break;
+ return spu_builtin_decls[SPU_SHUFFLE_8];
case V2DFmode:
- d = &spu_builtins[SPU_SHUFFLE_9];
- break;
+ return spu_builtin_decls[SPU_SHUFFLE_9];
default:
return NULL_TREE;
}
+}
+
+/* Return the appropriate mode for a named address pointer. */
+static enum machine_mode
+spu_addr_space_pointer_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_GENERIC:
+ return ptr_mode;
+ case ADDR_SPACE_EA:
+ return EAmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the appropriate mode for a named address address. */
+static enum machine_mode
+spu_addr_space_address_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_GENERIC:
+ return Pmode;
+ case ADDR_SPACE_EA:
+ return EAmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Determine if one named address space is a subset of another. */
+
+static bool
+spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
+{
+ gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
+ gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
+
+ if (subset == superset)
+ return true;
+
+ /* If we have -mno-address-space-conversion, treat __ea and generic as not
+ being subsets but instead as disjoint address spaces. */
+ else if (!TARGET_ADDRESS_SPACE_CONVERSION)
+ return false;
+
+ else
+ return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
+}
+
+/* Convert from one address space to another. */
+static rtx
+spu_addr_space_convert (rtx op, tree from_type, tree to_type)
+{
+ addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
+ addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
+
+ gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
+ gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
+
+ if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
+ {
+ rtx result, ls;
+
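+ /* Compute OP minus the local-store base. The conditional move below
+ substitutes 0 for the base when OP is a NULL __ea pointer, so NULL
+ converts to NULL. */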
+ ls = gen_const_mem (DImode,
+ gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
+ set_mem_align (ls, 128);
+
+ result = gen_reg_rtx (Pmode);
+ ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
+ op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
+ ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
+ ls, const0_rtx, Pmode, 1);
+
+ emit_insn (gen_subsi3 (result, op, ls));
+
+ return result;
+ }
+
+ else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
+ {
+ rtx result, ls;
+
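+ /* Compute OP plus the local-store base, with the same NULL special
+ case as above. */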
+ ls = gen_const_mem (DImode,
+ gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
+ set_mem_align (ls, 128);
+
+ result = gen_reg_rtx (EAmode);
+ ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
+ op = force_reg (Pmode, op);
+ ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
+ ls, const0_rtx, EAmode, 1);
+ op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
+
+ if (EAmode == SImode)
+ emit_insn (gen_addsi3 (result, op, ls));
+ else
+ emit_insn (gen_adddi3 (result, op, ls));
+
+ return result;
+ }
- gcc_assert (d);
- return d->fndecl;
+ else
+ gcc_unreachable ();
}
+
/* Count the total number of instructions in each pipe and return the
maximum, which is used as the Minimum Iteration Interval (MII)
in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
rtx insn = g->nodes[i].insn;
int p = get_pipe (insn) + 2;
- assert (p >= 0);
- assert (p < 4);
+ gcc_assert (p >= 0);
+ gcc_assert (p < 4);
t[p]++;
if (dump_file && INSN_P (insn))
/* .toe needs to have type @nobits. */
if (strcmp (name, ".toe") == 0)
return SECTION_BSS;
+ /* Don't load the ._ea section into local store; SECTION_DEBUG keeps
+ it out of the loaded image. */
+ if (strcmp (name, "._ea") == 0)
+ return SECTION_WRITE | SECTION_DEBUG;
return default_section_type_flags (decl, name, reloc);
}
+/* Implement targetm.select_section. */
+static section *
+spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
+{
+ /* Variables and constants defined in the __ea address space
+ go into a special section named "._ea". */
+ if (TREE_TYPE (decl) != error_mark_node
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
+ {
+ /* We might get called with string constants, but get_named_section
+ doesn't like them as they are not DECLs. Also, we need to set
+ flags in that case. */
+ if (!DECL_P (decl))
+ return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
+
+ return get_named_section (decl, "._ea", reloc);
+ }
+
+ return default_elf_select_section (decl, reloc, align);
+}
+
+/* Implement targetm.unique_section. */
+static void
+spu_unique_section (tree decl, int reloc)
+{
+ /* We don't support unique section names in the __ea address
+ space for now. */
+ if (TREE_TYPE (decl) != error_mark_node
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
+ return;
+
+ default_unique_section (decl, reloc);
+}
+
/* Generate a constant or register which contains 2^SCALE. We assume
the result is valid for MODE. Currently, MODE must be V4SFmode and
SCALE must be SImode. */
}
}
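+/* Implement the FUNCTION_PROFILER macro. Emit the profiling call for
+ -p/-pg; brsl leaves the return address for _mcount in $75. */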
+void
+spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
+{
+ fprintf (file, "# profile\n");
+ fprintf (file, "brsl $75, _mcount\n");
+}
+
#include "gt-spu.h"