X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fconfig%2Fbfin%2Fbfin.c;h=9af7fab73fe83de9d5b4d1027bac567167369c3a;hp=c635c9768384056570a2e09acf8be3dce31ca605;hb=cedee41af78b8d6739a2eca82fa466ccebca610a;hpb=88eaee2d9e837474fca5895d712c4958305ca3f5

diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c
index c635c976838..9af7fab73fe 100644
--- a/gcc/config/bfin/bfin.c
+++ b/gcc/config/bfin/bfin.c
@@ -1,12 +1,12 @@
 /* The Blackfin code generation auxiliary output file.
-   Copyright (C) 2005 Free Software Foundation, Inc.
+   Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Analog Devices.
 
   This file is part of GCC.
 
   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
-   by the Free Software Foundation; either version 2, or (at your
+   by the Free Software Foundation; either version 3, or (at your
   option) any later version.
 
   GCC is distributed in the hope that it will be useful, but WITHOUT
@@ -15,9 +15,8 @@
   License for more details.
 
   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING.  If not, write to
-   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-   Boston, MA 02110-1301, USA.  */
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
 
 #include "config.h"
 #include "system.h"
@@ -43,12 +42,26 @@
 #include "expr.h"
 #include "toplev.h"
 #include "recog.h"
+#include "optabs.h"
 #include "ggc.h"
 #include "integrate.h"
+#include "cgraph.h"
 #include "langhooks.h"
 #include "bfin-protos.h"
 #include "tm-preds.h"
+#include "tm-constrs.h"
 #include "gt-bfin.h"
+#include "basic-block.h"
+#include "cfglayout.h"
+#include "timevar.h"
+#include "df.h"
+
+/* A C structure for machine-specific, per-function data.
+   This is added to the cfun structure.  */
+struct machine_function GTY(())
+{
+  int has_hardware_loops;
+};
 
 /* Test and compare insns in bfin.md store the information needed to generate
    branch and scc insns here.  */
@@ -72,6 +85,157 @@ static int arg_regs[] = FUNCTION_ARG_REGISTERS;
 /* Nonzero if -mshared-library-id was given.  */
 static int bfin_lib_id_given;
 
+/* Nonzero if -fschedule-insns2 was given.  We override it and
+   call the scheduler ourselves during reorg.  */
+static int bfin_flag_schedule_insns2;
+
+/* Determines whether we run variable tracking in machine dependent
+   reorganization.  */
+static int bfin_flag_var_tracking;
+
+/* -mcpu support */
+bfin_cpu_t bfin_cpu_type = BFIN_CPU_UNKNOWN;
+
+/* -msi-revision support.  There are two special values:
+   -1      -msi-revision=none.
+   0xffff  -msi-revision=any.
*/ +int bfin_si_revision; + +/* The workarounds enabled */ +unsigned int bfin_workarounds = 0; + +struct bfin_cpu +{ + const char *name; + bfin_cpu_t type; + int si_revision; + unsigned int workarounds; +}; + +struct bfin_cpu bfin_cpus[] = +{ + {"bf522", BFIN_CPU_BF522, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf522", BFIN_CPU_BF522, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf523", BFIN_CPU_BF523, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf523", BFIN_CPU_BF523, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf524", BFIN_CPU_BF524, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf524", BFIN_CPU_BF524, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf525", BFIN_CPU_BF525, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf525", BFIN_CPU_BF525, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf526", BFIN_CPU_BF526, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf526", BFIN_CPU_BF526, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf527", BFIN_CPU_BF527, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf527", BFIN_CPU_BF527, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf531", BFIN_CPU_BF531, 0x0005, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf531", BFIN_CPU_BF531, 0x0004, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + {"bf531", BFIN_CPU_BF531, 0x0003, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + + {"bf532", BFIN_CPU_BF532, 0x0005, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf532", BFIN_CPU_BF532, 0x0004, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + {"bf532", BFIN_CPU_BF532, 0x0003, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + + {"bf533", BFIN_CPU_BF533, 0x0005, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf533", BFIN_CPU_BF533, 0x0004, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + {"bf533", BFIN_CPU_BF533, 0x0003, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + + {"bf534", BFIN_CPU_BF534, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf534", BFIN_CPU_BF534, 0x0002, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + {"bf534", BFIN_CPU_BF534, 0x0001, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + + {"bf536", BFIN_CPU_BF536, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf536", BFIN_CPU_BF536, 0x0002, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + {"bf536", BFIN_CPU_BF536, 0x0001, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + + {"bf537", BFIN_CPU_BF537, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf537", BFIN_CPU_BF537, 0x0002, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + {"bf537", BFIN_CPU_BF537, 0x0001, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + + {"bf538", BFIN_CPU_BF538, 0x0004, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf538", BFIN_CPU_BF538, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf538", BFIN_CPU_BF538, 0x0002, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf539", BFIN_CPU_BF539, 0x0004, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf539", BFIN_CPU_BF539, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf539", BFIN_CPU_BF539, 0x0002, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf542", BFIN_CPU_BF542, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf542", BFIN_CPU_BF542, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf544", BFIN_CPU_BF544, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf544", BFIN_CPU_BF544, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf547", BFIN_CPU_BF547, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf547", BFIN_CPU_BF547, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf548", 
BFIN_CPU_BF548, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf548", BFIN_CPU_BF548, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf549", BFIN_CPU_BF549, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS}, + {"bf549", BFIN_CPU_BF549, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS}, + + {"bf561", BFIN_CPU_BF561, 0x0005, WA_RETS}, + {"bf561", BFIN_CPU_BF561, 0x0003, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + {"bf561", BFIN_CPU_BF561, 0x0002, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS}, + + {NULL, 0, 0, 0} +}; + +int splitting_for_sched; + static void bfin_globalize_label (FILE *stream, const char *name) { @@ -87,6 +251,13 @@ output_file_start (void) FILE *file = asm_out_file; int i; + /* Variable tracking should be run after all optimizations which change order + of insns. It also needs a valid CFG. This can't be done in + override_options, because flag_var_tracking is finalized after + that. */ + bfin_flag_var_tracking = flag_var_tracking; + flag_var_tracking = 0; + fprintf (file, ".file \"%s\";\n", input_filename); for (i = 0; arg_regs[i] >= 0; i++) @@ -108,7 +279,8 @@ conditional_register_usage (void) /* Examine machine-dependent attributes of function type FUNTYPE and return its type. See the definition of E_FUNKIND. */ -static e_funkind funkind (tree funtype) +static e_funkind +funkind (const_tree funtype) { tree attrs = TYPE_ATTRIBUTES (funtype); if (lookup_attribute ("interrupt_handler", attrs)) @@ -127,42 +299,37 @@ static e_funkind funkind (tree funtype) necessary. PICREG is the register holding the pointer to the PIC offset table. */ -rtx +static rtx legitimize_pic_address (rtx orig, rtx reg, rtx picreg) { rtx addr = orig; - rtx new = orig; + rtx new_rtx = orig; if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF) { - if (GET_CODE (addr) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (addr)) - reg = new = orig; + int unspec; + rtx tmp; + + if (TARGET_ID_SHARED_LIBRARY) + unspec = UNSPEC_MOVE_PIC; + else if (GET_CODE (addr) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (addr)) + unspec = UNSPEC_FUNCDESC_GOT17M4; else - { - if (reg == 0) - { - gcc_assert (!no_new_pseudos); - reg = gen_reg_rtx (Pmode); - } + unspec = UNSPEC_MOVE_FDPIC; - if (flag_pic == 2) - { - emit_insn (gen_movsi_high_pic (reg, addr)); - emit_insn (gen_movsi_low_pic (reg, reg, addr)); - emit_insn (gen_addsi3 (reg, reg, picreg)); - new = gen_const_mem (Pmode, reg); - } - else - { - rtx tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), - UNSPEC_MOVE_PIC); - new = gen_const_mem (Pmode, - gen_rtx_PLUS (Pmode, picreg, tmp)); - } - emit_move_insn (reg, new); + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); } + + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec); + new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp)); + + emit_move_insn (reg, new_rtx); if (picreg == pic_offset_table_rtx) - current_function_uses_pic_offset_table = 1; + crtl->uses_pic_offset_table = 1; return reg; } @@ -181,7 +348,7 @@ legitimize_pic_address (rtx orig, rtx reg, rtx picreg) if (reg == 0) { - gcc_assert (!no_new_pseudos); + gcc_assert (can_create_pseudo_p ()); reg = gen_reg_rtx (Pmode); } @@ -205,28 +372,21 @@ legitimize_pic_address (rtx orig, rtx reg, rtx picreg) return gen_rtx_PLUS (Pmode, base, addr); } - return new; + return new_rtx; } /* Stack frame layout. */ -/* Compute the number of DREGS to save with a push_multiple operation. - This could include registers that aren't modified in the function, - since push_multiple only takes a range of registers. 
- If IS_INTHANDLER, then everything that is live must be saved, even - if normally call-clobbered. */ - -static int -n_dregs_to_save (bool is_inthandler) +/* For a given REGNO, determine whether it must be saved in the function + prologue. IS_INTHANDLER specifies whether we're generating a normal + prologue or an interrupt/exception one. */ +static bool +must_save_p (bool is_inthandler, unsigned regno) { - unsigned i; - - for (i = REG_R0; i <= REG_R7; i++) + if (D_REGNO_P (regno)) { - if (regs_ever_live[i] && (is_inthandler || ! call_used_regs[i])) - return REG_R7 - i + 1; - - if (current_function_calls_eh_return) + bool is_eh_return_reg = false; + if (crtl->calls_eh_return) { unsigned j; for (j = 0; ; j++) @@ -234,29 +394,71 @@ n_dregs_to_save (bool is_inthandler) unsigned test = EH_RETURN_DATA_REGNO (j); if (test == INVALID_REGNUM) break; - if (test == i) - return REG_R7 - i + 1; + if (test == regno) + is_eh_return_reg = true; } } + return (is_eh_return_reg + || (df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && (is_inthandler || !call_used_regs[regno]))); } - return 0; + else if (P_REGNO_P (regno)) + { + return ((df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && (is_inthandler || !call_used_regs[regno])) + || (!TARGET_FDPIC + && regno == PIC_OFFSET_TABLE_REGNUM + && (crtl->uses_pic_offset_table + || (TARGET_ID_SHARED_LIBRARY && !current_function_is_leaf)))); + } + else + return ((is_inthandler || !call_used_regs[regno]) + && (df_regs_ever_live_p (regno) + || (!leaf_function_p () && call_used_regs[regno]))); + +} + +/* Compute the number of DREGS to save with a push_multiple operation. + This could include registers that aren't modified in the function, + since push_multiple only takes a range of registers. + If IS_INTHANDLER, then everything that is live must be saved, even + if normally call-clobbered. + If CONSECUTIVE, return the number of registers we can save in one + instruction with a push/pop multiple instruction. */ + +static int +n_dregs_to_save (bool is_inthandler, bool consecutive) +{ + int count = 0; + unsigned i; + + for (i = REG_R7 + 1; i-- != REG_R0;) + { + if (must_save_p (is_inthandler, i)) + count++; + else if (consecutive) + return count; + } + return count; } /* Like n_dregs_to_save, but compute number of PREGS to save. */ static int -n_pregs_to_save (bool is_inthandler) +n_pregs_to_save (bool is_inthandler, bool consecutive) { + int count = 0; unsigned i; - for (i = REG_P0; i <= REG_P5; i++) - if ((regs_ever_live[i] && (is_inthandler || ! call_used_regs[i])) - || (i == PIC_OFFSET_TABLE_REGNUM - && (current_function_uses_pic_offset_table - || (TARGET_ID_SHARED_LIBRARY && ! current_function_is_leaf)))) - return REG_P5 - i + 1; - return 0; + for (i = REG_P5 + 1; i-- != REG_P0;) + if (must_save_p (is_inthandler, i)) + count++; + else if (consecutive) + return count; + return count; } /* Determine if we are going to save the frame pointer in the prologue. */ @@ -264,7 +466,7 @@ n_pregs_to_save (bool is_inthandler) static bool must_save_fp_p (void) { - return frame_pointer_needed || regs_ever_live[REG_FP]; + return frame_pointer_needed || df_regs_ever_live_p (REG_FP); } static bool @@ -272,7 +474,7 @@ stack_frame_needed_p (void) { /* EH return puts a new return address into the frame using an address relative to the frame pointer. 
*/ - if (current_function_calls_eh_return) + if (crtl->calls_eh_return) return true; return frame_pointer_needed; } @@ -285,48 +487,94 @@ stack_frame_needed_p (void) static void expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler) { - int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler); - int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler); - int dregno = REG_R7 + 1 - ndregs; - int pregno = REG_P5 + 1 - npregs; - int total = ndregs + npregs; - int i; - rtx pat, insn, val; + rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg); + rtx predec = gen_rtx_MEM (SImode, predec1); + int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false); + int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true); + int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true); + int dregno, pregno; + int total_consec = ndregs_consec + npregs_consec; + int i, d_to_save; + + if (saveall || is_inthandler) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT)); + RTX_FRAME_RELATED_P (insn) = 1; + } - if (total == 0) - return; + if (total_consec != 0) + { + rtx insn; + rtx val = GEN_INT (-total_consec * 4); + rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2)); + + XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val), + UNSPEC_PUSH_MULTIPLE); + XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg, + gen_rtx_PLUS (Pmode, + spreg, + val)); + RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1; + d_to_save = ndregs_consec; + dregno = REG_R7 + 1 - ndregs_consec; + pregno = REG_P5 + 1 - npregs_consec; + for (i = 0; i < total_consec; i++) + { + rtx memref = gen_rtx_MEM (word_mode, + gen_rtx_PLUS (Pmode, spreg, + GEN_INT (- i * 4 - 4))); + rtx subpat; + if (d_to_save > 0) + { + subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, + dregno++)); + d_to_save--; + } + else + { + subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, + pregno++)); + } + XVECEXP (pat, 0, i + 1) = subpat; + RTX_FRAME_RELATED_P (subpat) = 1; + } + insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } - val = GEN_INT (-total * 4); - pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total + 2)); - XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val), - UNSPEC_PUSH_MULTIPLE); - XVECEXP (pat, 0, total + 1) = gen_rtx_SET (VOIDmode, spreg, - gen_rtx_PLUS (Pmode, spreg, - val)); - RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total + 1)) = 1; - for (i = 0; i < total; i++) - { - rtx memref = gen_rtx_MEM (word_mode, - gen_rtx_PLUS (Pmode, spreg, - GEN_INT (- i * 4 - 4))); - rtx subpat; - if (ndregs > 0) - { - subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, - dregno++)); + for (dregno = REG_R0; ndregs != ndregs_consec; dregno++) + { + if (must_save_p (is_inthandler, dregno)) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, dregno)); + RTX_FRAME_RELATED_P (insn) = 1; ndregs--; } - else + } + for (pregno = REG_P0; npregs != npregs_consec; pregno++) + { + if (must_save_p (is_inthandler, pregno)) { - subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, - pregno++)); - npregs++; + rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, pregno)); + RTX_FRAME_RELATED_P (insn) = 1; + npregs--; } - XVECEXP (pat, 0, i + 1) = subpat; - RTX_FRAME_RELATED_P (subpat) = 1; } - insn = emit_insn (pat); - RTX_FRAME_RELATED_P (insn) = 1; + for (i = REG_P7 + 1; i < REG_CC; i++) + if (saveall + || 
(is_inthandler + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + { + rtx insn; + if (i == REG_A0 || i == REG_A1) + insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1), + gen_rtx_REG (PDImode, i)); + else + insn = emit_move_insn (predec, gen_rtx_REG (SImode, i)); + RTX_FRAME_RELATED_P (insn) = 1; + } } /* Emit code to restore registers in the epilogue. SAVEALL is nonzero if we @@ -337,45 +585,92 @@ expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler) static void expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler) { - int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler); - int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler); - int total = ndregs + npregs; - int i, regno; - rtx pat, insn; + rtx postinc1 = gen_rtx_POST_INC (SImode, spreg); + rtx postinc = gen_rtx_MEM (SImode, postinc1); - if (total == 0) - return; + int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false); + int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true); + int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true); + int total_consec = ndregs_consec + npregs_consec; + int i, regno; + rtx insn; - pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total + 1)); - XVECEXP (pat, 0, 0) = gen_rtx_SET (VOIDmode, spreg, - gen_rtx_PLUS (Pmode, spreg, - GEN_INT (total * 4))); + /* A slightly crude technique to stop flow from trying to delete "dead" + insns. */ + MEM_VOLATILE_P (postinc) = 1; - if (npregs > 0) - regno = REG_P5 + 1; - else - regno = REG_R7 + 1; + for (i = REG_CC - 1; i > REG_P7; i--) + if (saveall + || (is_inthandler + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + { + if (i == REG_A0 || i == REG_A1) + { + rtx mem = gen_rtx_MEM (PDImode, postinc1); + MEM_VOLATILE_P (mem) = 1; + emit_move_insn (gen_rtx_REG (PDImode, i), mem); + } + else + emit_move_insn (gen_rtx_REG (SImode, i), postinc); + } - for (i = 0; i < total; i++) + regno = REG_P5 - npregs_consec; + for (; npregs != npregs_consec; regno--) + { + if (must_save_p (is_inthandler, regno)) + { + emit_move_insn (gen_rtx_REG (word_mode, regno), postinc); + npregs--; + } + } + regno = REG_R7 - ndregs_consec; + for (; ndregs != ndregs_consec; regno--) { - rtx addr = (i > 0 - ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4)) - : spreg); - rtx memref = gen_rtx_MEM (word_mode, addr); + if (must_save_p (is_inthandler, regno)) + { + emit_move_insn (gen_rtx_REG (word_mode, regno), postinc); + ndregs--; + } + } - regno--; - XVECEXP (pat, 0, i + 1) - = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref); + if (total_consec != 0) + { + rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1)); + XVECEXP (pat, 0, 0) + = gen_rtx_SET (VOIDmode, spreg, + gen_rtx_PLUS (Pmode, spreg, + GEN_INT (total_consec * 4))); + + if (npregs_consec > 0) + regno = REG_P5 + 1; + else + regno = REG_R7 + 1; - if (npregs > 0) + for (i = 0; i < total_consec; i++) { - if (--npregs == 0) - regno = REG_R7 + 1; + rtx addr = (i > 0 + ? 
gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4)) + : spreg); + rtx memref = gen_rtx_MEM (word_mode, addr); + + regno--; + XVECEXP (pat, 0, i + 1) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref); + + if (npregs_consec > 0) + { + if (--npregs_consec == 0) + regno = REG_R7 + 1; + } } - } - insn = emit_insn (pat); - RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (saveall || is_inthandler) + emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc); } /* Perform any needed actions needed for a function that is receiving a @@ -457,9 +752,10 @@ n_regs_saved_by_prologue (void) tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE || (is_inthandler && !current_function_is_leaf)); - int ndregs = all ? 8 : n_dregs_to_save (is_inthandler); - int npregs = all ? 6 : n_pregs_to_save (is_inthandler); + int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false); int n = ndregs + npregs; + int i; if (all || stack_frame_needed_p ()) /* We use a LINK instruction in this case. */ @@ -472,23 +768,24 @@ n_regs_saved_by_prologue (void) n++; } + if (fkind != SUBROUTINE || all) + /* Increment once for ASTAT. */ + n++; + if (fkind != SUBROUTINE) { - int i; - - /* Increment once for ASTAT. */ - n++; - /* RETE/X/N. */ if (lookup_attribute ("nesting", attrs)) n++; - - for (i = REG_P7 + 1; i < REG_CC; i++) - if (all - || regs_ever_live[i] - || (!leaf_function_p () && call_used_regs[i])) - n += i == REG_A0 || i == REG_A1 ? 2 : 1; } + + for (i = REG_P7 + 1; i < REG_CC; i++) + if (all + || (fkind != SUBROUTINE + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + n += i == REG_A0 || i == REG_A1 ? 2 : 1; + return n; } @@ -505,9 +802,9 @@ bfin_initial_elimination_offset (int from, int to) if (to == STACK_POINTER_REGNUM) { - if (current_function_outgoing_args_size >= FIXED_STACK_AREA) - offset += current_function_outgoing_args_size; - else if (current_function_outgoing_args_size) + if (crtl->outgoing_args_size >= FIXED_STACK_AREA) + offset += crtl->outgoing_args_size; + else if (crtl->outgoing_args_size) offset += FIXED_STACK_AREA; offset += get_frame_size (); @@ -541,36 +838,69 @@ frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related) RTX_FRAME_RELATED_P (insn) = 1; } -/* Generate efficient code to add a value to the frame pointer. We - can use P1 as a scratch register. Set RTX_FRAME_RELATED_P on the - generated insns if FRAME is nonzero. */ +/* Generate efficient code to add a value to a P register. + Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero. + EPILOGUE_P is zero if this function is called for prologue, + otherwise it's nonzero. And it's less than zero if this is for + sibcall epilogue. */ static void -add_to_sp (rtx spreg, HOST_WIDE_INT value, int frame) +add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p) { if (value == 0) return; /* Choose whether to use a sequence using a temporary register, or - a sequence with multiple adds. We can add a signed 7 bit value + a sequence with multiple adds. We can add a signed 7-bit value in one instruction. 
*/ if (value > 120 || value < -120) { - rtx tmpreg = gen_rtx_REG (SImode, REG_P1); + rtx tmpreg; + rtx tmpreg2; rtx insn; - if (frame) - frame_related_constant_load (tmpreg, value, TRUE); + tmpreg2 = NULL_RTX; + + /* For prologue or normal epilogue, P1 can be safely used + as the temporary register. For sibcall epilogue, we try to find + a call used P register, which will be restored in epilogue. + If we cannot find such a P register, we have to use one I register + to help us. */ + + if (epilogue_p >= 0) + tmpreg = gen_rtx_REG (SImode, REG_P1); else { - insn = emit_move_insn (tmpreg, GEN_INT (value)); - if (frame) - RTX_FRAME_RELATED_P (insn) = 1; + int i; + for (i = REG_P0; i <= REG_P5; i++) + if ((df_regs_ever_live_p (i) && ! call_used_regs[i]) + || (!TARGET_FDPIC + && i == PIC_OFFSET_TABLE_REGNUM + && (crtl->uses_pic_offset_table + || (TARGET_ID_SHARED_LIBRARY + && ! current_function_is_leaf)))) + break; + if (i <= REG_P5) + tmpreg = gen_rtx_REG (SImode, i); + else + { + tmpreg = gen_rtx_REG (SImode, REG_P1); + tmpreg2 = gen_rtx_REG (SImode, REG_I0); + emit_move_insn (tmpreg2, tmpreg); + } } - insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg)); + if (frame) + frame_related_constant_load (tmpreg, value, TRUE); + else + insn = emit_move_insn (tmpreg, GEN_INT (value)); + + insn = emit_insn (gen_addsi3 (reg, reg, tmpreg)); if (frame) RTX_FRAME_RELATED_P (insn) = 1; + + if (tmpreg2 != NULL_RTX) + emit_move_insn (tmpreg, tmpreg2); } else do @@ -585,7 +915,7 @@ add_to_sp (rtx spreg, HOST_WIDE_INT value, int frame) it's no good. */ size = -60; - insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (size))); + insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); if (frame) RTX_FRAME_RELATED_P (insn) = 1; value -= size; @@ -638,10 +968,10 @@ emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size) static HOST_WIDE_INT arg_area_size (void) { - if (current_function_outgoing_args_size) + if (crtl->outgoing_args_size) { - if (current_function_outgoing_args_size >= FIXED_STACK_AREA) - return current_function_outgoing_args_size; + if (crtl->outgoing_args_size >= FIXED_STACK_AREA) + return crtl->outgoing_args_size; else return FIXED_STACK_AREA; } @@ -678,14 +1008,17 @@ do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all) rtx insn = emit_insn (pat); RTX_FRAME_RELATED_P (insn) = 1; } - add_to_sp (spreg, -frame_size, 1); + add_to_reg (spreg, -frame_size, 1, 0); } } -/* Like do_link, but used for epilogues to deallocate the stack frame. */ +/* Like do_link, but used for epilogues to deallocate the stack frame. + EPILOGUE_P is zero if this function is called for prologue, + otherwise it's nonzero. And it's less than zero if this is for + sibcall epilogue. */ static void -do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all) +do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p) { frame_size += arg_area_size (); @@ -695,17 +1028,17 @@ do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all) { rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg)); - add_to_sp (spreg, frame_size, 0); + add_to_reg (spreg, frame_size, 0, epilogue_p); if (must_save_fp_p ()) { rtx fpreg = gen_rtx_REG (Pmode, REG_FP); emit_move_insn (fpreg, postinc); - emit_insn (gen_rtx_USE (VOIDmode, fpreg)); + emit_use (fpreg); } if (! current_function_is_leaf) { emit_move_insn (bfin_rets_rtx, postinc); - emit_insn (gen_rtx_USE (VOIDmode, bfin_rets_rtx)); + emit_use (bfin_rets_rtx); } } } @@ -715,15 +1048,13 @@ do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all) SPREG contains (reg:SI REG_SP). 
*/ static void -expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind) +expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all) { - int i; HOST_WIDE_INT frame_size = get_frame_size (); rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg); rtx predec = gen_rtx_MEM (SImode, predec1); rtx insn; tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); - bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; tree kspisusp = lookup_attribute ("kspisusp", attrs); if (kspisusp) @@ -740,28 +1071,12 @@ expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind) RTX_FRAME_RELATED_P (insn) = 1; } - insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT)); - RTX_FRAME_RELATED_P (insn) = 1; - /* If we're calling other functions, they won't save their call-clobbered registers, so we must save everything here. */ if (!current_function_is_leaf) all = true; expand_prologue_reg_save (spreg, all, true); - for (i = REG_P7 + 1; i < REG_CC; i++) - if (all - || regs_ever_live[i] - || (!leaf_function_p () && call_used_regs[i])) - { - if (i == REG_A0 || i == REG_A1) - insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1), - gen_rtx_REG (PDImode, i)); - else - insn = emit_move_insn (predec, gen_rtx_REG (SImode, i)); - RTX_FRAME_RELATED_P (insn) = 1; - } - if (lookup_attribute ("nesting", attrs)) { rtx srcreg = gen_rtx_REG (Pmode, (fkind == EXCPT_HANDLER ? REG_RETX @@ -781,23 +1096,11 @@ expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind) rtx insn; insn = emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT)); - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, - NULL_RTX); insn = emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26))); - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, - NULL_RTX); insn = emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26))); - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, - NULL_RTX); insn = emit_move_insn (r1reg, spreg); - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, - NULL_RTX); insn = emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP)); - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, - NULL_RTX); insn = emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8))); - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, - NULL_RTX); } } @@ -806,19 +1109,17 @@ expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind) SPREG contains (reg:SI REG_SP). */ static void -expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind) +expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all) { - int i; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); rtx postinc1 = gen_rtx_POST_INC (SImode, spreg); rtx postinc = gen_rtx_MEM (SImode, postinc1); - tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); - bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; /* A slightly crude technique to stop flow from trying to delete "dead" insns. 
*/ MEM_VOLATILE_P (postinc) = 1; - do_unlink (spreg, get_frame_size (), all); + do_unlink (spreg, get_frame_size (), all, 1); if (lookup_attribute ("nesting", attrs)) { @@ -833,25 +1134,8 @@ expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind) if (!current_function_is_leaf) all = true; - for (i = REG_CC - 1; i > REG_P7; i--) - if (all - || regs_ever_live[i] - || (!leaf_function_p () && call_used_regs[i])) - { - if (i == REG_A0 || i == REG_A1) - { - rtx mem = gen_rtx_MEM (PDImode, postinc1); - MEM_VOLATILE_P (mem) = 1; - emit_move_insn (gen_rtx_REG (PDImode, i), mem); - } - else - emit_move_insn (gen_rtx_REG (SImode, i), postinc); - } - expand_epilogue_reg_restore (spreg, all, true); - emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc); - /* Deallocate any space we left on the stack in case we needed to save the argument registers. */ if (fkind == EXCPT_HANDLER) @@ -863,10 +1147,18 @@ expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind) /* Used while emitting the prologue to generate code to load the correct value into the PIC register, which is passed in DEST. */ -static void +static rtx bfin_load_pic_reg (rtx dest) { + struct cgraph_local_info *i = NULL; rtx addr, insn; + + i = cgraph_local_info (current_function_decl); + + /* Functions local to the translation unit don't need to reload the + pic reg, since the caller always passes a usable one. */ + if (i && i->local) + return pic_offset_table_rtx; if (bfin_lib_id_given) addr = plus_constant (pic_offset_table_rtx, -4 - bfin_library_id * 4); @@ -875,7 +1167,7 @@ bfin_load_pic_reg (rtx dest) gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_LIBRARY_OFFSET)); insn = emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr))); - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL); + return dest; } /* Generate RTL for the prologue of the current function. */ @@ -883,36 +1175,44 @@ bfin_load_pic_reg (rtx dest) void bfin_expand_prologue (void) { - rtx insn; HOST_WIDE_INT frame_size = get_frame_size (); rtx spreg = gen_rtx_REG (Pmode, REG_SP); e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); rtx pic_reg_loaded = NULL_RTX; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; if (fkind != SUBROUTINE) { - expand_interrupt_handler_prologue (spreg, fkind); + expand_interrupt_handler_prologue (spreg, fkind, all); return; } - if (current_function_limit_stack) + if (crtl->limit_stack + || (TARGET_STACK_CHECK_L1 + && !DECL_NO_LIMIT_STACK (current_function_decl))) { HOST_WIDE_INT offset = bfin_initial_elimination_offset (ARG_POINTER_REGNUM, STACK_POINTER_REGNUM); - rtx lim = stack_limit_rtx; + rtx lim = crtl->limit_stack ? 
stack_limit_rtx : NULL_RTX; + rtx p2reg = gen_rtx_REG (Pmode, REG_P2); + if (!lim) + { + emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode)); + emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg)); + lim = p2reg; + } if (GET_CODE (lim) == SYMBOL_REF) { - rtx p2reg = gen_rtx_REG (Pmode, REG_P2); if (TARGET_ID_SHARED_LIBRARY) { rtx p1reg = gen_rtx_REG (Pmode, REG_P1); - rtx r3reg = gen_rtx_REG (Pmode, REG_R3); rtx val; - pic_reg_loaded = p2reg; - bfin_load_pic_reg (pic_reg_loaded); - val = legitimize_pic_address (stack_limit_rtx, p1reg, p2reg); + pic_reg_loaded = bfin_load_pic_reg (p2reg); + val = legitimize_pic_address (stack_limit_rtx, p1reg, + pic_reg_loaded); emit_move_insn (p1reg, val); frame_related_constant_load (p2reg, offset, FALSE); emit_insn (gen_addsi3 (p2reg, p2reg, p1reg)); @@ -920,43 +1220,55 @@ bfin_expand_prologue (void) } else { - rtx limit = plus_constant (stack_limit_rtx, offset); + rtx limit = plus_constant (lim, offset); emit_move_insn (p2reg, limit); lim = p2reg; } } + else + { + if (lim != p2reg) + emit_move_insn (p2reg, lim); + add_to_reg (p2reg, offset, 0, 0); + lim = p2reg; + } emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim)); emit_insn (gen_trapifcc ()); } - expand_prologue_reg_save (spreg, 0, false); + expand_prologue_reg_save (spreg, all, false); do_link (spreg, frame_size, false); if (TARGET_ID_SHARED_LIBRARY - && (current_function_uses_pic_offset_table + && !TARGET_SEP_DATA + && (crtl->uses_pic_offset_table || !current_function_is_leaf)) bfin_load_pic_reg (pic_offset_table_rtx); } /* Generate RTL for the epilogue of the current function. NEED_RETURN is zero if this is for a sibcall. EH_RETURN is nonzero if we're expanding an - eh_return pattern. */ + eh_return pattern. SIBCALL_P is true if this is a sibcall epilogue, + false otherwise. */ void -bfin_expand_epilogue (int need_return, int eh_return) +bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p) { rtx spreg = gen_rtx_REG (Pmode, REG_SP); e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + int e = sibcall_p ? -1 : 1; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; if (fkind != SUBROUTINE) { - expand_interrupt_handler_epilogue (spreg, fkind); + expand_interrupt_handler_epilogue (spreg, fkind, all); return; } - do_unlink (spreg, get_frame_size (), false); + do_unlink (spreg, get_frame_size (), false, e); - expand_epilogue_reg_restore (spreg, false, false); + expand_epilogue_reg_restore (spreg, all, false); /* Omit the return insn if this is for a sibcall. */ if (! need_return) @@ -979,7 +1291,7 @@ bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, call-clobbered. */ if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE - && !regs_ever_live[new_reg]) + && !df_regs_ever_live_p (new_reg)) return 0; return 1; @@ -1014,9 +1326,28 @@ legitimize_address (rtx x ATTRIBUTE_UNUSED, rtx oldx ATTRIBUTE_UNUSED, return NULL_RTX; } +static rtx +bfin_delegitimize_address (rtx orig_x) +{ + rtx x = orig_x; + + if (GET_CODE (x) != MEM) + return orig_x; + + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC + && GET_CODE (XEXP (x, 0)) == REG + && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) + return XVECEXP (XEXP (x, 1), 0, 0); + + return orig_x; +} + /* This predicate is used to compute the length of a load/store insn. OP is a MEM rtx, we return nonzero if its addressing mode requires a - 32 bit instruction. 
*/ + 32-bit instruction. */ int effective_address_32bit_p (rtx op, enum machine_mode mode) @@ -1033,9 +1364,12 @@ effective_address_32bit_p (rtx op, enum machine_mode mode) return 0; } + if (GET_CODE (XEXP (op, 1)) == UNSPEC) + return 1; + offset = INTVAL (XEXP (op, 1)); - /* All byte loads use a 16 bit offset. */ + /* All byte loads use a 16-bit offset. */ if (GET_MODE_SIZE (mode) == 1) return 1; @@ -1052,11 +1386,24 @@ effective_address_32bit_p (rtx op, enum machine_mode mode) return offset < 0 || offset > 30; } +/* Returns true if X is a memory reference using an I register. */ +bool +bfin_dsp_memref_p (rtx x) +{ + if (! MEM_P (x)) + return false; + x = XEXP (x, 0); + if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC + || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC) + x = XEXP (x, 0); + return IREG_P (x); +} + /* Return cost of the memory address ADDR. All addressing modes are equally cheap on the Blackfin. */ static int -bfin_address_cost (rtx addr ATTRIBUTE_UNUSED) +bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) { return 1; } @@ -1102,7 +1449,18 @@ print_address_operand (FILE *file, rtx x) void print_operand (FILE *file, rtx x, char code) { - enum machine_mode mode = GET_MODE (x); + enum machine_mode mode; + + if (code == '!') + { + if (GET_MODE (current_output_insn) == SImode) + fprintf (file, " ||"); + else + fprintf (file, ";"); + return; + } + + mode = GET_MODE (x); switch (code) { @@ -1188,41 +1546,61 @@ print_operand (FILE *file, rtx x, char code) case REG: if (code == 'h') { - gcc_assert (REGNO (x) < 32); - fprintf (file, "%s", short_reg_names[REGNO (x)]); - /*fprintf (file, "\n%d\n ", REGNO (x));*/ - break; + if (REGNO (x) < 32) + fprintf (file, "%s", short_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); } else if (code == 'd') { - gcc_assert (REGNO (x) < 32); - fprintf (file, "%s", high_reg_names[REGNO (x)]); - break; + if (REGNO (x) < 32) + fprintf (file, "%s", high_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); } else if (code == 'w') { - gcc_assert (REGNO (x) == REG_A0 || REGNO (x) == REG_A1); - fprintf (file, "%s.w", reg_names[REGNO (x)]); + if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1) + fprintf (file, "%s.w", reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); } else if (code == 'x') { - gcc_assert (REGNO (x) == REG_A0 || REGNO (x) == REG_A1); - fprintf (file, "%s.x", reg_names[REGNO (x)]); + if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1) + fprintf (file, "%s.x", reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'v') + { + if (REGNO (x) == REG_A0) + fprintf (file, "AV0"); + else if (REGNO (x) == REG_A1) + fprintf (file, "AV1"); + else + output_operand_lossage ("invalid operand for code '%c'", code); } else if (code == 'D') { - fprintf (file, "%s", dregs_pair_names[REGNO (x)]); + if (D_REGNO_P (REGNO (x))) + fprintf (file, "%s", dregs_pair_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); } else if (code == 'H') { - gcc_assert (mode == DImode || mode == DFmode); - gcc_assert (REG_P (x)); - fprintf (file, "%s", reg_names[REGNO (x) + 1]); + if ((mode == DImode || mode == DFmode) && REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x) + 1]); + else + output_operand_lossage ("invalid operand for code '%c'", code); } else if (code == 'T') { - gcc_assert (D_REGNO_P (REGNO (x))); - 
fprintf (file, "%s", byte_reg_names[REGNO (x)]); + if (D_REGNO_P (REGNO (x))) + fprintf (file, "%s", byte_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); } else fprintf (file, "%s", reg_names[REGNO (x)]); @@ -1236,12 +1614,68 @@ print_operand (FILE *file, rtx x, char code) break; case CONST_INT: + if (code == 'M') + { + switch (INTVAL (x)) + { + case MACFLAG_NONE: + break; + case MACFLAG_FU: + fputs ("(FU)", file); + break; + case MACFLAG_T: + fputs ("(T)", file); + break; + case MACFLAG_TFU: + fputs ("(TFU)", file); + break; + case MACFLAG_W32: + fputs ("(W32)", file); + break; + case MACFLAG_IS: + fputs ("(IS)", file); + break; + case MACFLAG_IU: + fputs ("(IU)", file); + break; + case MACFLAG_IH: + fputs ("(IH)", file); + break; + case MACFLAG_M: + fputs ("(M)", file); + break; + case MACFLAG_IS_M: + fputs ("(IS,M)", file); + break; + case MACFLAG_ISS2: + fputs ("(ISS2)", file); + break; + case MACFLAG_S2RND: + fputs ("(S2RND)", file); + break; + default: + gcc_unreachable (); + } + break; + } + else if (code == 'b') + { + if (INTVAL (x) == 0) + fputs ("+=", file); + else if (INTVAL (x) == 1) + fputs ("-=", file); + else + gcc_unreachable (); + break; + } /* Moves to half registers with d or h modifiers always use unsigned constants. */ - if (code == 'd') + else if (code == 'd') x = GEN_INT ((INTVAL (x) >> 16) & 0xffff); else if (code == 'h') x = GEN_INT (INTVAL (x) & 0xffff); + else if (code == 'N') + x = GEN_INT (-INTVAL (x)); else if (code == 'X') x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x))); else if (code == 'Y') @@ -1254,8 +1688,6 @@ print_operand (FILE *file, rtx x, char code) case SYMBOL_REF: output_addr_const (file, x); - if (code == 'G' && flag_pic) - fprintf (file, "@GOT"); break; case CONST_DOUBLE: @@ -1270,6 +1702,16 @@ print_operand (FILE *file, rtx x, char code) fprintf (file, "@GOT"); break; + case UNSPEC_MOVE_FDPIC: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@GOT17M4"); + break; + + case UNSPEC_FUNCDESC_GOT17M4: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@FUNCDESC_GOT17M4"); + break; + case UNSPEC_LIBRARY_OFFSET: fprintf (file, "_current_shared_library_p5_offset_"); break; @@ -1385,7 +1827,7 @@ function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type, For args passed entirely in registers or entirely in memory, zero. Refer VDSP C Compiler manual, our ABI. - First 3 words are in registers. So, if a an argument is larger + First 3 words are in registers. So, if an argument is larger than the registers available, it will span the register and stack. */ @@ -1413,17 +1855,17 @@ bfin_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, static bool bfin_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, enum machine_mode mode ATTRIBUTE_UNUSED, - tree type, bool named ATTRIBUTE_UNUSED) + const_tree type, bool named ATTRIBUTE_UNUSED) { return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; } /* Decide whether a type should be returned in memory (true) or in a register (false). This is called by the macro - RETURN_IN_MEMORY. */ + TARGET_RETURN_IN_MEMORY. 
*/ -int -bfin_return_in_memory (tree type) +static bool +bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) { int size = int_size_in_bytes (type); return size > 2 * UNITS_PER_WORD || size == -1; @@ -1488,32 +1930,57 @@ static bool bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) { + struct cgraph_local_info *this_func, *called_func; e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); - return fkind == SUBROUTINE; -} - + if (fkind != SUBROUTINE) + return false; + if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA) + return true; + + /* When compiling for ID shared libraries, can't sibcall a local function + from a non-local function, because the local function thinks it does + not need to reload P5 in the prologue, but the sibcall wil pop P5 in the + sibcall epilogue, and we end up with the wrong value in P5. */ + + if (!decl) + /* Not enough information. */ + return false; + + this_func = cgraph_local_info (current_function_decl); + called_func = cgraph_local_info (decl); + return !called_func->local || this_func->local; +} + /* Emit RTL insns to initialize the variable parts of a trampoline at TRAMP. FNADDR is an RTX for the address of the function's pure code. CXT is an RTX for the static chain value for the function. */ void -initialize_trampoline (tramp, fnaddr, cxt) - rtx tramp, fnaddr, cxt; +initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) { rtx t1 = copy_to_reg (fnaddr); rtx t2 = copy_to_reg (cxt); rtx addr; + int i = 0; - addr = memory_address (Pmode, plus_constant (tramp, 2)); + if (TARGET_FDPIC) + { + rtx a = memory_address (Pmode, plus_constant (tramp, 8)); + addr = memory_address (Pmode, tramp); + emit_move_insn (gen_rtx_MEM (SImode, addr), a); + i = 8; + } + + addr = memory_address (Pmode, plus_constant (tramp, i + 2)); emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t1)); emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16))); - addr = memory_address (Pmode, plus_constant (tramp, 6)); + addr = memory_address (Pmode, plus_constant (tramp, i + 6)); emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t1)); - addr = memory_address (Pmode, plus_constant (tramp, 10)); + addr = memory_address (Pmode, plus_constant (tramp, i + 10)); emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t2)); emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16))); - addr = memory_address (Pmode, plus_constant (tramp, 14)); + addr = memory_address (Pmode, plus_constant (tramp, i + 14)); emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t2)); } @@ -1524,27 +1991,53 @@ emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED) { rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode); + gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed)); if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1])) operands[1] = force_reg (SImode, operands[1]); else operands[1] = legitimize_pic_address (operands[1], temp, - pic_offset_table_rtx); + TARGET_FDPIC ? OUR_FDPIC_REG + : pic_offset_table_rtx); } -/* Expand a move operation in mode MODE. The operands are in OPERANDS. */ +/* Expand a move operation in mode MODE. The operands are in OPERANDS. + Returns true if no further code must be generated, false if the caller + should generate an insn to move OPERANDS[1] to OPERANDS[0]. 
*/ -void +bool expand_move (rtx *operands, enum machine_mode mode) { - if (flag_pic && SYMBOLIC_CONST (operands[1])) + rtx op = operands[1]; + if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC) + && SYMBOLIC_CONST (op)) emit_pic_move (operands, mode); - + else if (mode == SImode && GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + && !bfin_legitimate_constant_p (op)) + { + rtx dest = operands[0]; + rtx op0, op1; + gcc_assert (!reload_in_progress && !reload_completed); + op = XEXP (op, 0); + op0 = force_reg (mode, XEXP (op, 0)); + op1 = XEXP (op, 1); + if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode)) + op1 = force_reg (mode, op1); + if (GET_CODE (dest) == MEM) + dest = gen_reg_rtx (mode); + emit_insn (gen_addsi3 (dest, op0, op1)); + if (dest == operands[0]) + return true; + operands[1] = dest; + } /* Don't generate memory->memory or constant->memory moves, go through a register */ else if ((reload_in_progress | reload_completed) == 0 && GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) != REG) operands[1] = force_reg (mode, operands[1]); + return false; } /* Split one or more DImode RTL references into pairs of SImode @@ -1601,23 +2094,68 @@ bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall) { rtx use = NULL, call; rtx callee = XEXP (fnaddr, 0); - rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (sibcall ? 3 : 2)); + int nelts = 2 + !!sibcall; + rtx pat; + rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO); + int n; /* In an untyped call, we can get NULL for operand 2. */ if (cookie == NULL_RTX) cookie = const0_rtx; /* Static functions and indirect calls don't need the pic register. */ - if (flag_pic + if (!TARGET_FDPIC && flag_pic && GET_CODE (callee) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (callee)) use_reg (&use, pic_offset_table_rtx); - if ((!register_no_elim_operand (callee, Pmode) - && GET_CODE (callee) != SYMBOL_REF) - || (GET_CODE (callee) == SYMBOL_REF - && (flag_pic - || bfin_longcall_p (callee, INTVAL (cookie))))) + if (TARGET_FDPIC) + { + int caller_has_l1_text, callee_has_l1_text; + + caller_has_l1_text = callee_has_l1_text = 0; + + if (lookup_attribute ("l1_text", + DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE) + caller_has_l1_text = 1; + + if (GET_CODE (callee) == SYMBOL_REF + && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee)) + && lookup_attribute + ("l1_text", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE) + callee_has_l1_text = 1; + + if (GET_CODE (callee) != SYMBOL_REF + || bfin_longcall_p (callee, INTVAL (cookie)) + || (GET_CODE (callee) == SYMBOL_REF + && !SYMBOL_REF_LOCAL_P (callee) + && TARGET_INLINE_PLT) + || caller_has_l1_text != callee_has_l1_text + || (caller_has_l1_text && callee_has_l1_text + && (GET_CODE (callee) != SYMBOL_REF + || !SYMBOL_REF_LOCAL_P (callee)))) + { + rtx addr = callee; + if (! 
address_operand (addr, Pmode)) + addr = force_reg (Pmode, addr); + + fnaddr = gen_reg_rtx (SImode); + emit_insn (gen_load_funcdescsi (fnaddr, addr)); + fnaddr = gen_rtx_MEM (Pmode, fnaddr); + + picreg = gen_reg_rtx (SImode); + emit_insn (gen_load_funcdescsi (picreg, + plus_constant (addr, 4))); + } + + nelts++; + } + else if ((!register_no_elim_operand (callee, Pmode) + && GET_CODE (callee) != SYMBOL_REF) + || (GET_CODE (callee) == SYMBOL_REF + && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY) + || bfin_longcall_p (callee, INTVAL (cookie))))) { callee = copy_to_mode_reg (Pmode, callee); fnaddr = gen_rtx_MEM (Pmode, callee); @@ -1627,10 +2165,14 @@ bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall) if (retval) call = gen_rtx_SET (VOIDmode, retval, call); - XVECEXP (pat, 0, 0) = call; - XVECEXP (pat, 0, 1) = gen_rtx_USE (VOIDmode, cookie); + pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts)); + n = 0; + XVECEXP (pat, 0, n++) = call; + if (TARGET_FDPIC) + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg); + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie); if (sibcall) - XVECEXP (pat, 0, 2) = gen_rtx_RETURN (VOIDmode); + XVECEXP (pat, 0, n++) = gen_rtx_RETURN (VOIDmode); call = emit_call_insn (pat); if (use) CALL_INSN_FUNCTION_USAGE (call) = use; @@ -1642,21 +2184,27 @@ int hard_regno_mode_ok (int regno, enum machine_mode mode) { /* Allow only dregs to store value of mode HI or QI */ - enum reg_class class = REGNO_REG_CLASS (regno); + enum reg_class rclass = REGNO_REG_CLASS (regno); if (mode == CCmode) return 0; if (mode == V2HImode) return D_REGNO_P (regno); - if (class == CCREGS) + if (rclass == CCREGS) return mode == BImode; - if (mode == PDImode) + if (mode == PDImode || mode == V2PDImode) return regno == REG_A0 || regno == REG_A1; + + /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes + up with a bad register class (such as ALL_REGS) for DImode. */ + if (mode == DImode) + return regno < REG_M3; + if (mode == SImode && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno)) return 1; - + return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno); } @@ -1672,9 +2220,14 @@ bfin_vector_mode_supported_p (enum machine_mode mode) one in class CLASS2. A cost of 2 is the default. */ int -bfin_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, +bfin_register_move_cost (enum machine_mode mode, enum reg_class class1, enum reg_class class2) { + /* These need secondary reloads, so they're more expensive. */ + if ((class1 == CCREGS && class2 != DREGS) + || (class1 != DREGS && class2 == CCREGS)) + return 4; + /* If optimizing for size, always prefer reg-reg over reg-memory moves. */ if (optimize_size) return 2; @@ -1685,6 +2238,15 @@ bfin_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, if (class1 == DREGS && class2 != DREGS) return 2 * 2; + if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* Discourage trying to use the accumulators. 
*/ + if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0) + || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1) + || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0) + || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1)) + return 20; + } return 2; } @@ -1698,25 +2260,25 @@ bfin_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, int bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, - enum reg_class class, + enum reg_class rclass, int in ATTRIBUTE_UNUSED) { /* Make memory accesses slightly more expensive than any register-register move. Also, penalize non-DP registers, since they need secondary reloads to load and store. */ - if (! reg_class_subset_p (class, DPREGS)) + if (! reg_class_subset_p (rclass, DPREGS)) return 10; return 8; } /* Inform reload about cases where moving X with a mode MODE to a register in - CLASS requires an extra scratch register. Return the class needed for the + RCLASS requires an extra scratch register. Return the class needed for the scratch register. */ static enum reg_class -bfin_secondary_reload (bool in_p, rtx x, enum reg_class class, - enum machine_mode mode, secondary_reload_info *sri) +bfin_secondary_reload (bool in_p, rtx x, enum reg_class rclass, + enum machine_mode mode, secondary_reload_info *sri) { /* If we have HImode or QImode, we can only use DREGS as secondary registers; in most other cases we can also use PREGS. */ @@ -1744,13 +2306,13 @@ bfin_secondary_reload (bool in_p, rtx x, enum reg_class class, if (fp_plus_const_operand (x, mode)) { rtx op2 = XEXP (x, 1); - int large_constant_p = ! CONST_7BIT_IMM_P (INTVAL (op2)); + int large_constant_p = ! satisfies_constraint_Ks7 (op2); - if (class == PREGS || class == PREGS_CLOBBERED) + if (rclass == PREGS || rclass == PREGS_CLOBBERED) return NO_REGS; /* If destination is a DREG, we can do this without a scratch register if the constant is valid for an add instruction. */ - if ((class == DREGS || class == DPREGS) + if ((rclass == DREGS || rclass == DPREGS) && ! large_constant_p) return NO_REGS; /* Reloading to anything other than a DREG? Use a PREG scratch @@ -1762,27 +2324,39 @@ bfin_secondary_reload (bool in_p, rtx x, enum reg_class class, /* Data can usually be moved freely between registers of most classes. AREGS are an exception; they can only move to or from another register in AREGS or one in DREGS. They can also be assigned the constant 0. */ - if (x_class == AREGS) - return class == DREGS || class == AREGS ? NO_REGS : DREGS; + if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS) + return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS + || rclass == ODD_AREGS + ? NO_REGS : DREGS); - if (class == AREGS) + if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS) { + if (code == MEM) + { + sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi; + return NO_REGS; + } + if (x != const0_rtx && x_class != DREGS) - return DREGS; + { + return DREGS; + } else return NO_REGS; } /* CCREGS can only be moved from/to DREGS. */ - if (class == CCREGS && x_class != DREGS) + if (rclass == CCREGS && x_class != DREGS) return DREGS; - if (x_class == CCREGS && class != DREGS) + if (x_class == CCREGS && rclass != DREGS) return DREGS; + /* All registers other than AREGS can load arbitrary constants. The only case that remains is MEM. */ if (code == MEM) - if (! reg_class_subset_p (class, default_class)) + if (! 
reg_class_subset_p (rclass, default_class)) return default_class; + return NO_REGS; } @@ -1800,16 +2374,117 @@ bfin_handle_option (size_t code, const char *arg, int value) bfin_lib_id_given = 1; return true; + case OPT_mcpu_: + { + const char *p, *q; + int i; + + i = 0; + while ((p = bfin_cpus[i].name) != NULL) + { + if (strncmp (arg, p, strlen (p)) == 0) + break; + i++; + } + + if (p == NULL) + { + error ("-mcpu=%s is not valid", arg); + return false; + } + + bfin_cpu_type = bfin_cpus[i].type; + + q = arg + strlen (p); + + if (*q == '\0') + { + bfin_si_revision = bfin_cpus[i].si_revision; + bfin_workarounds |= bfin_cpus[i].workarounds; + } + else if (strcmp (q, "-none") == 0) + bfin_si_revision = -1; + else if (strcmp (q, "-any") == 0) + { + bfin_si_revision = 0xffff; + while (bfin_cpus[i].type == bfin_cpu_type) + { + bfin_workarounds |= bfin_cpus[i].workarounds; + i++; + } + } + else + { + unsigned int si_major, si_minor; + int rev_len, n; + + rev_len = strlen (q); + + if (sscanf (q, "-%u.%u%n", &si_major, &si_minor, &n) != 2 + || n != rev_len + || si_major > 0xff || si_minor > 0xff) + { + invalid_silicon_revision: + error ("-mcpu=%s has invalid silicon revision", arg); + return false; + } + + bfin_si_revision = (si_major << 8) | si_minor; + + while (bfin_cpus[i].type == bfin_cpu_type + && bfin_cpus[i].si_revision != bfin_si_revision) + i++; + + if (bfin_cpus[i].type != bfin_cpu_type) + goto invalid_silicon_revision; + + bfin_workarounds |= bfin_cpus[i].workarounds; + } + + return true; + } + default: return true; } } +static struct machine_function * +bfin_init_machine_status (void) +{ + struct machine_function *f; + + f = GGC_CNEW (struct machine_function); + + return f; +} + /* Implement the macro OVERRIDE_OPTIONS. */ void override_options (void) { + /* If processor type is not specified, enable all workarounds. */ + if (bfin_cpu_type == BFIN_CPU_UNKNOWN) + { + int i; + + for (i = 0; bfin_cpus[i].name != NULL; i++) + bfin_workarounds |= bfin_cpus[i].workarounds; + + bfin_si_revision = 0xffff; + } + + if (bfin_csync_anomaly == 1) + bfin_workarounds |= WA_SPECULATIVE_SYNCS; + else if (bfin_csync_anomaly == 0) + bfin_workarounds &= ~WA_SPECULATIVE_SYNCS; + + if (bfin_specld_anomaly == 1) + bfin_workarounds |= WA_SPECULATIVE_LOADS; + else if (bfin_specld_anomaly == 0) + bfin_workarounds &= ~WA_SPECULATIVE_LOADS; + if (TARGET_OMIT_LEAF_FRAME_POINTER) flag_omit_frame_pointer = 1; @@ -1817,11 +2492,54 @@ override_options (void) if (bfin_lib_id_given && ! TARGET_ID_SHARED_LIBRARY) error ("-mshared-library-id= specified without -mid-shared-library"); - if (TARGET_ID_SHARED_LIBRARY) - /* ??? Provide a way to use a bigger GOT. */ + if (stack_limit_rtx && TARGET_STACK_CHECK_L1) + error ("Can't use multiple stack checking methods together."); + + if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC) + error ("ID shared libraries and FD-PIC mode can't be used together."); + + /* Don't allow the user to specify -mid-shared-library and -msep-data + together, as it makes little sense from a user's point of view... */ + if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY) + error ("cannot specify both -msep-data and -mid-shared-library"); + /* ... internally, however, it's nearly the same. */ + if (TARGET_SEP_DATA) + target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY; + + if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0) flag_pic = 1; + /* There is no single unaligned SI op for PIC code. Sometimes we + need to use ".4byte" and sometimes we need to use ".picptr". 
+ See bfin_assemble_integer for details. */ + if (TARGET_FDPIC) + targetm.asm_out.unaligned_op.si = 0; + + /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries, + since we don't support it and it'll just break. */ + if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY) + flag_pic = 0; + + if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561) + error ("-mmulticore can only be used with BF561"); + + if (TARGET_COREA && !TARGET_MULTICORE) + error ("-mcorea should be used with -mmulticore"); + + if (TARGET_COREB && !TARGET_MULTICORE) + error ("-mcoreb should be used with -mmulticore"); + + if (TARGET_COREA && TARGET_COREB) + error ("-mcorea and -mcoreb can't be used together"); + flag_schedule_insns = 0; + + /* Passes after sched2 can break the helpful TImode annotations that + haifa-sched puts on every insn. Just do scheduling in reorg. */ + bfin_flag_schedule_insns2 = flag_schedule_insns_after_reload; + flag_schedule_insns_after_reload = 0; + + init_machine_status = bfin_init_machine_status; } /* Return the destination address of BRANCH. @@ -1946,7 +2664,7 @@ bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED) } /* Return nonzero iff C has exactly one bit set if it is interpreted - as a 32 bit constant. */ + as a 32-bit constant. */ int log2constp (unsigned HOST_WIDE_INT c) @@ -1995,7 +2713,6 @@ split_load_immediate (rtx operands[]) int num_zero = shiftr_zero (&shifted); int num_compl_zero = shiftr_zero (&shifted_compl); unsigned int regno = REGNO (operands[0]); - enum reg_class class1 = REGNO_REG_CLASS (regno); /* This case takes care of single-bit set/clear constants, which we could also implement with BITSET/BITCLR. */ @@ -2030,7 +2747,7 @@ split_load_immediate (rtx operands[]) if (D_REGNO_P (regno)) { - if (CONST_7BIT_IMM_P (tmp)) + if (tmp >= -64 && tmp <= 63) { emit_insn (gen_movsi (operands[0], GEN_INT (tmp))); emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536))); @@ -2057,7 +2774,7 @@ split_load_immediate (rtx operands[]) return 0; if (optimize_size - && num_compl_zero && CONST_7BIT_IMM_P (shifted_compl)) + && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63) { /* If optimizing for size, generate a sequence that has more instructions but is shorter. */ @@ -2081,15 +2798,19 @@ bfin_valid_add (enum machine_mode mode, HOST_WIDE_INT value) int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2; /* The usual offsettable_memref machinery doesn't work so well for this port, so we deal with the problem here. */ - unsigned HOST_WIDE_INT mask = sz == 8 ? 
0x7ffe : 0x7fff; - return (v & ~(mask << shift)) == 0; + if (value > 0 && sz == 8) + v += 4; + return (v & ~(0x7fff << shift)) == 0; } static bool -bfin_valid_reg_p (unsigned int regno, int strict) +bfin_valid_reg_p (unsigned int regno, int strict, enum machine_mode mode, + enum rtx_code outer_code) { - return ((strict && REGNO_OK_FOR_BASE_STRICT_P (regno)) - || (!strict && REGNO_OK_FOR_BASE_NONSTRICT_P (regno))); + if (strict) + return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH); + else + return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH); } bool @@ -2097,13 +2818,13 @@ bfin_legitimate_address_p (enum machine_mode mode, rtx x, int strict) { switch (GET_CODE (x)) { case REG: - if (bfin_valid_reg_p (REGNO (x), strict)) + if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM)) return true; break; case PLUS: if (REG_P (XEXP (x, 0)) - && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict) - && (GET_CODE (XEXP (x, 1)) == UNSPEC + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS) + && ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode) || (GET_CODE (XEXP (x, 1)) == CONST_INT && bfin_valid_add (mode, INTVAL (XEXP (x, 1)))))) return true; @@ -2112,13 +2833,13 @@ bfin_legitimate_address_p (enum machine_mode mode, rtx x, int strict) case POST_DEC: if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode) && REG_P (XEXP (x, 0)) - && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC)) return true; case PRE_DEC: if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode) && XEXP (x, 0) == stack_pointer_rtx && REG_P (XEXP (x, 0)) - && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC)) return true; break; default: @@ -2127,16 +2848,64 @@ bfin_legitimate_address_p (enum machine_mode mode, rtx x, int strict) return false; } +/* Decide whether we can force certain constants to memory. If we + decide we can't, the caller should be able to cope with it in + another way. */ + +static bool +bfin_cannot_force_const_mem (rtx x ATTRIBUTE_UNUSED) +{ + /* We have only one class of non-legitimate constants, and our movsi + expander knows how to handle them. Dropping these constants into the + data section would only shift the problem - we'd still get relocs + outside the object, in the data section rather than the text section. */ + return true; +} + +/* Ensure that for any constant of the form symbol + offset, the offset + remains within the object. Any other constants are ok. + This ensures that flat binaries never have to deal with relocations + crossing section boundaries. */ + +bool +bfin_legitimate_constant_p (rtx x) +{ + rtx sym; + HOST_WIDE_INT offset; + + if (GET_CODE (x) != CONST) + return true; + + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS); + + sym = XEXP (x, 0); + x = XEXP (x, 1); + if (GET_CODE (sym) != SYMBOL_REF + || GET_CODE (x) != CONST_INT) + return true; + offset = INTVAL (x); + + if (SYMBOL_REF_DECL (sym) == 0) + return true; + if (offset < 0 + || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym)))) + return false; + + return true; +} + static bool -bfin_rtx_costs (rtx x, int code, int outer_code, int *total) +bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed) { int cost2 = COSTS_N_INSNS (1); + rtx op0, op1; switch (code) { case CONST_INT: if (outer_code == SET || outer_code == PLUS) - *total = CONST_7BIT_IMM_P (INTVAL (x)) ? 0 : cost2; + *total = satisfies_constraint_Ks7 (x) ? 
0 : cost2; else if (outer_code == AND) *total = log2constp (~INTVAL (x)) ? 0 : cost2; else if (outer_code == LE || outer_code == LT || outer_code == EQ) @@ -2164,58 +2933,174 @@ bfin_rtx_costs (rtx x, int code, int outer_code, int *total) return true; case PLUS: - if (GET_MODE (x) == Pmode) + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_MODE (x) == SImode) { - if (GET_CODE (XEXP (x, 0)) == MULT - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) + if (GET_CODE (op0) == MULT + && GET_CODE (XEXP (op0, 1)) == CONST_INT) { - HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); + HOST_WIDE_INT val = INTVAL (XEXP (op0, 1)); if (val == 2 || val == 4) { *total = cost2; - *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); - *total += rtx_cost (XEXP (x, 1), outer_code); + *total += rtx_cost (XEXP (op0, 0), outer_code, speed); + *total += rtx_cost (op1, outer_code, speed); return true; } } + *total = cost2; + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, SET, speed); +#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer + towards creating too many induction variables. */ + if (!reg_or_7bit_operand (op1, SImode)) + *total += rtx_cost (op1, SET, speed); +#endif } - - /* fall through */ + else if (GET_MODE (x) == DImode) + { + *total = 6 * cost2; + if (GET_CODE (op1) != CONST_INT + || !satisfies_constraint_Ks7 (op1)) + *total += rtx_cost (op1, PLUS, speed); + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, PLUS, speed); + } + return true; case MINUS: + if (GET_MODE (x) == DImode) + *total = 6 * cost2; + else + *total = cost2; + return true; + case ASHIFT: case ASHIFTRT: case LSHIFTRT: if (GET_MODE (x) == DImode) *total = 6 * cost2; - return false; + else + *total = cost2; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, code, speed); + + return true; - case AND: case IOR: + case AND: case XOR: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */ + if (code == IOR) + { + if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) + || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND) + || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) + || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT)) + { + *total = cost2; + return true; + } + } + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, code, speed); + if (GET_MODE (x) == DImode) - *total = 2 * cost2; - return false; + { + *total = 2 * cost2; + return true; + } + *total = cost2; + if (GET_MODE (x) != SImode) + return true; + + if (code == AND) + { + if (! rhs_andsi3_operand (XEXP (x, 1), SImode)) + *total += rtx_cost (XEXP (x, 1), code, speed); + } + else + { + if (! 
regorlog2_operand (XEXP (x, 1), SImode)) + *total += rtx_cost (XEXP (x, 1), code, speed); + } + + return true; + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + if (outer_code == SET + && XEXP (x, 1) == const1_rtx + && GET_CODE (XEXP (x, 2)) == CONST_INT) + { + *total = 2 * cost2; + return true; + } + /* fall through */ + + case SIGN_EXTEND: + case ZERO_EXTEND: + *total = cost2; + return true; case MULT: - if (GET_MODE_SIZE (GET_MODE (x)) <= UNITS_PER_WORD) - *total = COSTS_N_INSNS (3); - return false; + { + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) == GET_CODE (op1) + && (GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (1); + op0 = XEXP (op0, 0); + op1 = XEXP (op1, 0); + } + else if (!speed) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (3); + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, MULT, speed); + if (GET_CODE (op1) != REG + && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG)) + *total += rtx_cost (op1, MULT, speed); + } + return true; + + case UDIV: + case UMOD: + *total = COSTS_N_INSNS (32); + return true; + + case VEC_CONCAT: + case VEC_SELECT: + if (outer_code == SET) + *total = cost2; + return true; default: return false; } } - -static void -bfin_internal_label (FILE *stream, const char *prefix, unsigned long num) -{ - fprintf (stream, "%s%s$%ld:\n", LOCAL_LABEL_PREFIX, prefix, num); -} /* Used for communication between {push,pop}_multiple_operation (which we use not only as a predicate) and the corresponding output functions. */ static int first_preg_to_save, first_dreg_to_save; +static int n_regs_to_save; int push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) @@ -2284,6 +3169,7 @@ push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) lastpreg++; } } + n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save; return 1; } @@ -2343,6 +3229,7 @@ pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) } first_dreg_to_save = lastdreg; first_preg_to_save = lastpreg; + n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save; return 1; } @@ -2397,7 +3284,7 @@ output_pop_multiple (rtx insn, rtx *operands) /* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */ static void -single_move_for_strmov (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset) +single_move_for_movmem (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset) { rtx scratch = gen_reg_rtx (mode); rtx srcmem, dstmem; @@ -2413,7 +3300,7 @@ single_move_for_strmov (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT back on a different method. 
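bfin_expand_movmem above only handles constant counts, emitting at most one run of word moves plus a single halfword and a single byte move for the tail, chosen from the low bits of COUNT. A standalone sketch of the same size decomposition, assuming the alignment checks have already passed with 4-byte alignment:

#include <stdio.h>

/* Decompose a small, 4-byte-aligned copy of COUNT bytes the way the
   expander does: a run of word moves, then at most one halfword and one
   byte move for the tail.  Purely illustrative.  */
static void
plan_copy (long count)
{
  long offset = 0;

  if (count & ~3)                       /* at least one full word */
    {
      long words = count >> 2;
      printf ("%ld SImode move(s) at offset %ld\n", words, offset);
      offset += words * 4;
    }
  if (count & 2)
    {
      printf ("1 HImode move at offset %ld\n", offset);
      offset += 2;
    }
  if (count & 1)
    printf ("1 QImode move at offset %ld\n", offset);
}

int
main (void)
{
  plan_copy (11);   /* 2 word moves, 1 halfword move, 1 byte move */
  return 0;
}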
*/ bool -bfin_expand_strmov (rtx dst, rtx src, rtx count_exp, rtx align_exp) +bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) { rtx srcreg, destreg, countreg; HOST_WIDE_INT align = 0; @@ -2458,7 +3345,7 @@ bfin_expand_strmov (rtx dst, rtx src, rtx count_exp, rtx align_exp) { if ((count & ~3) == 4) { - single_move_for_strmov (dst, src, SImode, offset); + single_move_for_movmem (dst, src, SImode, offset); offset = 4; } else if (count & ~3) @@ -2470,7 +3357,7 @@ bfin_expand_strmov (rtx dst, rtx src, rtx count_exp, rtx align_exp) } if (count & 2) { - single_move_for_strmov (dst, src, HImode, offset); + single_move_for_movmem (dst, src, HImode, offset); offset += 2; } } @@ -2478,7 +3365,7 @@ bfin_expand_strmov (rtx dst, rtx src, rtx count_exp, rtx align_exp) { if ((count & ~1) == 2) { - single_move_for_strmov (dst, src, HImode, offset); + single_move_for_movmem (dst, src, HImode, offset); offset = 2; } else if (count & ~1) @@ -2491,14 +3378,39 @@ bfin_expand_strmov (rtx dst, rtx src, rtx count_exp, rtx align_exp) } if (count & 1) { - single_move_for_strmov (dst, src, QImode, offset); + single_move_for_movmem (dst, src, QImode, offset); } return true; } return false; } + +/* Compute the alignment for a local variable. + TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. */ +int +bfin_local_alignment (tree type, int align) +{ + /* Increasing alignment for (relatively) big types allows the builtin + memcpy can use 32 bit loads/stores. */ + if (TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (TREE_INT_CST_LOW (TYPE_SIZE (type)) > 8 + || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 32) + return 32; + return align; +} +/* Implement TARGET_SCHED_ISSUE_RATE. */ + +static int +bfin_issue_rate (void) +{ + return 3; +} + static int bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) { @@ -2521,132 +3433,1480 @@ bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD) { rtx pat = PATTERN (dep_insn); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); rtx dest = SET_DEST (pat); rtx src = SET_SRC (pat); - if (! ADDRESS_REGNO_P (REGNO (dest)) || ! D_REGNO_P (REGNO (src))) + if (! ADDRESS_REGNO_P (REGNO (dest)) + || ! (MEM_P (src) || D_REGNO_P (REGNO (src)))) return cost; return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3); } return cost; } + -/* We use the machine specific reorg pass for emitting CSYNC instructions - after conditional branches as needed. +/* Increment the counter for the number of loop instructions in the + current function. */ - The Blackfin is unusual in that a code sequence like - if cc jump label - r0 = (p0) - may speculatively perform the load even if the condition isn't true. This - happens for a branch that is predicted not taken, because the pipeline - isn't flushed or stalled, so the early stages of the following instructions, - which perform the memory reference, are allowed to execute before the - jump condition is evaluated. - Therefore, we must insert additional instructions in all places where this - could lead to incorrect behavior. The manual recommends CSYNC, while - VDSP seems to use NOPs (even though its corresponding compiler option is - named CSYNC). +void +bfin_hardware_loop (void) +{ + cfun->machine->has_hardware_loops++; +} - When optimizing for speed, we emit NOPs, which seems faster than a CSYNC. 
- When optimizing for size, we turn the branch into a predicted taken one. - This may be slower due to mispredicts, but saves code size. */ +/* Maximum loop nesting depth. */ +#define MAX_LOOP_DEPTH 2 -static void -bfin_reorg (void) +/* Maximum size of a loop. */ +#define MAX_LOOP_LENGTH 2042 + +/* Maximum distance of the LSETUP instruction from the loop start. */ +#define MAX_LSETUP_DISTANCE 30 + +/* We need to keep a vector of loops */ +typedef struct loop_info *loop_info; +DEF_VEC_P (loop_info); +DEF_VEC_ALLOC_P (loop_info,heap); + +/* Information about a loop we have found (or are in the process of + finding). */ +struct loop_info GTY (()) { - rtx insn, last_condjump = NULL_RTX; - int cycles_since_jump = INT_MAX; + /* loop number, for dumps */ + int loop_no; - if (! TARGET_SPECLD_ANOMALY || ! TARGET_CSYNC_ANOMALY) - return; + /* All edges that jump into and out of the loop. */ + VEC(edge,gc) *incoming; - /* First pass: find predicted-false branches; if something after them - needs nops, insert them or change the branch to predict true. */ - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - rtx pat; + /* We can handle two cases: all incoming edges have the same destination + block, or all incoming edges have the same source block. These two + members are set to the common source or destination we found, or NULL + if different blocks were found. If both are NULL the loop can't be + optimized. */ + basic_block incoming_src; + basic_block incoming_dest; - if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn)) - continue; + /* First block in the loop. This is the one branched to by the loop_end + insn. */ + basic_block head; - pat = PATTERN (insn); - if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER - || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC - || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0) - continue; + /* Last block in the loop (the one with the loop_end insn). */ + basic_block tail; - if (JUMP_P (insn)) - { - if (any_condjump_p (insn) - && ! cbranch_predicted_taken_p (insn)) - { - last_condjump = insn; - cycles_since_jump = 0; - } - else - cycles_since_jump = INT_MAX; - } - else if (INSN_P (insn)) - { - enum attr_type type = get_attr_type (insn); - int delay_needed = 0; - if (cycles_since_jump < INT_MAX) - cycles_since_jump++; + /* The successor block of the loop. This is the one the loop_end insn + falls into. */ + basic_block successor; - if (type == TYPE_MCLD && TARGET_SPECLD_ANOMALY) - { - rtx pat = single_set (insn); - if (may_trap_p (SET_SRC (pat))) - delay_needed = 3; - } - else if (type == TYPE_SYNC && TARGET_CSYNC_ANOMALY) - delay_needed = 4; + /* The last instruction in the tail. */ + rtx last_insn; - if (delay_needed > cycles_since_jump) - { - rtx pat; - int num_clobbers; - rtx *op = recog_data.operand; + /* The loop_end insn. */ + rtx loop_end; - delay_needed -= cycles_since_jump; + /* The iteration register. */ + rtx iter_reg; - extract_insn (last_condjump); - if (optimize_size) - { - pat = gen_cbranch_predicted_taken (op[0], op[1], op[2], - op[3]); - cycles_since_jump = INT_MAX; - } - else - /* Do not adjust cycles_since_jump in this case, so that - we'll increase the number of NOPs for a subsequent insn - if necessary. 
*/ - pat = gen_cbranch_with_nops (op[0], op[1], op[2], op[3], - GEN_INT (delay_needed)); - PATTERN (last_condjump) = pat; - INSN_CODE (last_condjump) = recog (pat, insn, &num_clobbers); - } - } - } - /* Second pass: for predicted-true branches, see if anything at the - branch destination needs extra nops. */ - if (! TARGET_CSYNC_ANOMALY) - return; + /* The new initialization insn. */ + rtx init; + + /* The new initialization instruction. */ + rtx loop_init; + + /* The new label placed at the beginning of the loop. */ + rtx start_label; + + /* The new label placed at the end of the loop. */ + rtx end_label; + + /* The length of the loop. */ + int length; + + /* The nesting depth of the loop. */ + int depth; + + /* Nonzero if we can't optimize this loop. */ + int bad; + + /* True if we have visited this loop. */ + int visited; + + /* True if this loop body clobbers any of LC0, LT0, or LB0. */ + int clobber_loop0; + + /* True if this loop body clobbers any of LC1, LT1, or LB1. */ + int clobber_loop1; + + /* Next loop in the graph. */ + struct loop_info *next; + + /* Immediate outer loop of this loop. */ + struct loop_info *outer; + + /* Vector of blocks only within the loop, including those within + inner loops. */ + VEC (basic_block,heap) *blocks; + + /* Same information in a bitmap. */ + bitmap block_bitmap; + + /* Vector of inner loops within this loop */ + VEC (loop_info,heap) *loops; +}; + +static void +bfin_dump_loops (loop_info loops) +{ + loop_info loop; + + for (loop = loops; loop; loop = loop->next) + { + loop_info i; + basic_block b; + unsigned ix; + + fprintf (dump_file, ";; loop %d: ", loop->loop_no); + if (loop->bad) + fprintf (dump_file, "(bad) "); + fprintf (dump_file, "{head:%d, depth:%d}", loop->head->index, loop->depth); + + fprintf (dump_file, " blocks: [ "); + for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++) + fprintf (dump_file, "%d ", b->index); + fprintf (dump_file, "] "); + + fprintf (dump_file, " inner loops: [ "); + for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, i); ix++) + fprintf (dump_file, "%d ", i->loop_no); + fprintf (dump_file, "]\n"); + } + fprintf (dump_file, "\n"); +} + +/* Scan the blocks of LOOP (and its inferiors) looking for basic block + BB. Return true, if we find it. */ + +static bool +bfin_bb_in_loop (loop_info loop, basic_block bb) +{ + return bitmap_bit_p (loop->block_bitmap, bb->index); +} + +/* Scan the blocks of LOOP (and its inferiors) looking for uses of + REG. Return true, if we find any. Don't count the loop's loop_end + insn if it matches LOOP_END. */ + +static bool +bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end) +{ + unsigned ix; + basic_block bb; + + for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++) + { + rtx insn; + + for (insn = BB_HEAD (bb); + insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + if (insn == loop_end) + continue; + if (reg_mentioned_p (reg, PATTERN (insn))) + return true; + } + } + return false; +} + +/* Estimate the length of INSN conservatively. */ + +static int +length_for_loop (rtx insn) +{ + int length = 0; + if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size) + { + if (ENABLE_WA_SPECULATIVE_SYNCS) + length = 8; + else if (ENABLE_WA_SPECULATIVE_LOADS) + length = 6; + } + else if (LABEL_P (insn)) + { + if (ENABLE_WA_SPECULATIVE_SYNCS) + length = 4; + } + + if (INSN_P (insn)) + length += get_attr_length (insn); + + return length; +} + +/* Optimize LOOP. 
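The constants above bound which counted loops can become Blackfin hardware loops: at most MAX_LOOP_DEPTH (2) levels deep, at most MAX_LOOP_LENGTH long, with the LSETUP site no further than MAX_LSETUP_DISTANCE from the loop start. The kind of source loop being targeted, with a hedged sketch of the rewritten shape in the comment (illustrative, not the exact compiler output):

/* A simple counted loop; when the checks above pass, the decrement,
   compare and conditional branch at the bottom are replaced by a
   hardware loop, roughly:

       P1 = n;
       LSETUP (.Lstart, .Lend) LC0 = P1;
     .Lstart:
       ... loop body ...
     .Lend:
*/
void
scale (int *a, int n)
{
  int i;
  for (i = 0; i < n; i++)     /* candidate for an LC0/LC1 hardware loop */
    a[i] *= 2;
}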
*/ + +static void +bfin_optimize_loop (loop_info loop) +{ + basic_block bb; + loop_info inner; + rtx insn, init_insn, last_insn, nop_insn; + rtx loop_init, start_label, end_label; + rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1; + rtx iter_reg; + rtx lc_reg, lt_reg, lb_reg; + rtx seq, seq_end; + int length; + unsigned ix; + int inner_depth = 0; + + if (loop->visited) + return; + + loop->visited = 1; + + if (loop->bad) + { + if (dump_file) + fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); + goto bad_loop; + } + + /* Every loop contains in its list of inner loops every loop nested inside + it, even if there are intermediate loops. This works because we're doing + a depth-first search here and never visit a loop more than once. */ + for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, inner); ix++) + { + bfin_optimize_loop (inner); + + if (!inner->bad && inner_depth < inner->depth) + { + inner_depth = inner->depth; + + loop->clobber_loop0 |= inner->clobber_loop0; + loop->clobber_loop1 |= inner->clobber_loop1; + } + } + + loop->depth = inner_depth + 1; + if (loop->depth > MAX_LOOP_DEPTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no); + goto bad_loop; + } + + /* Get the loop iteration register. */ + iter_reg = loop->iter_reg; + + if (!DPREG_P (iter_reg)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d iteration count NOT in PREG or DREG\n", + loop->loop_no); + goto bad_loop; + } + + if (loop->incoming_src) + { + /* Make sure the predecessor is before the loop start label, as required by + the LSETUP instruction. */ + length = 0; + for (insn = BB_END (loop->incoming_src); + insn && insn != loop->start_label; + insn = NEXT_INSN (insn)) + length += length_for_loop (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d lsetup not before loop_start\n", + loop->loop_no); + goto bad_loop; + } + + if (length > MAX_LSETUP_DISTANCE) + { + if (dump_file) + fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no); + goto bad_loop; + } + } + + /* Check if start_label appears before loop_end and calculate the + offset between them. We calculate the length of instructions + conservatively. */ + length = 0; + for (insn = loop->start_label; + insn && insn != loop->loop_end; + insn = NEXT_INSN (insn)) + length += length_for_loop (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + goto bad_loop; + } + + loop->length = length; + if (loop->length > MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + goto bad_loop; + } + + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (bfin_scan_loop (loop, iter_reg, loop->loop_end)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no); + goto bad_loop; + } + + /* Scan all the insns to see if the loop body clobber + any hardware loop registers. 
*/ + + reg_lc0 = gen_rtx_REG (SImode, REG_LC0); + reg_lc1 = gen_rtx_REG (SImode, REG_LC1); + reg_lt0 = gen_rtx_REG (SImode, REG_LT0); + reg_lt1 = gen_rtx_REG (SImode, REG_LT1); + reg_lb0 = gen_rtx_REG (SImode, REG_LB0); + reg_lb1 = gen_rtx_REG (SImode, REG_LB1); + + for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++) + { + rtx insn; + + for (insn = BB_HEAD (bb); + insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + + if (reg_set_p (reg_lc0, insn) + || reg_set_p (reg_lt0, insn) + || reg_set_p (reg_lb0, insn)) + loop->clobber_loop0 = 1; + + if (reg_set_p (reg_lc1, insn) + || reg_set_p (reg_lt1, insn) + || reg_set_p (reg_lb1, insn)) + loop->clobber_loop1 |= 1; + } + } + + if ((loop->clobber_loop0 && loop->clobber_loop1) + || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0)) + { + loop->depth = MAX_LOOP_DEPTH + 1; + if (dump_file) + fprintf (dump_file, ";; loop %d no loop reg available\n", + loop->loop_no); + goto bad_loop; + } + + /* There should be an instruction before the loop_end instruction + in the same basic block. And the instruction must not be + - JUMP + - CONDITIONAL BRANCH + - CALL + - CSYNC + - SSYNC + - Returns (RTS, RTN, etc.) */ + + bb = loop->tail; + last_insn = PREV_INSN (loop->loop_end); + + while (1) + { + for (; last_insn != PREV_INSN (BB_HEAD (bb)); + last_insn = PREV_INSN (last_insn)) + if (INSN_P (last_insn)) + break; + + if (last_insn != PREV_INSN (BB_HEAD (bb))) + break; + + if (single_pred_p (bb) + && single_pred (bb) != ENTRY_BLOCK_PTR) + { + bb = single_pred (bb); + last_insn = BB_END (bb); + continue; + } + else + { + last_insn = NULL_RTX; + break; + } + } + + if (!last_insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has no last instruction\n", + loop->loop_no); + goto bad_loop; + } + + if (JUMP_P (last_insn)) + { + loop_info inner = (loop_info) bb->aux; + if (inner + && inner->outer == loop + && inner->loop_end == last_insn + && inner->depth == 1) + /* This jump_insn is the exact loop_end of an inner loop + and to be optimized away. So use the inner's last_insn. */ + last_insn = inner->last_insn; + else + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last instruction\n", + loop->loop_no); + goto bad_loop; + } + } + else if (CALL_P (last_insn) + || (GET_CODE (PATTERN (last_insn)) != SEQUENCE + && get_attr_type (last_insn) == TYPE_SYNC) + || recog_memoized (last_insn) == CODE_FOR_return_internal) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last instruction\n", + loop->loop_no); + goto bad_loop; + } + + if (GET_CODE (PATTERN (last_insn)) == ASM_INPUT + || asm_noperands (PATTERN (last_insn)) >= 0 + || (GET_CODE (PATTERN (last_insn)) != SEQUENCE + && get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI)) + { + nop_insn = emit_insn_after (gen_nop (), last_insn); + last_insn = nop_insn; + } + + loop->last_insn = last_insn; + + /* The loop is good for replacement. */ + start_label = loop->start_label; + end_label = gen_label_rtx (); + iter_reg = loop->iter_reg; + + if (loop->depth == 1 && !loop->clobber_loop1) + { + lc_reg = reg_lc1; + lt_reg = reg_lt1; + lb_reg = reg_lb1; + loop->clobber_loop1 = 1; + } + else + { + lc_reg = reg_lc0; + lt_reg = reg_lt0; + lb_reg = reg_lb0; + loop->clobber_loop0 = 1; + } + + /* If iter_reg is a DREG, we need generate an instruction to load + the loop count into LC register. 
*/ + if (D_REGNO_P (REGNO (iter_reg))) + { + init_insn = gen_movsi (lc_reg, iter_reg); + loop_init = gen_lsetup_without_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg); + } + else if (P_REGNO_P (REGNO (iter_reg))) + { + init_insn = NULL_RTX; + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, iter_reg); + } + else + gcc_unreachable (); + + loop->init = init_insn; + loop->end_label = end_label; + loop->loop_init = loop_init; + + if (dump_file) + { + fprintf (dump_file, ";; replacing loop %d initializer with\n", + loop->loop_no); + print_rtl_single (dump_file, loop->loop_init); + fprintf (dump_file, ";; replacing loop %d terminator with\n", + loop->loop_no); + print_rtl_single (dump_file, loop->loop_end); + } + + start_sequence (); + + if (loop->init != NULL_RTX) + emit_insn (loop->init); + seq_end = emit_insn (loop->loop_init); + + seq = get_insns (); + end_sequence (); + + if (loop->incoming_src) + { + rtx prev = BB_END (loop->incoming_src); + if (VEC_length (edge, loop->incoming) > 1 + || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (prev)); + prev = PREV_INSN (prev); + } + emit_insn_after (seq, prev); + } + else + { + basic_block new_bb; + edge e; + edge_iterator ei; + + if (loop->head != loop->incoming_dest) + { + FOR_EACH_EDGE (e, ei, loop->head->preds) + { + if (e->flags & EDGE_FALLTHRU) + { + rtx newjump = gen_jump (loop->start_label); + emit_insn_before (newjump, BB_HEAD (loop->head)); + new_bb = create_basic_block (newjump, newjump, loop->head->prev_bb); + gcc_assert (new_bb = loop->head->prev_bb); + break; + } + } + } + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU) + || e->dest != loop->head) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + } + + delete_insn (loop->loop_end); + /* Insert the loop end label before the last instruction of the loop. */ + emit_label_before (loop->end_label, loop->last_insn); + + return; + + bad_loop: + + if (dump_file) + fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); + + loop->bad = 1; + + if (DPREG_P (loop->iter_reg)) + { + /* If loop->iter_reg is a DREG or PREG, we can split it here + without scratch register. */ + rtx insn; + + emit_insn_before (gen_addsi3 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + + emit_insn_before (gen_cmpsi (loop->iter_reg, const0_rtx), + loop->loop_end); + + insn = emit_jump_insn_before (gen_bne (loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); + } +} + +/* Called from bfin_reorg_loops when a potential loop end is found. LOOP is + a newly set up structure describing the loop, it is this function's + responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the + loop_end insn and its enclosing basic block. 
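bfin_discover_loop collects the loop body with a work-list walk: starting from the head block it follows successor edges only while the iteration register stays live, and it does not walk past the block holding the loop_end insn. A standalone sketch of that traversal over a toy CFG (plain arrays, not GCC's data structures):

#include <stdio.h>

#define NBLOCKS 6

/* Tiny successor matrix standing in for CFG edges; succ[a][b] != 0
   means an edge a -> b.  Purely illustrative.  */
static const int succ[NBLOCKS][NBLOCKS] = {
  {0, 1, 0, 0, 0, 0},           /* 0: entry -> 1 */
  {0, 0, 1, 0, 0, 0},           /* 1: loop head -> 2 */
  {0, 1, 0, 0, 1, 0},           /* 2: loop tail -> 1 (back edge), -> 4 (exit) */
  {0, 0, 0, 0, 0, 0},
  {0, 0, 0, 0, 0, 0},
  {0, 0, 0, 0, 0, 0},
};

/* Work-list walk in the spirit of bfin_discover_loop: follow successors
   in which the loop counter is still live (approximated by LIVE) and
   stop at the tail block.  */
static void
discover (int head, int tail, const int live[NBLOCKS])
{
  int work[NBLOCKS], queued[NBLOCKS] = {0};
  int n = 0, i;

  queued[head] = 1;
  work[n++] = head;
  for (i = 0; i < n; i++)
    {
      int bb = work[i], s;
      printf ("block %d belongs to the loop\n", bb);
      if (bb == tail)
        continue;                       /* don't walk past the loop_end */
      for (s = 0; s < NBLOCKS; s++)
        if (succ[bb][s] && live[s] && !queued[s])
          {
            queued[s] = 1;
            work[n++] = s;
          }
    }
}

int
main (void)
{
  const int live[NBLOCKS] = {0, 1, 1, 0, 0, 0};   /* counter live in 1 and 2 */
  discover (1, 2, live);
  return 0;
}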
*/ + +static void +bfin_discover_loop (loop_info loop, basic_block tail_bb, rtx tail_insn) +{ + unsigned dwork = 0; + basic_block bb; + VEC (basic_block,heap) *works = VEC_alloc (basic_block,heap,20); + + loop->tail = tail_bb; + loop->head = BRANCH_EDGE (tail_bb)->dest; + loop->successor = FALLTHRU_EDGE (tail_bb)->dest; + loop->loop_end = tail_insn; + loop->last_insn = NULL_RTX; + loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail_insn), 0, 1)); + loop->depth = loop->length = 0; + loop->visited = 0; + loop->clobber_loop0 = loop->clobber_loop1 = 0; + loop->outer = NULL; + loop->loops = NULL; + loop->incoming = VEC_alloc (edge, gc, 2); + loop->init = loop->loop_init = NULL_RTX; + loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0); + loop->end_label = NULL_RTX; + loop->bad = 0; + + VEC_safe_push (basic_block, heap, works, loop->head); + + while (VEC_iterate (basic_block, works, dwork++, bb)) + { + edge e; + edge_iterator ei; + if (bb == EXIT_BLOCK_PTR) + { + /* We've reached the exit block. The loop must be bad. */ + if (dump_file) + fprintf (dump_file, + ";; Loop is bad - reached exit block while scanning\n"); + loop->bad = 1; + break; + } + + if (bitmap_bit_p (loop->block_bitmap, bb->index)) + continue; + + /* We've not seen this block before. Add it to the loop's + list and then add each successor to the work list. */ + + VEC_safe_push (basic_block, heap, loop->blocks, bb); + bitmap_set_bit (loop->block_bitmap, bb->index); + + if (bb != tail_bb) + { + FOR_EACH_EDGE (e, ei, bb->succs) + { + basic_block succ = EDGE_SUCC (bb, ei.index)->dest; + if (!REGNO_REG_SET_P (df_get_live_in (succ), + REGNO (loop->iter_reg))) + continue; + if (!VEC_space (basic_block, works, 1)) + { + if (dwork) + { + VEC_block_remove (basic_block, works, 0, dwork); + dwork = 0; + } + else + VEC_reserve (basic_block, heap, works, 1); + } + VEC_quick_push (basic_block, works, succ); + } + } + } + + /* Find the predecessor, and make sure nothing else jumps into this loop. 
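A loop is only usable if its incoming edges all share a single source block or a single destination block; otherwise the code above retries after pulling forwarder blocks into the loop, and finally gives up. A small standalone sketch of that acceptance test:

#include <stdio.h>

struct edge_s { int src, dest; };

/* Decide whether a set of incoming edges is acceptable: they must all
   share one source block or all share one destination block, otherwise
   the loop cannot be given an LSETUP without further CFG surgery.  */
static const char *
classify_incoming (const struct edge_s *e, int n)
{
  int i, same_src = 1, same_dest = 1;

  for (i = 1; i < n; i++)
    {
      if (e[i].src != e[0].src)
        same_src = 0;
      if (e[i].dest != e[0].dest)
        same_dest = 0;
    }
  if (same_src)
    return "ok: unique incoming source";
  if (same_dest)
    return "ok: unique incoming destination";
  return "bad: needs forwarder blocks (or give up)";
}

int
main (void)
{
  const struct edge_s in1[] = { {3, 7}, {5, 7} };   /* both edges enter block 7 */
  const struct edge_s in2[] = { {3, 7}, {5, 8} };
  printf ("%s\n", classify_incoming (in1, 2));
  printf ("%s\n", classify_incoming (in2, 2));
  return 0;
}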
*/ + if (!loop->bad) + { + int pass, retry; + for (dwork = 0; VEC_iterate (basic_block, loop->blocks, dwork, bb); dwork++) + { + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + { + basic_block pred = e->src; + + if (!bfin_bb_in_loop (loop, pred)) + { + if (dump_file) + fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", + loop->loop_no, pred->index, + e->dest->index); + VEC_safe_push (edge, gc, loop->incoming, e); + } + } + } + + for (pass = 0, retry = 1; retry && pass < 2; pass++) + { + edge e; + edge_iterator ei; + bool first = true; + retry = 0; + + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (first) + { + loop->incoming_src = e->src; + loop->incoming_dest = e->dest; + first = false; + } + else + { + if (e->dest != loop->incoming_dest) + loop->incoming_dest = NULL; + if (e->src != loop->incoming_src) + loop->incoming_src = NULL; + } + if (loop->incoming_src == NULL && loop->incoming_dest == NULL) + { + if (pass == 0) + { + if (dump_file) + fprintf (dump_file, + ";; retrying loop %d with forwarder blocks\n", + loop->loop_no); + retry = 1; + break; + } + loop->bad = 1; + if (dump_file) + fprintf (dump_file, + ";; can't find suitable entry for loop %d\n", + loop->loop_no); + goto out; + } + } + if (retry) + { + retry = 0; + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (forwarder_block_p (e->src)) + { + edge e2; + edge_iterator ei2; + + if (dump_file) + fprintf (dump_file, + ";; Adding forwarder block %d to loop %d and retrying\n", + e->src->index, loop->loop_no); + VEC_safe_push (basic_block, heap, loop->blocks, e->src); + bitmap_set_bit (loop->block_bitmap, e->src->index); + FOR_EACH_EDGE (e2, ei2, e->src->preds) + VEC_safe_push (edge, gc, loop->incoming, e2); + VEC_unordered_remove (edge, loop->incoming, ei.index); + retry = 1; + break; + } + } + } + } + } + + out: + VEC_free (basic_block, heap, works); +} + +/* Analyze the structure of the loops in the current function. Use STACK + for bitmap allocations. Returns all the valid candidates for hardware + loops found in this function. */ +static loop_info +bfin_discover_loops (bitmap_obstack *stack, FILE *dump_file) +{ + loop_info loops = NULL; + loop_info loop; + basic_block bb; + bitmap tmp_bitmap; + int nloops = 0; + + /* Find all the possible loop tails. This means searching for every + loop_end instruction. For each one found, create a loop_info + structure and add the head block to the work list. */ + FOR_EACH_BB (bb) + { + rtx tail = BB_END (bb); + + while (GET_CODE (tail) == NOTE) + tail = PREV_INSN (tail); + + bb->aux = NULL; + + if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end) + { + rtx insn; + /* A possible loop end */ + + /* There's a degenerate case we can handle - an empty loop consisting + of only a back branch. Handle that by deleting the branch. */ + insn = BB_HEAD (BRANCH_EDGE (bb)->dest); + if (next_real_insn (insn) == tail) + { + if (dump_file) + { + fprintf (dump_file, ";; degenerate loop ending at\n"); + print_rtl_single (dump_file, tail); + } + delete_insn_and_edges (tail); + continue; + } + + loop = XNEW (struct loop_info); + loop->next = loops; + loops = loop; + loop->loop_no = nloops++; + loop->blocks = VEC_alloc (basic_block, heap, 20); + loop->block_bitmap = BITMAP_ALLOC (stack); + bb->aux = loop; + + if (dump_file) + { + fprintf (dump_file, ";; potential loop %d ending at\n", + loop->loop_no); + print_rtl_single (dump_file, tail); + } + + bfin_discover_loop (loop, bb, tail); + } + } + + tmp_bitmap = BITMAP_ALLOC (stack); + /* Compute loop nestings. 
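Loop nesting is derived purely from the block bitmaps: if the intersection of two loops' bitmaps equals one of them, that loop is nested in the other; a non-empty partial overlap marks both loops bad. The same test with plain bitmasks standing in for the bitmaps:

#include <stdio.h>

/* Classify the relationship of two loops from their block sets,
   represented here as plain bitmasks rather than GCC bitmaps.  */
static const char *
classify (unsigned a, unsigned b)
{
  unsigned both = a & b;
  if (both == 0)
    return "disjoint";
  if (both == b)
    return "b nested inside a";
  if (both == a)
    return "a nested inside b";
  return "overlapping - both loops marked bad";
}

int
main (void)
{
  printf ("%s\n", classify (0x3e, 0x0c));   /* blocks {1..5} vs {2,3} */
  printf ("%s\n", classify (0x06, 0x0c));   /* partial overlap */
  return 0;
}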
*/ + for (loop = loops; loop; loop = loop->next) + { + loop_info other; + if (loop->bad) + continue; + + for (other = loop->next; other; other = other->next) + { + if (other->bad) + continue; + + bitmap_and (tmp_bitmap, other->block_bitmap, loop->block_bitmap); + if (bitmap_empty_p (tmp_bitmap)) + continue; + if (bitmap_equal_p (tmp_bitmap, other->block_bitmap)) + { + other->outer = loop; + VEC_safe_push (loop_info, heap, loop->loops, other); + } + else if (bitmap_equal_p (tmp_bitmap, loop->block_bitmap)) + { + loop->outer = other; + VEC_safe_push (loop_info, heap, other->loops, loop); + } + else + { + if (dump_file) + fprintf (dump_file, + ";; can't find suitable nesting for loops %d and %d\n", + loop->loop_no, other->loop_no); + loop->bad = other->bad = 1; + } + } + } + BITMAP_FREE (tmp_bitmap); + + return loops; +} + +/* Free up the loop structures in LOOPS. */ +static void +free_loops (loop_info loops) +{ + while (loops) + { + loop_info loop = loops; + loops = loop->next; + VEC_free (loop_info, heap, loop->loops); + VEC_free (basic_block, heap, loop->blocks); + BITMAP_FREE (loop->block_bitmap); + XDELETE (loop); + } +} + +#define BB_AUX_INDEX(BB) ((unsigned)(BB)->aux) + +/* The taken-branch edge from the loop end can actually go forward. Since the + Blackfin's LSETUP instruction requires that the loop end be after the loop + start, try to reorder a loop's basic blocks when we find such a case. */ +static void +bfin_reorder_loops (loop_info loops, FILE *dump_file) +{ + basic_block bb; + loop_info loop; + + FOR_EACH_BB (bb) + bb->aux = NULL; + cfg_layout_initialize (0); + + for (loop = loops; loop; loop = loop->next) + { + unsigned index; + basic_block bb; + edge e; + edge_iterator ei; + + if (loop->bad) + continue; + + /* Recreate an index for basic blocks that represents their order. */ + for (bb = ENTRY_BLOCK_PTR->next_bb, index = 0; + bb != EXIT_BLOCK_PTR; + bb = bb->next_bb, index++) + bb->aux = (PTR) index; + + if (BB_AUX_INDEX (loop->head) < BB_AUX_INDEX (loop->tail)) + continue; + + FOR_EACH_EDGE (e, ei, loop->head->succs) + { + if (bitmap_bit_p (loop->block_bitmap, e->dest->index) + && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) + { + basic_block start_bb = e->dest; + basic_block start_prev_bb = start_bb->prev_bb; + + if (dump_file) + fprintf (dump_file, ";; Moving block %d before block %d\n", + loop->head->index, start_bb->index); + loop->head->prev_bb->next_bb = loop->head->next_bb; + loop->head->next_bb->prev_bb = loop->head->prev_bb; + + loop->head->prev_bb = start_prev_bb; + loop->head->next_bb = start_bb; + start_prev_bb->next_bb = start_bb->prev_bb = loop->head; + break; + } + } + loops = loops->next; + } + + FOR_EACH_BB (bb) + { + if (bb->next_bb != EXIT_BLOCK_PTR) + bb->aux = bb->next_bb; + else + bb->aux = NULL; + } + cfg_layout_finalize (); + df_analyze (); +} + +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns + and tries to rewrite the RTL of these loops so that proper Blackfin + hardware loops are generated. 
*/ + +static void +bfin_reorg_loops (FILE *dump_file) +{ + loop_info loops = NULL; + loop_info loop; + basic_block bb; + bitmap_obstack stack; + + bitmap_obstack_initialize (&stack); + + if (dump_file) + fprintf (dump_file, ";; Find loops, first pass\n\n"); + + loops = bfin_discover_loops (&stack, dump_file); + + if (dump_file) + bfin_dump_loops (loops); + + bfin_reorder_loops (loops, dump_file); + free_loops (loops); + + if (dump_file) + fprintf (dump_file, ";; Find loops, second pass\n\n"); + + loops = bfin_discover_loops (&stack, dump_file); + if (dump_file) + { + fprintf (dump_file, ";; All loops found:\n\n"); + bfin_dump_loops (loops); + } + + /* Now apply the optimizations. */ + for (loop = loops; loop; loop = loop->next) + bfin_optimize_loop (loop); + + if (dump_file) + { + fprintf (dump_file, ";; After hardware loops optimization:\n\n"); + bfin_dump_loops (loops); + } + + free_loops (loops); + + if (dump_file) + print_rtl (dump_file, get_insns ()); + + FOR_EACH_BB (bb) + bb->aux = NULL; +} + +/* Possibly generate a SEQUENCE out of three insns found in SLOT. + Returns true if we modified the insn chain, false otherwise. */ +static bool +gen_one_bundle (rtx slot[3]) +{ + gcc_assert (slot[1] != NULL_RTX); + + /* Don't add extra NOPs if optimizing for size. */ + if (optimize_size + && (slot[0] == NULL_RTX || slot[2] == NULL_RTX)) + return false; + + /* Verify that we really can do the multi-issue. */ + if (slot[0]) + { + rtx t = NEXT_INSN (slot[0]); + while (t != slot[1]) + { + if (GET_CODE (t) != NOTE + || NOTE_KIND (t) != NOTE_INSN_DELETED) + return false; + t = NEXT_INSN (t); + } + } + if (slot[2]) + { + rtx t = NEXT_INSN (slot[1]); + while (t != slot[2]) + { + if (GET_CODE (t) != NOTE + || NOTE_KIND (t) != NOTE_INSN_DELETED) + return false; + t = NEXT_INSN (t); + } + } + + if (slot[0] == NULL_RTX) + { + slot[0] = emit_insn_before (gen_mnop (), slot[1]); + df_insn_rescan (slot[0]); + } + if (slot[2] == NULL_RTX) + { + slot[2] = emit_insn_after (gen_forced_nop (), slot[1]); + df_insn_rescan (slot[2]); + } + + /* Avoid line number information being printed inside one bundle. */ + if (INSN_LOCATOR (slot[1]) + && INSN_LOCATOR (slot[1]) != INSN_LOCATOR (slot[0])) + INSN_LOCATOR (slot[1]) = INSN_LOCATOR (slot[0]); + if (INSN_LOCATOR (slot[2]) + && INSN_LOCATOR (slot[2]) != INSN_LOCATOR (slot[0])) + INSN_LOCATOR (slot[2]) = INSN_LOCATOR (slot[0]); + + /* Terminate them with "|| " instead of ";" in the output. */ + PUT_MODE (slot[0], SImode); + PUT_MODE (slot[1], SImode); + /* Terminate the bundle, for the benefit of reorder_var_tracking_notes. */ + PUT_MODE (slot[2], QImode); + return true; +} + +/* Go through all insns, and use the information generated during scheduling + to generate SEQUENCEs to represent bundles of instructions issued + simultaneously. */ + +static void +bfin_gen_bundles (void) +{ + basic_block bb; + FOR_EACH_BB (bb) + { + rtx insn, next; + rtx slot[3]; + int n_filled = 0; + + slot[0] = slot[1] = slot[2] = NULL_RTX; + for (insn = BB_HEAD (bb);; insn = next) + { + int at_end; + if (INSN_P (insn)) + { + if (get_attr_type (insn) == TYPE_DSP32) + slot[0] = insn; + else if (slot[1] == NULL_RTX) + slot[1] = insn; + else + slot[2] = insn; + n_filled++; + } + + next = NEXT_INSN (insn); + while (next && insn != BB_END (bb) + && !(INSN_P (next) + && GET_CODE (PATTERN (next)) != USE + && GET_CODE (PATTERN (next)) != CLOBBER)) + { + insn = next; + next = NEXT_INSN (insn); + } + + /* BB_END can change due to emitting extra NOPs, so check here. 
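bfin_gen_bundles groups instructions that the scheduler placed in one cycle into a three-slot bundle: one 32-bit DSP instruction plus two 16-bit instructions, padding empty slots with MNOP/NOP and marking the slots so they are printed with "||" separators. A standalone sketch of the slot assignment (the mnemonics are illustrative only):

#include <stdio.h>
#include <string.h>

/* Fill one cycle's issue slots: slot 0 takes the 32-bit DSP
   instruction, slots 1 and 2 take 16-bit instructions, and empty slots
   are padded so the bundle can be printed as "insn || insn || insn;".  */
static void
fill_slots (const char *insn[], const int is_dsp32[], int n)
{
  const char *slot[3] = { "MNOP", "NOP", "NOP" };
  int i;

  for (i = 0; i < n && i < 3; i++)
    {
      if (is_dsp32[i])
        slot[0] = insn[i];
      else if (strcmp (slot[1], "NOP") == 0)
        slot[1] = insn[i];
      else
        slot[2] = insn[i];
    }
  printf ("%s || %s || %s;\n", slot[0], slot[1], slot[2]);
}

int
main (void)
{
  const char *insns[] = { "R0 = R1 + R2 (S)", "R3 = [P0++]", "[P1++] = R4" };
  const int dsp32[] = { 1, 0, 0 };
  fill_slots (insns, dsp32, 3);
  return 0;
}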
*/ + at_end = insn == BB_END (bb); + if (at_end || GET_MODE (next) == TImode) + { + if ((n_filled < 2 + || !gen_one_bundle (slot)) + && slot[0] != NULL_RTX) + { + rtx pat = PATTERN (slot[0]); + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC + && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT) + { + SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0); + INSN_CODE (slot[0]) = -1; + df_insn_rescan (slot[0]); + } + } + n_filled = 0; + slot[0] = slot[1] = slot[2] = NULL_RTX; + } + if (at_end) + break; + } + } +} + +/* Ensure that no var tracking notes are emitted in the middle of a + three-instruction bundle. */ + +static void +reorder_var_tracking_notes (void) +{ + basic_block bb; + FOR_EACH_BB (bb) + { + rtx insn, next; + rtx queue = NULL_RTX; + bool in_bundle = false; + + for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next) + { + next = NEXT_INSN (insn); + + if (INSN_P (insn)) + { + /* Emit queued up notes at the last instruction of a bundle. */ + if (GET_MODE (insn) == QImode) + { + while (queue) + { + rtx next_queue = PREV_INSN (queue); + PREV_INSN (NEXT_INSN (insn)) = queue; + NEXT_INSN (queue) = NEXT_INSN (insn); + NEXT_INSN (insn) = queue; + PREV_INSN (queue) = insn; + queue = next_queue; + } + in_bundle = false; + } + else if (GET_MODE (insn) == SImode) + in_bundle = true; + } + else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION) + { + if (in_bundle) + { + rtx prev = PREV_INSN (insn); + PREV_INSN (next) = prev; + NEXT_INSN (prev) = next; + + PREV_INSN (insn) = queue; + queue = insn; + } + } + } + } +} + +/* On some silicon revisions, functions shorter than a certain number of cycles + can cause unpredictable behaviour. Work around this by adding NOPs as + needed. */ +static void +workaround_rts_anomaly (void) +{ + rtx insn, first_insn = NULL_RTX; + int cycles = 4; + + if (! ENABLE_WA_RETS) + return; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (BARRIER_P (insn)) + return; + + if (NOTE_P (insn) || LABEL_P (insn)) + continue; + + if (first_insn == NULL_RTX) + first_insn = insn; + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC + || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0) + continue; + + if (CALL_P (insn)) + return; + + if (JUMP_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_return_internal) + break; + + /* Nothing to worry about for direct jumps. */ + if (!any_condjump_p (insn)) + return; + if (cycles <= 1) + return; + cycles--; + } + else if (INSN_P (insn)) + { + rtx pat = PATTERN (insn); + int this_cycles = 1; + + if (GET_CODE (pat) == PARALLEL) + { + if (push_multiple_operation (pat, VOIDmode) + || pop_multiple_operation (pat, VOIDmode)) + this_cycles = n_regs_to_save; + } + else + { + enum insn_code icode = recog_memoized (insn); + if (icode == CODE_FOR_link) + this_cycles = 4; + else if (icode == CODE_FOR_unlink) + this_cycles = 3; + else if (icode == CODE_FOR_mulsi3) + this_cycles = 5; + } + if (this_cycles >= cycles) + return; + + cycles -= this_cycles; + } + } + while (cycles > 0) + { + emit_insn_before (gen_nop (), first_insn); + cycles--; + } +} + +/* Return an insn type for INSN that can be used by the caller for anomaly + workarounds. This differs from plain get_attr_type in that it handles + SEQUENCEs. 
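workaround_rts_anomaly keeps a small cycle budget (4 cycles) from the function entry and pads the prologue with NOPs if a return could otherwise be reached too early on affected silicon. A sketch of that budget calculation, using the per-insn cycle counts assumed above (link = 4, unlink = 3, mulsi3 = 5, everything else = 1):

#include <stdio.h>

/* Walk the instructions from the function entry, subtracting each one's
   cycle count from a 4-cycle budget; if a return is reachable before
   the budget is exhausted, that many NOPs must be emitted at entry.  */
static int
nops_needed (const int insn_cycles[], int n_insns, int returns_at_end)
{
  int budget = 4, i;

  for (i = 0; i < n_insns; i++)
    {
      if (insn_cycles[i] >= budget)
        return 0;                 /* enough work before any return */
      budget -= insn_cycles[i];
    }
  return returns_at_end ? budget : 0;
}

int
main (void)
{
  const int body[] = { 1, 1 };    /* two single-cycle instructions, then RTS */
  printf ("pad with %d NOP(s)\n", nops_needed (body, 2, 1));   /* 2 */
  return 0;
}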
*/ + +static enum attr_type +type_for_anomaly (rtx insn) +{ + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == SEQUENCE) + { + enum attr_type t; + t = get_attr_type (XVECEXP (pat, 0, 1)); + if (t == TYPE_MCLD) + return t; + t = get_attr_type (XVECEXP (pat, 0, 2)); + if (t == TYPE_MCLD) + return t; + return TYPE_MCST; + } + else + return get_attr_type (insn); +} + +/* Return nonzero if INSN contains any loads that may trap. It handles + SEQUENCEs correctly. */ + +static bool +trapping_loads_p (rtx insn) +{ + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == SEQUENCE) + { + enum attr_type t; + t = get_attr_type (XVECEXP (pat, 0, 1)); + if (t == TYPE_MCLD + && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 1))))) + return true; + t = get_attr_type (XVECEXP (pat, 0, 2)); + if (t == TYPE_MCLD + && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 2))))) + return true; + return false; + } + else + return may_trap_p (SET_SRC (single_set (insn))); +} + +/* This function acts like NEXT_INSN, but is aware of three-insn bundles and + skips all subsequent parallel instructions if INSN is the start of such + a group. */ +static rtx +find_next_insn_start (rtx insn) +{ + if (GET_MODE (insn) == SImode) + { + while (GET_MODE (insn) != QImode) + insn = NEXT_INSN (insn); + } + return NEXT_INSN (insn); +} + +/* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of + a three-insn bundle, see if one of them is a load and return that if so. + Return NULL_RTX if the insn does not contain loads. */ +static rtx +find_load (rtx insn) +{ + if (get_attr_type (insn) == TYPE_MCLD) + return insn; + if (GET_MODE (insn) != SImode) + return NULL_RTX; + do { + insn = NEXT_INSN (insn); + if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode) + && get_attr_type (insn) == TYPE_MCLD) + return insn; + } while (GET_MODE (insn) != QImode); + return NULL_RTX; +} + +static void +workaround_speculation (void) +{ + rtx insn, next; + rtx last_condjump = NULL_RTX; + int cycles_since_jump = INT_MAX; + int delay_added = 0; + + if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS) + return; + + /* First pass: find predicted-false branches; if something after them + needs nops, insert them or change the branch to predict true. */ + for (insn = get_insns (); insn; insn = next) + { + rtx pat; + int delay_needed = 0; + + next = find_next_insn_start (insn); + + if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn)) + continue; + + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC + || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0) + continue; + + if (JUMP_P (insn)) + { + if (any_condjump_p (insn) + && ! 
cbranch_predicted_taken_p (insn)) + { + last_condjump = insn; + delay_added = 0; + cycles_since_jump = 0; + } + else + cycles_since_jump = INT_MAX; + } + else if (INSN_P (insn)) + { + rtx load_insn = find_load (insn); + enum attr_type type = type_for_anomaly (insn); + + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + + if (load_insn && ENABLE_WA_SPECULATIVE_LOADS) + { + if (trapping_loads_p (load_insn)) + delay_needed = 4; + } + else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS) + delay_needed = 3; + } + + if (delay_needed > cycles_since_jump + && (delay_needed - cycles_since_jump) > delay_added) + { + rtx pat1; + int num_clobbers; + rtx *op = recog_data.operand; + + delay_needed -= cycles_since_jump; + + extract_insn (last_condjump); + if (optimize_size) + { + pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2], + op[3]); + cycles_since_jump = INT_MAX; + } + else + { + /* Do not adjust cycles_since_jump in this case, so that + we'll increase the number of NOPs for a subsequent insn + if necessary. */ + pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3], + GEN_INT (delay_needed)); + delay_added = delay_needed; + } + PATTERN (last_condjump) = pat1; + INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers); + } + if (CALL_P (insn)) + { + cycles_since_jump = INT_MAX; + delay_added = 0; + } + } + + /* Second pass: for predicted-true branches, see if anything at the + branch destination needs extra nops. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + int cycles_since_jump; + if (JUMP_P (insn) + && any_condjump_p (insn) + && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken + || cbranch_predicted_taken_p (insn))) + { + rtx target = JUMP_LABEL (insn); + rtx label = target; + rtx next_tgt; - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - if (JUMP_P (insn) - && any_condjump_p (insn) - && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken - || cbranch_predicted_taken_p (insn))) - { - rtx target = JUMP_LABEL (insn); - rtx label = target; cycles_since_jump = 0; - for (; target && cycles_since_jump < 3; target = NEXT_INSN (target)) + for (; target && cycles_since_jump < 3; target = next_tgt) { rtx pat; + next_tgt = find_next_insn_start (target); + if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target)) continue; @@ -2658,12 +4918,18 @@ bfin_reorg (void) if (INSN_P (target)) { - enum attr_type type = get_attr_type (target); + rtx load_insn = find_load (target); + enum attr_type type = type_for_anomaly (target); int delay_needed = 0; if (cycles_since_jump < INT_MAX) cycles_since_jump++; - if (type == TYPE_SYNC && TARGET_CSYNC_ANOMALY) + if (load_insn && ENABLE_WA_SPECULATIVE_LOADS) + { + if (trapping_loads_p (load_insn)) + delay_needed = 2; + } + else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS) delay_needed = 2; if (delay_needed > cycles_since_jump) @@ -2697,6 +4963,69 @@ bfin_reorg (void) } } } + +/* We use the machine specific reorg pass for emitting CSYNC instructions + after conditional branches as needed. + + The Blackfin is unusual in that a code sequence like + if cc jump label + r0 = (p0) + may speculatively perform the load even if the condition isn't true. This + happens for a branch that is predicted not taken, because the pipeline + isn't flushed or stalled, so the early stages of the following instructions, + which perform the memory reference, are allowed to execute before the + jump condition is evaluated. 
+ Therefore, we must insert additional instructions in all places where this + could lead to incorrect behavior. The manual recommends CSYNC, while + VDSP seems to use NOPs (even though its corresponding compiler option is + named CSYNC). + + When optimizing for speed, we emit NOPs, which seems faster than a CSYNC. + When optimizing for size, we turn the branch into a predicted taken one. + This may be slower due to mispredicts, but saves code size. */ + +static void +bfin_reorg (void) +{ + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + if (bfin_flag_schedule_insns2) + { + splitting_for_sched = 1; + split_all_insns (); + splitting_for_sched = 0; + + timevar_push (TV_SCHED2); + schedule_insns (); + timevar_pop (TV_SCHED2); + + /* Examine the schedule and insert nops as necessary for 64-bit parallel + instructions. */ + bfin_gen_bundles (); + } + + df_analyze (); + + /* Doloop optimization */ + if (cfun->machine->has_hardware_loops) + bfin_reorg_loops (dump_file); + + workaround_speculation (); + + if (bfin_flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + reorder_var_tracking_notes (); + timevar_pop (TV_VAR_TRACKING); + } + + df_finish_pass (false); + + workaround_rts_anomaly (); +} /* Handle interrupt_handler, exception_handler and nmi_handler function attributes; arguments as in struct attribute_spec.handler. */ @@ -2728,7 +5057,7 @@ handle_int_attribute (tree *node, tree name, warning to be generated). */ static int -bfin_comp_type_attributes (tree type1, tree type2) +bfin_comp_type_attributes (const_tree type1, const_tree type2) { e_funkind kind1, kind2; @@ -2792,6 +5121,91 @@ bfin_handle_longcall_attribute (tree *node, tree name, return NULL_TREE; } +/* Handle a "l1_text" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != FUNCTION_DECL) + { + error ("`%s' attribute only applies to functions", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + } + + /* The decl may have already been given a section attribute + from a previous declaration. Ensure they match. */ + else if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l1.text") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l1.text"); + + return NULL_TREE; +} + +/* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute; + arguments as in struct attribute_spec.handler. 
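From the user's side, the new attributes place individual functions and variables into the on-chip L1 sections named above (.l1.text, .l1.data, .l1.data.A, .l1.data.B). A usage example; it only compiles with a Blackfin-targeted GCC:

/* Hot code placed in the .l1.text section.  */
void dma_copy (void *dst, const void *src, int n) __attribute__ ((l1_text));

/* Static data placed in L1 data banks A and B respectively.  */
static short coeffs[128] __attribute__ ((l1_data_A));
static short samples[128] __attribute__ ((l1_data_B));

/* Plain l1_data leaves the bank choice to the linker script.  */
static int state __attribute__ ((l1_data));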
*/ + +static tree +bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != VAR_DECL) + { + error ("`%s' attribute only applies to variables", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + } + else if (current_function_decl != NULL_TREE + && !TREE_STATIC (decl)) + { + error ("`%s' attribute cannot be specified for local variables", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + } + else + { + const char *section_name; + + if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0) + section_name = ".l1.data"; + else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0) + section_name = ".l1.data.A"; + else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0) + section_name = ".l1.data.B"; + else + gcc_unreachable (); + + /* The decl may have already been given a section attribute + from a previous declaration. Ensure they match. */ + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + section_name) != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) + = build_string (strlen (section_name) + 1, section_name); + } + + return NULL_TREE; +} + /* Table of valid machine attributes. */ const struct attribute_spec bfin_attribute_table[] = { @@ -2804,9 +5218,41 @@ const struct attribute_spec bfin_attribute_table[] = { "saveall", 0, 0, false, true, true, NULL }, { "longcall", 0, 0, false, true, true, bfin_handle_longcall_attribute }, { "shortcall", 0, 0, false, true, true, bfin_handle_longcall_attribute }, + { "l1_text", 0, 0, true, false, false, bfin_handle_l1_text_attribute }, + { "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute }, + { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute }, + { "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute }, { NULL, 0, 0, false, false, false, NULL } }; +/* Implementation of TARGET_ASM_INTEGER. When using FD-PIC, we need to + tell the assembler to generate pointers to function descriptors in + some cases. */ + +static bool +bfin_assemble_integer (rtx value, unsigned int size, int aligned_p) +{ + if (TARGET_FDPIC && size == UNITS_PER_WORD) + { + if (GET_CODE (value) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (value)) + { + fputs ("\t.picptr\tfuncdesc(", asm_out_file); + output_addr_const (asm_out_file, value); + fputs (")\n", asm_out_file); + return true; + } + if (!aligned_p) + { + /* We've set the unaligned SI op to NULL, so we always have to + handle the unaligned case here. */ + assemble_integer_with_op ("\t.4byte\t", value); + return true; + } + } + return default_assemble_integer (value, size, aligned_p); +} + /* Output the assembler code for a thunk function. THUNK_DECL is the declaration for the thunk function itself, FUNCTION is the decl for the target function. DELTA is an immediate constant offset to be @@ -2820,12 +5266,12 @@ bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, { rtx xops[3]; /* The this parameter is passed as the first argument. */ - rtx this = gen_rtx_REG (Pmode, REG_R0); + rtx this_rtx = gen_rtx_REG (Pmode, REG_R0); /* Adjust the this parameter by a fixed constant. 
*/ if (delta) { - xops[1] = this; + xops[1] = this_rtx; if (delta >= -64 && delta <= 63) { xops[0] = GEN_INT (delta); @@ -2852,7 +5298,7 @@ bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, if (vcall_offset) { rtx p2tmp = gen_rtx_REG (Pmode, REG_P2); - rtx tmp = gen_rtx_REG (Pmode, REG_R2); + rtx tmp = gen_rtx_REG (Pmode, REG_R3); xops[1] = tmp; xops[2] = p2tmp; @@ -2868,7 +5314,7 @@ bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops); xops[0] = gen_rtx_MEM (Pmode, p2tmp); } - xops[2] = this; + xops[2] = this_rtx; output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops); } @@ -2882,25 +5328,436 @@ enum bfin_builtins { BFIN_BUILTIN_CSYNC, BFIN_BUILTIN_SSYNC, + BFIN_BUILTIN_ONES, + BFIN_BUILTIN_COMPOSE_2X16, + BFIN_BUILTIN_EXTRACTLO, + BFIN_BUILTIN_EXTRACTHI, + + BFIN_BUILTIN_SSADD_2X16, + BFIN_BUILTIN_SSSUB_2X16, + BFIN_BUILTIN_SSADDSUB_2X16, + BFIN_BUILTIN_SSSUBADD_2X16, + BFIN_BUILTIN_MULT_2X16, + BFIN_BUILTIN_MULTR_2X16, + BFIN_BUILTIN_NEG_2X16, + BFIN_BUILTIN_ABS_2X16, + BFIN_BUILTIN_MIN_2X16, + BFIN_BUILTIN_MAX_2X16, + + BFIN_BUILTIN_SSADD_1X16, + BFIN_BUILTIN_SSSUB_1X16, + BFIN_BUILTIN_MULT_1X16, + BFIN_BUILTIN_MULTR_1X16, + BFIN_BUILTIN_NORM_1X16, + BFIN_BUILTIN_NEG_1X16, + BFIN_BUILTIN_ABS_1X16, + BFIN_BUILTIN_MIN_1X16, + BFIN_BUILTIN_MAX_1X16, + + BFIN_BUILTIN_SUM_2X16, + BFIN_BUILTIN_DIFFHL_2X16, + BFIN_BUILTIN_DIFFLH_2X16, + + BFIN_BUILTIN_SSADD_1X32, + BFIN_BUILTIN_SSSUB_1X32, + BFIN_BUILTIN_NORM_1X32, + BFIN_BUILTIN_ROUND_1X32, + BFIN_BUILTIN_NEG_1X32, + BFIN_BUILTIN_ABS_1X32, + BFIN_BUILTIN_MIN_1X32, + BFIN_BUILTIN_MAX_1X32, + BFIN_BUILTIN_MULT_1X32, + BFIN_BUILTIN_MULT_1X32X32, + BFIN_BUILTIN_MULT_1X32X32NS, + + BFIN_BUILTIN_MULHISILL, + BFIN_BUILTIN_MULHISILH, + BFIN_BUILTIN_MULHISIHL, + BFIN_BUILTIN_MULHISIHH, + + BFIN_BUILTIN_LSHIFT_1X16, + BFIN_BUILTIN_LSHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X16, + BFIN_BUILTIN_SSASHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X32, + + BFIN_BUILTIN_CPLX_MUL_16, + BFIN_BUILTIN_CPLX_MAC_16, + BFIN_BUILTIN_CPLX_MSU_16, + + BFIN_BUILTIN_CPLX_MUL_16_S40, + BFIN_BUILTIN_CPLX_MAC_16_S40, + BFIN_BUILTIN_CPLX_MSU_16_S40, + + BFIN_BUILTIN_CPLX_SQU, + + BFIN_BUILTIN_LOADBYTES, + BFIN_BUILTIN_MAX }; #define def_builtin(NAME, TYPE, CODE) \ do { \ - lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ - NULL, NULL_TREE); \ + add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + NULL, NULL_TREE); \ } while (0) /* Set up all builtin functions for this target. 
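   Each def_builtin call below registers one __builtin_bfin_* function
   with the front end (through add_builtin_function) under a
   BFIN_BUILTIN_* code; bfin_expand_builtin later maps that code either
   to a hand-written expansion or to an entry in the bdesc_1arg /
   bdesc_2arg tables.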
*/ static void bfin_init_builtins (void) { + tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode); tree void_ftype_void = build_function_type (void_type_node, void_list_node); - + tree short_ftype_short + = build_function_type_list (short_integer_type_node, short_integer_type_node, + NULL_TREE); + tree short_ftype_int_int + = build_function_type_list (short_integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_int_int + = build_function_type_list (integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_int + = build_function_type_list (integer_type_node, integer_type_node, + NULL_TREE); + tree short_ftype_int + = build_function_type_list (short_integer_type_node, integer_type_node, + NULL_TREE); + tree int_ftype_v2hi_v2hi + = build_function_type_list (integer_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v2hi_ftype_int_int + = build_function_type_list (V2HI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_int + = build_function_type_list (V2HI_type_node, V2HI_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_short_short + = build_function_type_list (integer_type_node, short_integer_type_node, + short_integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE); + tree short_ftype_v2hi + = build_function_type_list (short_integer_type_node, V2HI_type_node, + NULL_TREE); + tree int_ftype_pint + = build_function_type_list (integer_type_node, + build_pointer_type (integer_type_node), + NULL_TREE); + /* Add the remaining MMX insns with somewhat more complicated types. 
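     (The builtins below are Blackfin fractional and vector intrinsics;
     the MMX wording appears to be carried over from the i386 port.)  A
     minimal usage sketch, assuming a vector_size typedef for the 2x16
     operands and hypothetical int variables x and y:

         typedef short v2hi __attribute__ ((vector_size (4)));
         v2hi a = __builtin_bfin_compose_2x16 (x, y);
         v2hi b = __builtin_bfin_add_fr2x16 (a, a);    saturating 2x16 add
         short s = __builtin_bfin_sum_fr2x16 (b);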
*/ def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC); def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC); + + def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES); + + def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int, + BFIN_BUILTIN_COMPOSE_2X16); + def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi, + BFIN_BUILTIN_EXTRACTHI); + def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi, + BFIN_BUILTIN_EXTRACTLO); + + def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MIN_2X16); + def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MAX_2X16); + + def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADD_2X16); + def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUB_2X16); + def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADDSUB_2X16); + def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUBADD_2X16); + def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULT_2X16); + def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULTR_2X16); + def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi, + BFIN_BUILTIN_NEG_2X16); + def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi, + BFIN_BUILTIN_ABS_2X16); + + def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MIN_1X16); + def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MAX_1X16); + + def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSADD_1X16); + def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSSUB_1X16); + def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MULT_1X16); + def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MULTR_1X16); + def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short, + BFIN_BUILTIN_NEG_1X16); + def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short, + BFIN_BUILTIN_ABS_1X16); + def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int, + BFIN_BUILTIN_NORM_1X16); + + def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_SUM_2X16); + def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_DIFFHL_2X16); + def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_DIFFLH_2X16); + + def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISILL); + def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISIHL); + def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISILH); + def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISIHH); + + def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_MIN_1X32); + def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_MAX_1X32); + + def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSADD_1X32); + def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSSUB_1X32); + def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int, + BFIN_BUILTIN_NEG_1X32); + def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int, + BFIN_BUILTIN_ABS_1X32); + def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int, + BFIN_BUILTIN_NORM_1X32); + 
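  /* The fr1x32 builtins operate on 32-bit fractional values held in plain
     ints; add_fr1x32 and sub_fr1x32, for example, are routed to the
     saturating SImode patterns (ssaddsi3, sssubsi3) by the bdesc_2arg
     table further down.  */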
def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int, + BFIN_BUILTIN_ROUND_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short, + BFIN_BUILTIN_MULT_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32NS); + + /* Shifts. */ + def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X16); + def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int, + BFIN_BUILTIN_SSASHIFT_2X16); + def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_LSHIFT_1X16); + def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int, + BFIN_BUILTIN_LSHIFT_2X16); + def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X32); + + /* Complex numbers. */ + def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADD_2X16); + def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUB_2X16); + def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MUL_16); + def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MAC_16); + def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MSU_16); + def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MUL_16_S40); + def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MAC_16_S40); + def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MSU_16_S40); + def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi, + BFIN_BUILTIN_CPLX_SQU); + + /* "Unaligned" load. 
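     Per the int_ftype_pint signature above, __builtin_bfin_loadbytes takes
     an int * and returns an int; it is expanded through CODE_FOR_loadbytes
     via the bdesc_1arg table below.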
*/ + def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint, + BFIN_BUILTIN_LOADBYTES); + +} + + +struct builtin_description +{ + const enum insn_code icode; + const char *const name; + const enum bfin_builtins code; + int macflag; +}; + +static const struct builtin_description bdesc_2arg[] = +{ + { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 }, + + { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 }, + { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 }, + { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 }, + { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 }, + { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 }, + + { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 }, + { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 }, + { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 }, + { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 }, + + { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 }, + { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 }, + { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 }, + { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 }, + + { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 }, + { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 }, + { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 }, + { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 }, + { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 }, + { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 }, + + { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE }, + { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T }, + { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE }, + { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T }, + { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE } +}; + +static const struct builtin_description bdesc_1arg[] = +{ + { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 }, + + { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 }, + + { CODE_FOR_signbitshi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 }, + { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 }, + { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 }, + + { CODE_FOR_signbitssi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 }, + { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 }, + { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 }, + { CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 }, + + { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 }, + { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 }, + { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 }, + { 
CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 } +}; + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (x, CONST0_RTX (SImode))); + return gen_lowpart (mode, x); +} + +/* Subroutine of bfin_expand_builtin to take care of binop insns. MACFLAG is -1 + if this is a normal binary op, or one of the MACFLAG_xxx constants. */ + +static rtx +bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target, + int macflag) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode op1mode = GET_MODE (op1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode) + { + op1mode = HImode; + op1 = gen_lowpart (HImode, op1); + } + /* In case the insn wants input operands in modes different from + the result, abort. */ + gcc_assert ((op0mode == mode0 || op0mode == VOIDmode) + && (op1mode == mode1 || op1mode == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + if (macflag == -1) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag)); + if (! pat) + return 0; + + emit_insn (pat); + return target; +} + +/* Subroutine of bfin_expand_builtin to take care of unop insns. */ + +static rtx +bfin_expand_unop_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (op0mode == SImode && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + gcc_assert (op0mode == mode0 || op0mode == VOIDmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! 
pat) + return 0; + emit_insn (pat); + return target; } /* Expand an expression EXP that calls a built-in function, @@ -2915,8 +5772,14 @@ bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, enum machine_mode mode ATTRIBUTE_UNUSED, int ignore ATTRIBUTE_UNUSED) { - tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); + size_t i; + enum insn_code icode; + const struct builtin_description *d; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1, arg2; + rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg; + enum machine_mode tmode, mode0; switch (fcode) { @@ -2927,9 +5790,222 @@ bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, emit_insn (gen_ssync ()); return 0; + case BFIN_BUILTIN_DIFFHL_2X16: + case BFIN_BUILTIN_DIFFLH_2X16: + case BFIN_BUILTIN_SUM_2X16: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3 + : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3 + : CODE_FOR_ssaddhilov2hi3); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case BFIN_BUILTIN_MULT_1X32X32: + case BFIN_BUILTIN_MULT_1X32X32NS: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + if (! target + || !register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + + a1reg = gen_rtx_REG (PDImode, REG_A1); + a0reg = gen_rtx_REG (PDImode, REG_A0); + tmp1 = gen_lowpart (V2HImode, op0); + tmp2 = gen_lowpart (V2HImode, op1); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, op0), + gen_lowpart (HImode, op1), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + + if (fcode == BFIN_BUILTIN_MULT_1X32X32) + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + else + { + /* For saturating multiplication, there's exactly one special case + to be handled: multiplying the smallest negative value with + itself. Due to shift correction in fractional multiplies, this + can overflow. Iff this happens, OP2 will contain 1, which, when + added in 32 bits to the smallest negative, wraps to the largest + positive, which is the result we want. 
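     Concretely: 0x80000000 in 1.31 fractional format is -1.0, and the
     exact product -1.0 * -1.0 = +1.0 exceeds the largest representable
     value 0x7fffffff (just below +1.0), so this is the single input pair
     that needs the correction described here.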
*/ + op2 = gen_reg_rtx (V2HImode); + emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx)); + emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC), + gen_lowpart (SImode, op2))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + op2 = gen_reg_rtx (SImode); + emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC))); + } + emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1, + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15))); + emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg)); + if (fcode == BFIN_BUILTIN_MULT_1X32X32NS) + emit_insn (gen_addsi3 (target, target, op2)); + return target; + + case BFIN_BUILTIN_CPLX_MUL_16: + case BFIN_BUILTIN_CPLX_MUL_16_S40: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + accvec = gen_reg_rtx (V2PDImode); + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (tmode); + if (! register_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + if (! register_operand (op1, GET_MODE (op1))) + op1 = copy_to_mode_reg (GET_MODE (op1), op1); + + if (fcode == BFIN_BUILTIN_CPLX_MUL_16) + emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, GEN_INT (MACFLAG_W32))); + else + emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, GEN_INT (MACFLAG_NONE))); + emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx, + const1_rtx, const1_rtx, + const0_rtx, accvec, const1_rtx, const0_rtx, + GEN_INT (MACFLAG_NONE), accvec)); + + return target; + + case BFIN_BUILTIN_CPLX_MAC_16: + case BFIN_BUILTIN_CPLX_MSU_16: + case BFIN_BUILTIN_CPLX_MAC_16_S40: + case BFIN_BUILTIN_CPLX_MSU_16_S40: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + accvec = gen_reg_rtx (V2PDImode); + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (tmode); + if (! register_operand (op1, GET_MODE (op1))) + op1 = copy_to_mode_reg (GET_MODE (op1), op1); + if (! 
register_operand (op2, GET_MODE (op2))) + op2 = copy_to_mode_reg (GET_MODE (op2), op2); + + tmp1 = gen_reg_rtx (SImode); + tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16))); + emit_move_insn (tmp2, gen_lowpart (SImode, op0)); + emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx)); + emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2)); + if (fcode == BFIN_BUILTIN_CPLX_MAC_16 + || fcode == BFIN_BUILTIN_CPLX_MSU_16) + emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, accvec, const0_rtx, + const0_rtx, + GEN_INT (MACFLAG_W32))); + else + emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, accvec, const0_rtx, + const0_rtx, + GEN_INT (MACFLAG_NONE))); + if (fcode == BFIN_BUILTIN_CPLX_MAC_16 + || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40) + { + tmp1 = const1_rtx; + tmp2 = const0_rtx; + } + else + { + tmp1 = const0_rtx; + tmp2 = const1_rtx; + } + emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx, + const1_rtx, const1_rtx, + const0_rtx, accvec, tmp1, tmp2, + GEN_INT (MACFLAG_NONE), accvec)); + + return target; + + case BFIN_BUILTIN_CPLX_SQU: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_mulv2hi; + tmp1 = gen_reg_rtx (V2HImode); + tmp2 = gen_reg_rtx (V2HImode); + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (V2HImode); + if (! register_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + + emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE))); + + emit_insn (gen_flag_mulhi_parts (tmp2, op0, op0, const0_rtx, + const0_rtx, const1_rtx, + GEN_INT (MACFLAG_NONE))); + + emit_insn (gen_ssaddhi3_parts (target, tmp2, tmp2, const1_rtx, + const0_rtx, const0_rtx)); + + emit_insn (gen_sssubhi3_parts (target, tmp1, tmp1, const0_rtx, + const0_rtx, const1_rtx)); + + return target; + default: - gcc_unreachable (); + break; } + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + return bfin_expand_binop_builtin (d->icode, exp, target, + d->macflag); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == fcode) + return bfin_expand_unop_builtin (d->icode, exp, target); + + gcc_unreachable (); } #undef TARGET_INIT_BUILTINS @@ -2956,8 +6032,8 @@ bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST bfin_address_cost -#undef TARGET_ASM_INTERNAL_LABEL -#define TARGET_ASM_INTERNAL_LABEL bfin_internal_label +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER bfin_assemble_integer #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg @@ -2968,17 +6044,20 @@ bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, #undef TARGET_ASM_OUTPUT_MI_THUNK #define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK -#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST bfin_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE bfin_issue_rate + #undef TARGET_PROMOTE_PROTOTYPES -#define 
TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true #undef TARGET_PROMOTE_FUNCTION_ARGS -#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true +#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true #undef TARGET_PROMOTE_FUNCTION_RETURN -#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true +#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true #undef TARGET_ARG_PARTIAL_BYTES #define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes @@ -3004,4 +6083,13 @@ bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, #undef TARGET_SECONDARY_RELOAD #define TARGET_SECONDARY_RELOAD bfin_secondary_reload +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY bfin_return_in_memory + struct gcc_target targetm = TARGET_INITIALIZER;
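/* For reference: TARGET_INITIALIZER expands to an initializer built from
   all of the TARGET_* macros defined (or left at their defaults) above, so
   hooks introduced here, such as bfin_assemble_integer and
   bfin_return_in_memory, become reachable through the global targetm
   vector used by the rest of the compiler.  */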