X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Ffwprop.c;h=4fab5b060725edabb848f39a77a9e51649047825;hp=fbe432974f4179ec1225f3b14f7caf66496f94cc;hb=11ba5d591c5736e46afec6b9c43d97ba26b67836;hpb=1a665a74387cd9365548afcd47cdb0756af87cab diff --git a/gcc/fwprop.c b/gcc/fwprop.c index fbe432974f4..4fab5b06072 100644 --- a/gcc/fwprop.c +++ b/gcc/fwprop.c @@ -1,5 +1,6 @@ /* RTL-based forward propagation pass for GNU compiler. - Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. Contributed by Paolo Bonzini and Steven Bosscher. This file is part of GCC. @@ -22,12 +23,12 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "tm.h" -#include "toplev.h" +#include "diagnostic-core.h" +#include "sparseset.h" #include "timevar.h" #include "rtl.h" #include "tm_p.h" -#include "emit-rtl.h" #include "insn-config.h" #include "recog.h" #include "flags.h" @@ -38,6 +39,8 @@ along with GCC; see the file COPYING3. If not see #include "target.h" #include "cfgloop.h" #include "tree-pass.h" +#include "domwalk.h" +#include "emit-rtl.h" /* This pass does simple forward propagation and simplification when an @@ -100,11 +103,215 @@ along with GCC; see the file COPYING3. If not see (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119))) - where the first two insns are now dead. */ + where the first two insns are now dead. + + We used to use reaching definitions to find which uses have a + single reaching definition (sounds obvious...), but this is too + complex a problem in nasty testcases like PR33928. Now we use the + multiple definitions problem in df-problems.c. The similarity + between that problem and SSA form creation is taken further, in + that fwprop does a dominator walk to create its chains; however, + instead of creating a PHI function where multiple definitions meet + I just punt and record only singleton use-def chains, which is + all that is needed by fwprop. */ static int num_changes; +DEF_VEC_P(df_ref); +DEF_VEC_ALLOC_P(df_ref,heap); +static VEC(df_ref,heap) *use_def_ref; +static VEC(df_ref,heap) *reg_defs; +static VEC(df_ref,heap) *reg_defs_stack; + +/* The MD bitmaps are trimmed to include only live registers to cut + memory usage on testcases like insn-recog.c. Track live registers + in the basic block and do not perform forward propagation if the + destination is a dead pseudo occurring in a note. */ +static bitmap local_md; +static bitmap local_lr; + +/* Return the only def in USE's use-def chain, or NULL if there is + more than one def in the chain. */ + +static inline df_ref +get_def_for_use (df_ref use) +{ + return VEC_index (df_ref, use_def_ref, DF_REF_ID (use)); +} + + +/* Update the reg_defs vector with non-partial definitions in DEF_REC. + TOP_FLAG says which artificials uses should be used, when DEF_REC + is an artificial def vector. LOCAL_MD is modified as after a + df_md_simulate_* function; we do more or less the same processing + done there, so we do not use those functions. */ + +#define DF_MD_GEN_FLAGS \ + (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER) + +static void +process_defs (df_ref *def_rec, int top_flag) +{ + df_ref def; + while ((def = *def_rec++) != NULL) + { + df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def)); + unsigned int dregno; + + if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag) + continue; + + dregno = DF_REF_REGNO (def); + if (curr_def) + VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def); + else + { + /* Do not store anything if "transitioning" from NULL to NULL. But + otherwise, push a special entry on the stack to tell the + leave_block callback that the entry in reg_defs was NULL. */ + if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS) + ; + else + VEC_safe_push (df_ref, heap, reg_defs_stack, def); + } + + if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS) + { + bitmap_set_bit (local_md, dregno); + VEC_replace (df_ref, reg_defs, dregno, NULL); + } + else + { + bitmap_clear_bit (local_md, dregno); + VEC_replace (df_ref, reg_defs, dregno, def); + } + } +} + + +/* Fill the use_def_ref vector with values for the uses in USE_REC, + taking reaching definitions info from LOCAL_MD and REG_DEFS. + TOP_FLAG says which artificials uses should be used, when USE_REC + is an artificial use vector. */ + +static void +process_uses (df_ref *use_rec, int top_flag) +{ + df_ref use; + while ((use = *use_rec++) != NULL) + if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag) + { + unsigned int uregno = DF_REF_REGNO (use); + if (VEC_index (df_ref, reg_defs, uregno) + && !bitmap_bit_p (local_md, uregno) + && bitmap_bit_p (local_lr, uregno)) + VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), + VEC_index (df_ref, reg_defs, uregno)); + } +} + + +static void +single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED, + basic_block bb) +{ + int bb_index = bb->index; + struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index); + struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index); + rtx insn; + + bitmap_copy (local_md, &md_bb_info->in); + bitmap_copy (local_lr, &lr_bb_info->in); + + /* Push a marker for the leave_block callback. */ + VEC_safe_push (df_ref, heap, reg_defs_stack, NULL); + + process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP); + process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP); + + /* We don't call df_simulate_initialize_forwards, as it may overestimate + the live registers if there are unused artificial defs. We prefer + liveness to be underestimated. */ + + FOR_BB_INSNS (bb, insn) + if (INSN_P (insn)) + { + unsigned int uid = INSN_UID (insn); + process_uses (DF_INSN_UID_USES (uid), 0); + process_uses (DF_INSN_UID_EQ_USES (uid), 0); + process_defs (DF_INSN_UID_DEFS (uid), 0); + df_simulate_one_insn_forwards (bb, insn, local_lr); + } + + process_uses (df_get_artificial_uses (bb_index), 0); + process_defs (df_get_artificial_defs (bb_index), 0); +} + +/* Pop the definitions created in this basic block when leaving its + dominated parts. */ + +static void +single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED, + basic_block bb ATTRIBUTE_UNUSED) +{ + df_ref saved_def; + while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL) + { + unsigned int dregno = DF_REF_REGNO (saved_def); + + /* See also process_defs. */ + if (saved_def == VEC_index (df_ref, reg_defs, dregno)) + VEC_replace (df_ref, reg_defs, dregno, NULL); + else + VEC_replace (df_ref, reg_defs, dregno, saved_def); + } +} + + +/* Build a vector holding the reaching definitions of uses reached by a + single dominating definition. */ + +static void +build_single_def_use_links (void) +{ + struct dom_walk_data walk_data; + + /* We use the multiple definitions problem to compute our restricted + use-def chains. */ + df_set_flags (DF_EQ_NOTES); + df_md_add_problem (); + df_note_add_problem (); + df_analyze (); + df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES); + + use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ()); + VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ()); + + reg_defs = VEC_alloc (df_ref, heap, max_reg_num ()); + VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ()); + + reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10); + local_md = BITMAP_ALLOC (NULL); + local_lr = BITMAP_ALLOC (NULL); + + /* Walk the dominator tree looking for single reaching definitions + dominating the uses. This is similar to how SSA form is built. */ + walk_data.dom_direction = CDI_DOMINATORS; + walk_data.initialize_block_local_data = NULL; + walk_data.before_dom_children = single_def_use_enter_block; + walk_data.after_dom_children = single_def_use_leave_block; + + init_walk_dominator_tree (&walk_data); + walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); + fini_walk_dominator_tree (&walk_data); + + BITMAP_FREE (local_lr); + BITMAP_FREE (local_md); + VEC_free (df_ref, heap, reg_defs); + VEC_free (df_ref, heap, reg_defs_stack); +} + /* Do not try to replace constant addresses or addresses of local and argument slots. These MEM expressions are made only once and inserted @@ -149,7 +356,7 @@ canonicalize_address (rtx x) switch (GET_CODE (x)) { case ASHIFT: - if (GET_CODE (XEXP (x, 1)) == CONST_INT + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x)) && INTVAL (XEXP (x, 1)) >= 0) { @@ -184,11 +391,13 @@ canonicalize_address (rtx x) for a memory access in the given MODE. */ static bool -should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode) +should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode, + addr_space_t as, bool speed) { int gain; - if (rtx_equal_p (old_rtx, new_rtx) || !memory_address_p (mode, new_rtx)) + if (rtx_equal_p (old_rtx, new_rtx) + || !memory_address_addr_space_p (mode, new_rtx, as)) return false; /* Copy propagation is always ok. */ @@ -196,14 +405,15 @@ should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode) return true; /* Prefer the new address if it is less expensive. */ - gain = address_cost (old_rtx, mode) - address_cost (new_rtx, mode); + gain = (address_cost (old_rtx, mode, as, speed) + - address_cost (new_rtx, mode, as, speed)); /* If the addresses have equivalent cost, prefer the new address - if it has the highest `rtx_cost'. That has the potential of + if it has the highest `set_src_cost'. That has the potential of eliminating the most insns without additional costs, and it is the same that cse.c used to do. */ if (gain == 0) - gain = rtx_cost (new_rtx, SET) - rtx_cost (old_rtx, SET); + gain = set_src_cost (new_rtx, speed) - set_src_cost (old_rtx, speed); return (gain > 0); } @@ -231,7 +441,10 @@ enum { PR_HANDLE_MEM is set when the source of the propagation was not another MEM. Then, it is safe not to treat non-read-only MEMs as ``opaque'' objects. */ - PR_HANDLE_MEM = 2 + PR_HANDLE_MEM = 2, + + /* Set when costs should be optimized for speed. */ + PR_OPTIMIZE_FOR_SPEED = 4 }; @@ -360,7 +573,9 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) /* Copy propagations are always ok. Otherwise check the costs. */ if (!(REG_P (old_rtx) && REG_P (new_rtx)) - && !should_replace_address (op0, new_op0, GET_MODE (x))) + && !should_replace_address (op0, new_op0, GET_MODE (x), + MEM_ADDR_SPACE (x), + flags & PR_OPTIMIZE_FOR_SPEED)) return true; tem = replace_equiv_address_nv (x, new_op0); @@ -438,7 +653,8 @@ varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED) Otherwise, we accept simplifications that have a lower or equal cost. */ static rtx -propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx) +propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx, + bool speed) { rtx tem; bool collapsed; @@ -453,6 +669,9 @@ propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx) if (!for_each_rtx (&new_rtx, varying_mem_p, NULL)) flags |= PR_HANDLE_MEM; + if (speed) + flags |= PR_OPTIMIZE_FOR_SPEED; + tem = x; collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags); if (tem == x || !collapsed) @@ -460,7 +679,7 @@ propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx) /* gen_lowpart_common will not be able to process VOIDmode entities other than CONST_INTs. */ - if (GET_MODE (tem) == VOIDmode && GET_CODE (tem) != CONST_INT) + if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem)) return NULL_RTX; if (GET_MODE (tem) == VOIDmode) @@ -478,19 +697,19 @@ propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx) between FROM to (but not including) TO. */ static bool -local_ref_killed_between_p (struct df_ref * ref, rtx from, rtx to) +local_ref_killed_between_p (df_ref ref, rtx from, rtx to) { rtx insn; for (insn = from; insn != to; insn = NEXT_INSN (insn)) { - struct df_ref **def_rec; + df_ref *def_rec; if (!INSN_P (insn)) continue; for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) { - struct df_ref *def = *def_rec; + df_ref def = *def_rec; if (DF_REF_REGNO (ref) == DF_REF_REGNO (def)) return true; } @@ -508,19 +727,19 @@ local_ref_killed_between_p (struct df_ref * ref, rtx from, rtx to) we check if the definition is killed after DEF_INSN or before TARGET_INSN insn, in their respective basic blocks. */ static bool -use_killed_between (struct df_ref *use, rtx def_insn, rtx target_insn) +use_killed_between (df_ref use, rtx def_insn, rtx target_insn) { basic_block def_bb = BLOCK_FOR_INSN (def_insn); basic_block target_bb = BLOCK_FOR_INSN (target_insn); int regno; - struct df_ref * def; - - /* In some obscure situations we can have a def reaching a use - that is _before_ the def. In other words the def does not - dominate the use even though the use and def are in the same - basic block. This can happen when a register may be used - uninitialized in a loop. In such cases, we must assume that - DEF is not available. */ + df_ref def; + + /* We used to have a def reaching a use that is _before_ the def, + with the def not dominating the use even though the use and def + are in the same basic block, when a register may be used + uninitialized in a loop. This should not happen anymore since + we do not use reaching definitions, but still we test for such + cases and assume that DEF is not available. */ if (def_bb == target_bb ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn) : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb)) @@ -534,7 +753,7 @@ use_killed_between (struct df_ref *use, rtx def_insn, rtx target_insn) regno = DF_REF_REGNO (use); def = DF_REG_DEF_CHAIN (regno); if (def - && def->next_reg == NULL + && DF_REF_NEXT_REG (def) == NULL && regno >= FIRST_PSEUDO_REGISTER) return false; @@ -546,7 +765,7 @@ use_killed_between (struct df_ref *use, rtx def_insn, rtx target_insn) if (single_pred_p (target_bb) && single_pred (target_bb) == def_bb) { - struct df_ref *x; + df_ref x; /* See if USE is killed between DEF_INSN and the last insn in the basic block containing DEF_INSN. */ @@ -574,7 +793,7 @@ use_killed_between (struct df_ref *use, rtx def_insn, rtx target_insn) static bool all_uses_available_at (rtx def_insn, rtx target_insn) { - struct df_ref **use_rec; + df_ref *use_rec; struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn); rtx def_set = single_set (def_insn); @@ -591,30 +810,36 @@ all_uses_available_at (rtx def_insn, rtx target_insn) invalid. */ for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++) { - struct df_ref *use = *use_rec; + df_ref use = *use_rec; if (rtx_equal_p (DF_REF_REG (use), def_reg)) return false; } for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++) { - struct df_ref *use = *use_rec; - if (rtx_equal_p (use->reg, def_reg)) + df_ref use = *use_rec; + if (rtx_equal_p (DF_REF_REG (use), def_reg)) return false; } } else { + rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX; + /* Look at all the uses of DEF_INSN, and see if they are not killed between DEF_INSN and TARGET_INSN. */ for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++) { - struct df_ref *use = *use_rec; + df_ref use = *use_rec; + if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg)) + return false; if (use_killed_between (use, def_insn, target_insn)) return false; } for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++) { - struct df_ref *use = *use_rec; + df_ref use = *use_rec; + if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg)) + return false; if (use_killed_between (use, def_insn, target_insn)) return false; } @@ -624,94 +849,95 @@ all_uses_available_at (rtx def_insn, rtx target_insn) } -struct find_occurrence_data -{ - rtx find; - rtx *retval; -}; +static df_ref *active_defs; +#ifdef ENABLE_CHECKING +static sparseset active_defs_check; +#endif -/* Callback for for_each_rtx, used in find_occurrence. - See if PX is the rtx we have to find. Return 1 to stop for_each_rtx - if successful, or 0 to continue traversing otherwise. */ +/* Fill the ACTIVE_DEFS array with the use->def link for the registers + mentioned in USE_REC. Register the valid entries in ACTIVE_DEFS_CHECK + too, for checking purposes. */ -static int -find_occurrence_callback (rtx *px, void *data) +static void +register_active_defs (df_ref *use_rec) { - struct find_occurrence_data *fod = (struct find_occurrence_data *) data; - rtx x = *px; - rtx find = fod->find; - - if (x == find) + while (*use_rec) { - fod->retval = px; - return 1; + df_ref use = *use_rec++; + df_ref def = get_def_for_use (use); + int regno = DF_REF_REGNO (use); + +#ifdef ENABLE_CHECKING + sparseset_set_bit (active_defs_check, regno); +#endif + active_defs[regno] = def; } - - return 0; } -/* Return a pointer to one of the occurrences of register FIND in *PX. */ -static rtx * -find_occurrence (rtx *px, rtx find) +/* Build the use->def links that we use to update the dataflow info + for new uses. Note that building the links is very cheap and if + it were done earlier, they could be used to rule out invalid + propagations (in addition to what is done in all_uses_available_at). + I'm not doing this yet, though. */ + +static void +update_df_init (rtx def_insn, rtx insn) { - struct find_occurrence_data data; +#ifdef ENABLE_CHECKING + sparseset_clear (active_defs_check); +#endif + register_active_defs (DF_INSN_USES (def_insn)); + register_active_defs (DF_INSN_USES (insn)); + register_active_defs (DF_INSN_EQ_USES (insn)); +} - gcc_assert (REG_P (find) - || (GET_CODE (find) == SUBREG - && REG_P (SUBREG_REG (find)))); - data.find = find; - data.retval = NULL; - for_each_rtx (px, find_occurrence_callback, &data); - return data.retval; -} +/* Update the USE_DEF_REF array for the given use, using the active definitions + in the ACTIVE_DEFS array to match pseudos to their def. */ - -/* Inside INSN, the expression rooted at *LOC has been changed, moving some - uses from USE_VEC. Find those that are present, and create new items - in the data flow object of the pass. Mark any new uses as having the - given TYPE. */ -static void -update_df (rtx insn, rtx *loc, struct df_ref **use_rec, enum df_ref_type type, - int new_flags) +static inline void +update_uses (df_ref *use_rec) { - bool changed = false; - - /* Add a use for the registers that were propagated. */ while (*use_rec) { - struct df_ref *use = *use_rec; - struct df_ref *orig_use = use, *new_use; - int width = -1; - int offset = -1; - enum machine_mode mode = 0; - rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use)); - use_rec++; - - if (!new_loc) - continue; + df_ref use = *use_rec++; + int regno = DF_REF_REGNO (use); - if (DF_REF_FLAGS_IS_SET (orig_use, DF_REF_SIGN_EXTRACT | DF_REF_ZERO_EXTRACT)) - { - width = DF_REF_EXTRACT_WIDTH (orig_use); - offset = DF_REF_EXTRACT_OFFSET (orig_use); - mode = DF_REF_EXTRACT_MODE (orig_use); - } + /* Set up the use-def chain. */ + if (DF_REF_ID (use) >= (int) VEC_length (df_ref, use_def_ref)) + VEC_safe_grow_cleared (df_ref, heap, use_def_ref, + DF_REF_ID (use) + 1); + +#ifdef ENABLE_CHECKING + gcc_assert (sparseset_bit_p (active_defs_check, regno)); +#endif + VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), active_defs[regno]); + } +} - /* Add a new insn use. Use the original type, because it says if the - use was within a MEM. */ - new_use = df_ref_create (DF_REF_REG (orig_use), new_loc, - insn, BLOCK_FOR_INSN (insn), - type, DF_REF_FLAGS (orig_use) | new_flags, - width, offset, mode); - /* Set up the use-def chain. */ - df_chain_copy (new_use, DF_REF_CHAIN (orig_use)); - changed = true; +/* Update the USE_DEF_REF array for the uses in INSN. Only update note + uses if NOTES_ONLY is true. */ + +static void +update_df (rtx insn, rtx note) +{ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + + if (note) + { + df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE); + df_notes_rescan (insn); } - if (changed) - df_insn_rescan (insn); + else + { + df_uses_create (&PATTERN (insn), insn, 0); + df_insn_rescan (insn); + update_uses (DF_INSN_INFO_USES (insn_info)); + } + + update_uses (DF_INSN_INFO_EQ_USES (insn_info)); } @@ -722,15 +948,22 @@ update_df (rtx insn, rtx *loc, struct df_ref **use_rec, enum df_ref_type type, performed. */ static bool -try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal) +try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal) { rtx insn = DF_REF_INSN (use); - enum df_ref_type type = DF_REF_TYPE (use); - int flags = DF_REF_FLAGS (use); rtx set = single_set (insn); - int old_cost = rtx_cost (SET_SRC (set), SET); + rtx note = NULL_RTX; + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); + int old_cost = 0; bool ok; + update_df_init (def_insn, insn); + + /* forward_propagate_subreg may be operating on an instruction with + multiple sets. If so, assume the cost of the new instruction is + not greater than the old one. */ + if (set) + old_cost = set_src_cost (SET_SRC (set), speed); if (dump_file) { fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn)); @@ -750,7 +983,8 @@ try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new_rtx, rtx def_insn, bool } else if (DF_REF_TYPE (use) == DF_REF_REG_USE - && rtx_cost (SET_SRC (set), SET) > old_cost) + && set + && set_src_cost (SET_SRC (set), speed) > old_cost) { if (dump_file) fprintf (dump_file, "Changes to insn %d not profitable\n", @@ -769,14 +1003,6 @@ try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new_rtx, rtx def_insn, bool { confirm_change_group (); num_changes++; - - df_ref_remove (use); - if (!CONSTANT_P (new_rtx)) - { - struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn); - update_df (insn, loc, DF_INSN_INFO_USES (insn_info), type, flags); - update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info), type, flags); - } } else { @@ -789,95 +1015,254 @@ try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new_rtx, rtx def_insn, bool if (dump_file) fprintf (dump_file, " Setting REG_EQUAL note\n"); - set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx)); - - /* ??? Is this still necessary if we add the note through - set_unique_reg_note? */ - if (!CONSTANT_P (new_rtx)) - { - struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn); - update_df (insn, loc, DF_INSN_INFO_USES (insn_info), - type, DF_REF_IN_NOTE); - update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info), - type, DF_REF_IN_NOTE); - } + note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx)); } } + if ((ok || note) && !CONSTANT_P (new_rtx)) + update_df (insn, note); + return ok; } +/* For the given single_set INSN, containing SRC known to be a + ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN + is redundant due to the register being set by a LOAD_EXTEND_OP + load from memory. */ -/* If USE is a paradoxical subreg, see if it can be replaced by a pseudo. */ +static bool +free_load_extend (rtx src, rtx insn) +{ + rtx reg; + df_ref *use_vec; + df_ref use = 0, def; + + reg = XEXP (src, 0); +#ifdef LOAD_EXTEND_OP + if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src)) +#endif + return false; + + for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++) + { + use = *use_vec; + + if (!DF_REF_IS_ARTIFICIAL (use) + && DF_REF_TYPE (use) == DF_REF_REG_USE + && DF_REF_REG (use) == reg) + break; + } + if (!use) + return false; + + def = get_def_for_use (use); + if (!def) + return false; + + if (DF_REF_IS_ARTIFICIAL (def)) + return false; + + if (NONJUMP_INSN_P (DF_REF_INSN (def))) + { + rtx patt = PATTERN (DF_REF_INSN (def)); + + if (GET_CODE (patt) == SET + && GET_CODE (SET_SRC (patt)) == MEM + && rtx_equal_p (SET_DEST (patt), reg)) + return true; + } + return false; +} + +/* If USE is a subreg, see if it can be replaced by a pseudo. */ static bool -forward_propagate_subreg (struct df_ref *use, rtx def_insn, rtx def_set) +forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set) { rtx use_reg = DF_REF_REG (use); rtx use_insn, src; - /* Only consider paradoxical subregs... */ + /* Only consider subregs... */ enum machine_mode use_mode = GET_MODE (use_reg); if (GET_CODE (use_reg) != SUBREG - || !REG_P (SET_DEST (def_set)) - || GET_MODE_SIZE (use_mode) - <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg)))) + || !REG_P (SET_DEST (def_set))) return false; - /* If this is a paradoxical SUBREG, we have no idea what value the - extra bits would have. However, if the operand is equivalent to - a SUBREG whose operand is the same as our mode, and all the modes - are within a word, we can just use the inner operand because - these SUBREGs just say how to treat the register. */ - use_insn = DF_REF_INSN (use); + /* If this is a paradoxical SUBREG... */ + if (GET_MODE_SIZE (use_mode) + > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg)))) + { + /* If this is a paradoxical SUBREG, we have no idea what value the + extra bits would have. However, if the operand is equivalent to + a SUBREG whose operand is the same as our mode, and all the modes + are within a word, we can just use the inner operand because + these SUBREGs just say how to treat the register. */ + use_insn = DF_REF_INSN (use); + src = SET_SRC (def_set); + if (GET_CODE (src) == SUBREG + && REG_P (SUBREG_REG (src)) + && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER + && GET_MODE (SUBREG_REG (src)) == use_mode + && subreg_lowpart_p (src) + && all_uses_available_at (def_insn, use_insn)) + return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src), + def_insn, false); + } + + /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG + is the low part of the reg being extended then just use the inner + operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will + be removed due to it matching a LOAD_EXTEND_OP load from memory, + or due to the operation being a no-op when applied to registers. + For example, if we have: + + A: (set (reg:DI X) (sign_extend:DI (reg:SI Y))) + B: (... (subreg:SI (reg:DI X)) ...) + + and mode_rep_extended says that Y is already sign-extended, + the backend will typically allow A to be combined with the + definition of Y or, failing that, allow A to be deleted after + reload through register tying. Introducing more uses of Y + prevents both optimisations. */ + else if (subreg_lowpart_p (use_reg)) + { + use_insn = DF_REF_INSN (use); + src = SET_SRC (def_set); + if ((GET_CODE (src) == ZERO_EXTEND + || GET_CODE (src) == SIGN_EXTEND) + && REG_P (XEXP (src, 0)) + && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER + && GET_MODE (XEXP (src, 0)) == use_mode + && !free_load_extend (src, def_insn) + && (targetm.mode_rep_extended (use_mode, GET_MODE (src)) + != (int) GET_CODE (src)) + && all_uses_available_at (def_insn, use_insn)) + return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0), + def_insn, false); + } + + return false; +} + +/* Try to replace USE with SRC (defined in DEF_INSN) in __asm. */ + +static bool +forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg) +{ + rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc; + int speed_p, i; + df_ref *use_vec; + + gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0); + src = SET_SRC (def_set); - if (GET_CODE (src) == SUBREG - && REG_P (SUBREG_REG (src)) - && GET_MODE (SUBREG_REG (src)) == use_mode - && subreg_lowpart_p (src) - && all_uses_available_at (def_insn, use_insn)) - return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src), - def_insn, false); - else + use_pat = PATTERN (use_insn); + + /* In __asm don't replace if src might need more registers than + reg, as that could increase register pressure on the __asm. */ + use_vec = DF_INSN_USES (def_insn); + if (use_vec[0] && use_vec[1]) return false; + + update_df_init (def_insn, use_insn); + speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)); + asm_operands = NULL_RTX; + switch (GET_CODE (use_pat)) + { + case ASM_OPERANDS: + asm_operands = use_pat; + break; + case SET: + if (MEM_P (SET_DEST (use_pat))) + { + loc = &SET_DEST (use_pat); + new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p); + if (new_rtx) + validate_unshare_change (use_insn, loc, new_rtx, true); + } + asm_operands = SET_SRC (use_pat); + break; + case PARALLEL: + for (i = 0; i < XVECLEN (use_pat, 0); i++) + if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET) + { + if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i)))) + { + loc = &SET_DEST (XVECEXP (use_pat, 0, i)); + new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, + src, speed_p); + if (new_rtx) + validate_unshare_change (use_insn, loc, new_rtx, true); + } + asm_operands = SET_SRC (XVECEXP (use_pat, 0, i)); + } + else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS) + asm_operands = XVECEXP (use_pat, 0, i); + break; + default: + gcc_unreachable (); + } + + gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS); + for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++) + { + loc = &ASM_OPERANDS_INPUT (asm_operands, i); + new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p); + if (new_rtx) + validate_unshare_change (use_insn, loc, new_rtx, true); + } + + if (num_changes_pending () == 0 || !apply_change_group ()) + return false; + + update_df (use_insn, NULL); + num_changes++; + return true; } /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the result. */ static bool -forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) +forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set) { rtx use_insn = DF_REF_INSN (use); rtx use_set = single_set (use_insn); rtx src, reg, new_rtx, *loc; bool set_reg_equal; enum machine_mode mode; + int asm_use = -1; + + if (INSN_CODE (use_insn) < 0) + asm_use = asm_noperands (PATTERN (use_insn)); - if (!use_set) + if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn)) return false; /* Do not propagate into PC, CC0, etc. */ - if (GET_MODE (SET_DEST (use_set)) == VOIDmode) + if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode) return false; /* If def and use are subreg, check if they match. */ reg = DF_REF_REG (use); - if (GET_CODE (reg) == SUBREG - && GET_CODE (SET_DEST (def_set)) == SUBREG - && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg) - || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))) - return false; - + if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG) + { + if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)) + return false; + } /* Check if the def had a subreg, but the use has the whole reg. */ - if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG) + else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG) return false; - /* Check if the use has a subreg, but the def had the whole reg. Unlike the previous case, the optimization is possible and often useful indeed. */ - if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set))) + else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set))) reg = SUBREG_REG (reg); + /* Make sure that we can treat REG as having the same mode as the + source of DEF_SET. */ + if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)) + return false; + /* Check if the substitution is valid (last, because it's the most expensive check!). */ src = SET_SRC (def_set); @@ -890,7 +1275,7 @@ forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) if (MEM_P (src) && MEM_READONLY_P (src)) { rtx x = avoid_constant_pool_reference (src); - if (x != src) + if (x != src && use_set) { rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set); @@ -901,6 +1286,9 @@ forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) return false; } + if (asm_use >= 0) + return forward_propagate_asm (use, def_insn, def_set, reg); + /* Else try simplifying. */ if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE) @@ -908,6 +1296,11 @@ forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) loc = &SET_DEST (use_set); set_reg_equal = false; } + else if (!use_set) + { + loc = &INSN_VAR_LOCATION_LOC (use_insn); + set_reg_equal = false; + } else { rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); @@ -917,10 +1310,11 @@ forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) loc = &SET_SRC (use_set); /* Do not replace an existing REG_EQUAL note if the insn is not - recognized. Either we're already replacing in the note, or - we'll separately try plugging the definition in the note and - simplifying. */ - set_reg_equal = (note == NULL_RTX); + recognized. Either we're already replacing in the note, or we'll + separately try plugging the definition in the note and simplifying. + And only install a REQ_EQUAL note when the destination is a REG, + as the note would be invalid otherwise. */ + set_reg_equal = (note == NULL_RTX && REG_P (SET_DEST (use_set))); } if (GET_MODE (*loc) == VOIDmode) @@ -928,7 +1322,8 @@ forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) else mode = GET_MODE (*loc); - new_rtx = propagate_rtx (*loc, mode, reg, src); + new_rtx = propagate_rtx (*loc, mode, reg, src, + optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn))); if (!new_rtx) return false; @@ -938,35 +1333,33 @@ forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) /* Given a use USE of an insn, if it has a single reaching - definition, try to forward propagate it into that insn. */ + definition, try to forward propagate it into that insn. + Return true if cfg cleanup will be needed. */ -static void -forward_propagate_into (struct df_ref *use) +static bool +forward_propagate_into (df_ref use) { - struct df_link *defs; - struct df_ref *def; + df_ref def; rtx def_insn, def_set, use_insn; rtx parent; if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) - return; + return false; if (DF_REF_IS_ARTIFICIAL (use)) - return; + return false; /* Only consider uses that have a single definition. */ - defs = DF_REF_CHAIN (use); - if (!defs || defs->next) - return; - - def = defs->ref; + def = get_def_for_use (use); + if (!def) + return false; if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE) - return; + return false; if (DF_REF_IS_ARTIFICIAL (def)) - return; + return false; /* Do not propagate loop invariant definitions inside the loop. */ if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father) - return; + return false; /* Check if the use is still present in the insn! */ use_insn = DF_REF_INSN (use); @@ -976,19 +1369,26 @@ forward_propagate_into (struct df_ref *use) parent = PATTERN (use_insn); if (!reg_mentioned_p (DF_REF_REG (use), parent)) - return; + return false; def_insn = DF_REF_INSN (def); if (multiple_sets (def_insn)) - return; + return false; def_set = single_set (def_insn); if (!def_set) - return; + return false; /* Only try one kind of propagation. If two are possible, we'll do it on the following iterations. */ - if (!forward_propagate_and_simplify (use, def_insn, def_set)) - forward_propagate_subreg (use, def_insn, def_set); + if (forward_propagate_and_simplify (use, def_insn, def_set) + || forward_propagate_subreg (use, def_insn, def_set)) + { + if (cfun->can_throw_non_call_exceptions + && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX) + && purge_dead_edges (DF_REF_BB (use))) + return true; + } + return false; } @@ -1004,13 +1404,13 @@ fwprop_init (void) insns (sadly) if we are not working in cfglayout mode. */ loop_optimizer_init (0); - /* Now set up the dataflow problem (we only want use-def chains) and - put the dataflow solver to work. */ - df_set_flags (DF_EQ_NOTES); - df_chain_add_problem (DF_UD_CHAIN); - df_analyze (); - df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES); + build_single_def_use_links (); df_set_flags (DF_DEFER_INSN_RESCAN); + + active_defs = XNEWVEC (df_ref, max_reg_num ()); +#ifdef ENABLE_CHECKING + active_defs_check = sparseset_alloc (max_reg_num ()); +#endif } static void @@ -1018,6 +1418,12 @@ fwprop_done (void) { loop_optimizer_finalize (); + VEC_free (df_ref, heap, use_def_ref); + free (active_defs); +#ifdef ENABLE_CHECKING + sparseset_free (active_defs_check); +#endif + free_dominance_info (CDI_DOMINATORS); cleanup_cfg (0); delete_trivially_dead_insns (get_insns (), max_reg_num ()); @@ -1029,7 +1435,6 @@ fwprop_done (void) } - /* Main entry point. */ static bool @@ -1042,10 +1447,11 @@ static unsigned int fwprop (void) { unsigned i; + bool need_cleanup = false; fwprop_init (); - /* Go through all the uses. update_df will create new ones at the + /* Go through all the uses. df_uses_create will create new ones at the end, and we'll go through them as well. Do not forward propagate addresses into loops until after unrolling. @@ -1053,14 +1459,18 @@ fwprop (void) for (i = 0; i < DF_USES_TABLE_SIZE (); i++) { - struct df_ref *use = DF_USES_GET (i); + df_ref use = DF_USES_GET (i); if (use) if (DF_REF_TYPE (use) == DF_REF_REG_USE - || DF_REF_BB (use)->loop_father == NULL) - forward_propagate_into (use); + || DF_REF_BB (use)->loop_father == NULL + /* The outer most loop is not really a loop. */ + || loop_outer (DF_REF_BB (use)->loop_father) == NULL) + need_cleanup |= forward_propagate_into (use); } fwprop_done (); + if (need_cleanup) + cleanup_cfg (0); return 0; } @@ -1079,8 +1489,9 @@ struct rtl_opt_pass pass_rtl_fwprop = 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing | - TODO_dump_func /* todo_flags_finish */ + TODO_df_finish + | TODO_verify_flow + | TODO_verify_rtl_sharing /* todo_flags_finish */ } }; @@ -1088,23 +1499,27 @@ static unsigned int fwprop_addr (void) { unsigned i; + bool need_cleanup = false; + fwprop_init (); - /* Go through all the uses. update_df will create new ones at the + /* Go through all the uses. df_uses_create will create new ones at the end, and we'll go through them as well. */ - df_set_flags (DF_DEFER_INSN_RESCAN); - for (i = 0; i < DF_USES_TABLE_SIZE (); i++) { - struct df_ref *use = DF_USES_GET (i); + df_ref use = DF_USES_GET (i); if (use) if (DF_REF_TYPE (use) != DF_REF_REG_USE - && DF_REF_BB (use)->loop_father != NULL) - forward_propagate_into (use); + && DF_REF_BB (use)->loop_father != NULL + /* The outer most loop is not really a loop. */ + && loop_outer (DF_REF_BB (use)->loop_father) != NULL) + need_cleanup |= forward_propagate_into (use); } fwprop_done (); + if (need_cleanup) + cleanup_cfg (0); return 0; } @@ -1123,7 +1538,6 @@ struct rtl_opt_pass pass_rtl_fwprop_addr = 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - TODO_df_finish | TODO_verify_rtl_sharing | - TODO_dump_func /* todo_flags_finish */ + TODO_df_finish | TODO_verify_rtl_sharing /* todo_flags_finish */ } };