/* Global common subexpression elimination/Partial redundancy elimination
and global constant/copy propagation for GNU compiler.
- Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
Free Software Foundation, Inc.
This file is part of GCC.
#include "toplev.h"
#include "rtl.h"
+#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "ggc.h"
#include "params.h"
#include "cselib.h"
-
+#include "intl.h"
#include "obstack.h"
/* Propagate flow information through back edges and thus enable PRE's
struct ls_expr * next; /* Next in the list. */
int invalid; /* Invalid for some reason. */
int index; /* If it maps to a bitmap index. */
- int hash_index; /* Index when in a hash table. */
+ unsigned int hash_index; /* Index when in a hash table. */
rtx reaching_reg; /* Register to use when re-writing. */
};
static void free_reg_set_mem (void);
static int get_bitmap_width (int, int, int);
static void record_one_set (int, rtx);
+static void replace_one_set (int, rtx, rtx);
static void record_set_info (rtx, rtx, void *);
static void compute_sets (rtx);
static void hash_scan_insn (rtx, struct hash_table *, int);
static void trim_ld_motion_mems (void);
static void update_ld_motion_stores (struct expr *);
static void reg_set_info (rtx, rtx, void *);
+static void reg_clear_last_set (rtx, rtx, void *);
static bool store_ops_ok (rtx, int *);
static rtx extract_mentioned_regs (rtx);
static rtx extract_mentioned_regs_helper (rtx, rtx);
static void build_store_vectors (void);
static void insert_insn_start_bb (rtx, basic_block);
static int insert_store (struct ls_expr *, edge);
-static void replace_store_insn (rtx, rtx, basic_block);
+static void remove_reachable_equiv_notes (basic_block, struct ls_expr *);
+static void replace_store_insn (rtx, rtx, basic_block, struct ls_expr *);
static void delete_store (struct ls_expr *, basic_block);
static void free_store_memory (void);
static void store_motion (void);
static bool do_local_cprop (rtx, rtx, int, rtx*);
static bool adjust_libcall_notes (rtx, rtx, rtx, rtx*);
static void local_cprop_pass (int);
+static bool is_too_expensive (const char *);
\f
+
/* Entry point for global common subexpression elimination.
F is the first instruction in the function. */
if (file)
dump_flow_info (file);
- /* Return if there's nothing to do. */
- if (n_basic_blocks <= 1)
+ /* Return if there's nothing to do, or it is too expensive. */
+ if (n_basic_blocks <= 1 || is_too_expensive (_("GCSE disabled")))
return 0;
-
- /* Trying to perform global optimizations on flow graphs which have
- a high connectivity will take a long time and is unlikely to be
- particularly useful.
-
- In normal circumstances a cfg should have about twice as many edges
- as blocks. But we do not want to punish small functions which have
- a couple switch statements. So we require a relatively large number
- of basic blocks and the ratio of edges to blocks to be high. */
- if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
- {
- if (warn_disabled_optimization)
- warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
- n_basic_blocks, n_edges / n_basic_blocks);
- return 0;
- }
-
- /* If allocating memory for the cprop bitmap would take up too much
- storage it's better just to disable the optimization. */
- if ((n_basic_blocks
- * SBITMAP_SET_SIZE (max_gcse_regno)
- * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
- {
- if (warn_disabled_optimization)
- warning ("GCSE disabled: %d basic blocks and %d registers",
- n_basic_blocks, max_gcse_regno);
-
- return 0;
- }
-
+
gcc_obstack_init (&gcse_obstack);
bytes_used = 0;
partial redundancy elimination. */
free_gcse_mem ();
- /* It does not make sense to run code hoisting unless we optimizing
+ /* It does not make sense to run code hoisting unless we are optimizing
for code size -- it rarely makes programs faster, and can make
them bigger if we did partial redundancy elimination (when optimizing
for space, we use a classic gcse algorithm instead of partial
if (file)
{
fprintf (file, "GCSE of %s: %d basic blocks, ",
- current_function_name, n_basic_blocks);
+ current_function_name (), n_basic_blocks);
fprintf (file, "%d pass%s, %d bytes\n\n",
pass, pass > 1 ? "es" : "", max_pass_bytes);
}
obstack_free (®_set_obstack, NULL);
}
+/* An OLD_INSN that used to set REGNO was replaced by NEW_INSN.
+ Update the corresponding `reg_set_table' entry accordingly.
+ We assume that NEW_INSN is not already recorded in reg_set_table[regno]. */
+
+static void
+replace_one_set (int regno, rtx old_insn, rtx new_insn)
+{
+ struct reg_set *reg_info;
+ if (regno >= reg_set_table_size)
+ return;
+ for (reg_info = reg_set_table[regno]; reg_info; reg_info = reg_info->next)
+ if (reg_info->insn == old_insn)
+ {
+ reg_info->insn = new_insn;
+ break;
+ }
+}
+
/* Record REGNO in the reg_set table. */
static void
MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
indicating if a volatile operand is found or if the expression contains
- something we don't want to insert in the table.
+ something we don't want to insert in the table. HASH_TABLE_SIZE is
+ the current size of the hash table to be probed.
??? One might want to merge this with canon_hash. Later. */
static unsigned int
-hash_expr (rtx x, enum machine_mode mode, int *do_not_record_p, int hash_table_size)
+hash_expr (rtx x, enum machine_mode mode, int *do_not_record_p,
+ int hash_table_size)
{
unsigned int hash;
due to it being set with the different alias set. */
if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
return 0;
+
+ /* A volatile mem should not be considered equivalent to any other. */
+ if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
+ return 0;
break;
/* For commutative operations, check both orders. */
/* A copy is not available if its src or dest is subsequently
modified. Here we want to search from INSN+1 on, but
oprs_available_p searches from INSN on. */
- && (insn == BLOCK_END (BLOCK_NUM (insn))
+ && (insn == BB_END (BLOCK_FOR_INSN (insn))
|| ((tmp = next_nonnote_insn (insn)) != NULL_RTX
&& oprs_available_p (pat, tmp))))
insert_set_in_table (pat, insn, table);
}
+ /* In case of store we want to consider the memory value as available in
+ the REG stored in that memory. This makes it possible to remove
+ redundant loads due to stores to the same location. */
+ else if (flag_gcse_las && GET_CODE (src) == REG && GET_CODE (dest) == MEM)
+ {
+ unsigned int regno = REGNO (src);
+
+ /* Do not do this for constant/copy propagation. */
+ if (! table->set_p
+ /* Only record sets of pseudo-regs in the hash table. */
+ && regno >= FIRST_PSEUDO_REGISTER
+ /* Don't GCSE something if we can't do a reg/reg copy. */
+ && can_copy_p (GET_MODE (src))
+ /* GCSE commonly inserts instruction after the insn. We can't
+ do that easily for EH_REGION notes so disable GCSE on these
+ for now. */
+ && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX)
+ /* Is SET_DEST something we want to gcse? */
+ && want_to_gcse_p (dest)
+ /* Don't CSE a nop. */
+ && ! set_noop_p (pat)
+ /* Don't GCSE if it has attached REG_EQUIV note.
+ At this point only function parameters should have
+ REG_EQUIV notes and if the argument slot is used somewhere
+ explicitly, it means address of parameter has been taken,
+ so we should not extend the lifetime of the pseudo. */
+ && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
+ || GET_CODE (XEXP (note, 0)) != MEM))
+ {
+ /* Stores are never anticipatable. */
+ int antic_p = 0;
+ /* An expression is not available if its operands are
+ subsequently modified, including this insn. It's also not
+ available if this is a branch, because we can't insert
+ a set after the branch. */
+ int avail_p = oprs_available_p (dest, insn)
+ && ! JUMP_P (insn);
+
+ /* Record the memory expression (DEST) in the hash table. */
+ insert_expr_in_table (dest, GET_MODE (dest), insn,
+ antic_p, avail_p, table);
+ }
+ }
}
static void
??? hard-reg reg_set_in_block computation
could be moved to compute_sets since they currently don't change. */
- for (insn = current_bb->head;
- insn && insn != NEXT_INSN (current_bb->end);
+ for (insn = BB_HEAD (current_bb);
+ insn && insn != NEXT_INSN (BB_END (current_bb));
insn = NEXT_INSN (insn))
{
if (! INSN_P (insn))
if (table->set_p
&& implicit_sets[current_bb->index] != NULL_RTX)
hash_scan_set (implicit_sets[current_bb->index],
- current_bb->head, table);
+ BB_HEAD (current_bb), table);
/* The next pass builds the hash table. */
- for (insn = current_bb->head, in_libcall_block = 0;
- insn && insn != NEXT_INSN (current_bb->end);
+ for (insn = BB_HEAD (current_bb), in_libcall_block = 0;
+ insn && insn != NEXT_INSN (BB_END (current_bb));
insn = NEXT_INSN (insn))
if (INSN_P (insn))
{
if (insn_computes_expr == NULL)
return 0;
expr_set = single_set (insn_computes_expr);
+ /* The set might be in a parallel with multiple sets; we could
+ probably handle that, but there's currently no easy way to find
+ the relevant sub-expression. */
if (!expr_set)
- abort ();
+ return 0;
found_setting = 0;
use_src = 0;
start of the block]. */
reset_opr_set_tables ();
- for (insn = bb->head;
- insn != NULL && insn != NEXT_INSN (bb->end);
+ for (insn = BB_HEAD (bb);
+ insn != NULL && insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
{
/* Is insn of form (set (pseudo-reg) ...)? */
{
fprintf (gcse_file, "\n");
fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
- current_function_name, pass, bytes_used, gcse_subst_count);
+ current_function_name (), pass, bytes_used, gcse_subst_count);
fprintf (gcse_file, "%d insns created\n", gcse_create_count);
}
start of the block]. */
reset_opr_set_tables ();
- for (insn = bb->head;
- insn != NULL && insn != NEXT_INSN (bb->end);
+ for (insn = BB_HEAD (bb);
+ insn != NULL && insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
if (INSN_P (insn))
{
/* Use canonicalize_condition to do the dirty work of manipulating
MODE_CC values and COMPARE rtx codes. */
- tmp = canonicalize_condition (jump, cond, reverse, &earliest, NULL_RTX);
+ tmp = canonicalize_condition (jump, cond, reverse, &earliest, NULL_RTX,
+ false);
if (!tmp)
return NULL_RTX;
tmp = XEXP (tmp, 0);
if (!REG_P (tmp) || GET_MODE_CLASS (GET_MODE (tmp)) != MODE_INT)
return NULL_RTX;
- tmp = canonicalize_condition (jump, cond, reverse, &earliest, tmp);
+ tmp = canonicalize_condition (jump, cond, reverse, &earliest, tmp,
+ false);
if (!tmp)
return NULL_RTX;
return tmp;
}
+/* Check the comparison COND to see if we can safely form an implicit set from
+ it. COND is either an EQ or NE comparison. */
+
+static bool
+implicit_set_cond_p (rtx cond)
+{
+ enum machine_mode mode = GET_MODE (XEXP (cond, 0));
+ rtx cst = XEXP (cond, 1);
+
+ /* We can't perform this optimization if either operand might be or might
+ contain a signed zero. */
+ if (HONOR_SIGNED_ZEROS (mode))
+ {
+ /* It is sufficient to check if CST is or contains a zero. We must
+ handle float, complex, and vector. If any subpart is a zero, then
+ the optimization can't be performed. */
+ /* ??? The complex and vector checks are not implemented yet. We just
+ always return zero for them. */
+ if (GET_CODE (cst) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, cst);
+ if (REAL_VALUES_EQUAL (d, dconst0))
+ return 0;
+ }
+ else
+ return 0;
+ }
+
+ return gcse_constant_p (cst);
+}
+
/* Find the implicit sets of a function. An "implicit set" is a constraint
on the value of a variable, implied by a conditional jump. For example,
following "if (x == 2)", the then branch may be optimized as though the
/* Check for more than one successor. */
if (bb->succ && bb->succ->succ_next)
{
- cond = fis_get_condition (bb->end);
+ cond = fis_get_condition (BB_END (bb));
if (cond
&& (GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
&& GET_CODE (XEXP (cond, 0)) == REG
&& REGNO (XEXP (cond, 0)) >= FIRST_PSEUDO_REGISTER
- && gcse_constant_p (XEXP (cond, 1)))
+ && implicit_set_cond_p (cond))
{
dest = GET_CODE (cond) == EQ ? BRANCH_EDGE (bb)->dest
: FALLTHRU_EDGE (bb)->dest;
if (gcse_file)
{
fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
- current_function_name, pass, bytes_used);
+ current_function_name (), pass, bytes_used);
fprintf (gcse_file, "%d const props, %d copy props\n\n",
const_prop_count, copy_prop_count);
}
else
dest = NULL;
+ /* Avoid unification of the edge with other edges from original
+ branch. We would end up emitting the instruction on "both"
+ edges. */
+
+ if (dest && setcc && !CC0_P (SET_DEST (PATTERN (setcc))))
+ {
+ edge e2;
+ for (e2 = e->src->succ; e2; e2 = e2->succ_next)
+ if (e2->dest == dest)
+ {
+ dest = NULL;
+ break;
+ }
+ }
+
old_dest = e->dest;
if (dest != NULL
&& dest != old_dest
if (bb->pred && bb->pred->pred_next)
{
setcc = NULL_RTX;
- for (insn = bb->head;
- insn != NULL && insn != NEXT_INSN (bb->end);
+ for (insn = BB_HEAD (bb);
+ insn != NULL && insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
if (GET_CODE (insn) == INSN)
{
static void
insert_insn_end_bb (struct expr *expr, basic_block bb, int pre)
{
- rtx insn = bb->end;
+ rtx insn = BB_END (bb);
rtx new_insn;
rtx reg = expr->reaching_reg;
int regno = REGNO (reg);
/* Since different machines initialize their parameter registers
in different orders, assume nothing. Collect the set of all
parameter registers. */
- insn = find_first_parameter_load (insn, bb->head);
+ insn = find_first_parameter_load (insn, BB_HEAD (bb));
/* If we found all the parameter loads, then we want to insert
before the first parameter load.
return did_insert;
}
-/* Copy the result of INSN to REG. INDX is the expression number. */
+/* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
+ Given "old_reg <- expr" (INSN), instead of adding after it
+ reaching_reg <- old_reg
+ it's better to do the following:
+ reaching_reg <- expr
+ old_reg <- reaching_reg
+ because this way copy propagation can discover additional PRE
+ opportunities. But if this fails, we try the old way.
+ When "expr" is a store, i.e.
+ given "MEM <- old_reg", instead of adding after it
+ reaching_reg <- old_reg
+ it's better to add it before as follows:
+ reaching_reg <- old_reg
+ MEM <- reaching_reg. */
static void
pre_insert_copy_insn (struct expr *expr, rtx insn)
rtx reg = expr->reaching_reg;
int regno = REGNO (reg);
int indx = expr->bitmap_index;
- rtx set = single_set (insn);
- rtx new_insn;
+ rtx pat = PATTERN (insn);
+ rtx set, new_insn;
+ rtx old_reg;
+ int i;
- if (!set)
+ /* This block matches the logic in hash_scan_insn. */
+ if (GET_CODE (pat) == SET)
+ set = pat;
+ else if (GET_CODE (pat) == PARALLEL)
+ {
+ /* Search through the parallel looking for the set whose
+ source was the expression that we're interested in. */
+ set = NULL_RTX;
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx x = XVECEXP (pat, 0, i);
+ if (GET_CODE (x) == SET
+ && expr_equiv_p (SET_SRC (x), expr->expr))
+ {
+ set = x;
+ break;
+ }
+ }
+ }
+ else
abort ();
- new_insn = emit_insn_after (gen_move_insn (reg, copy_rtx (SET_DEST (set))), insn);
+ if (GET_CODE (SET_DEST (set)) == REG)
+ {
+ old_reg = SET_DEST (set);
+ /* Check if we can modify the set destination in the original insn. */
+ if (validate_change (insn, &SET_DEST (set), reg, 0))
+ {
+ new_insn = gen_move_insn (old_reg, reg);
+ new_insn = emit_insn_after (new_insn, insn);
+
+ /* Keep register set table up to date. */
+ replace_one_set (REGNO (old_reg), insn, new_insn);
+ record_one_set (regno, insn);
+ }
+ else
+ {
+ new_insn = gen_move_insn (reg, old_reg);
+ new_insn = emit_insn_after (new_insn, insn);
+
+ /* Keep register set table up to date. */
+ record_one_set (regno, new_insn);
+ }
+ }
+ else /* This is possible only in case of a store to memory. */
+ {
+ old_reg = SET_SRC (set);
+ new_insn = gen_move_insn (reg, old_reg);
+
+ /* Check if we can modify the set source in the original insn. */
+ if (validate_change (insn, &SET_SRC (set), reg, 0))
+ new_insn = emit_insn_before (new_insn, insn);
+ else
+ new_insn = emit_insn_after (new_insn, insn);
- /* Keep register set table up to date. */
- record_one_set (regno, new_insn);
+ /* Keep register set table up to date. */
+ record_one_set (regno, new_insn);
+ }
gcse_create_count++;
"PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
BLOCK_NUM (insn), INSN_UID (new_insn), indx,
INSN_UID (insn), regno);
- update_ld_motion_stores (expr);
}
/* Copy available expressions that reach the redundant expression
static void
pre_insert_copies (void)
{
- unsigned int i;
+ unsigned int i, added_copy;
struct expr *expr;
struct occr *occr;
struct occr *avail;
expression wasn't deleted anywhere. */
if (expr->reaching_reg == NULL)
continue;
+
+ /* Set when we add a copy for that expression. */
+ added_copy = 0;
for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
{
BLOCK_FOR_INSN (occr->insn)))
continue;
+ added_copy = 1;
+
/* Copy the result of avail to reaching_reg. */
pre_insert_copy_insn (expr, insn);
avail->copied_p = 1;
}
}
+
+ if (added_copy)
+ update_ld_motion_stores (expr);
}
}
changed = 0;
for (i = 0; i < expr_hash_table.size; i++)
- for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
+ for (expr = expr_hash_table.table[i];
+ expr != NULL;
+ expr = expr->next_same_hash)
{
int indx = expr->bitmap_index;
rtx set;
basic_block bb = BLOCK_FOR_INSN (insn);
- if (TEST_BIT (pre_delete_map[bb->index], indx))
+ /* We only delete insns that have a single_set. */
+ if (TEST_BIT (pre_delete_map[bb->index], indx)
+ && (set = single_set (insn)) != 0)
{
- set = single_set (insn);
- if (! set)
- abort ();
-
/* Create a pseudo-reg to store the result of reaching
expressions into. Get the mode for the new pseudo from
the mode of the original destination pseudo. */
if (gcse_file)
{
fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
- current_function_name, pass, bytes_used);
+ current_function_name (), pass, bytes_used);
fprintf (gcse_file, "%d substs, %d insns created\n",
gcse_subst_count, gcse_create_count);
}
/* Note that flow inserted a nop a the end of basic blocks that
end in call instructions for reasons other than abnormal
control flow. */
- if (GET_CODE (bb->end) != CALL_INSN)
+ if (GET_CODE (BB_END (bb)) != CALL_INSN)
continue;
for (i = 0; i < expr_hash_table.size; i++)
/* Scan each insn in the basic block looking for memory references and
register sets. */
- stop_insn = NEXT_INSN (current_block->end);
- for (insn = current_block->head;
+ stop_insn = NEXT_INSN (BB_END (current_block));
+ for (insn = BB_HEAD (current_block);
insn != stop_insn;
insn = NEXT_INSN (insn))
{
against zero. */
FOR_EACH_BB (bb)
{
- rtx last_insn = bb->end;
+ rtx last_insn = BB_END (bb);
rtx condition, earliest;
int compare_and_branch;
continue;
/* LAST_INSN is a conditional jump. Get its condition. */
- condition = get_condition (last_insn, &earliest);
+ condition = get_condition (last_insn, &earliest, false);
/* If we can't determine the condition then skip. */
if (! condition)
delete_insn (earliest);
purge_dead_edges (bb);
- /* Don't check this block again. (Note that BLOCK_END is
+ /* Don't check this block again. (Note that BB_END is
invalid here; we deleted the last instruction in the
block.) */
block_reg[bb->index] = 0;
basic_block bb;
int reg;
int regs_per_pass;
- int max_reg;
+ int max_reg = max_reg_num ();
struct null_pointer_info npi;
int something_changed = 0;
- /* If we have only a single block, then there's nothing to do. */
- if (n_basic_blocks <= 1)
- return 0;
-
- /* Trying to perform global optimizations on flow graphs which have
- a high connectivity will take a long time and is unlikely to be
- particularly useful.
-
- In normal circumstances a cfg should have about twice as many edges
- as blocks. But we do not want to punish small functions which have
- a couple switch statements. So we require a relatively large number
- of basic blocks and the ratio of edges to blocks to be high. */
- if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
+ /* If we have only a single block, or it is too expensive, give up. */
+ if (n_basic_blocks <= 1
+ || is_too_expensive (_ ("NULL pointer checks disabled")))
return 0;
/* We need four bitmaps, each with a bit for each register in each
basic block. */
- max_reg = max_reg_num ();
regs_per_pass = get_bitmap_width (4, last_basic_block, max_reg);
/* Allocate bitmaps to hold local and global properties. */
block_reg = xcalloc (last_basic_block, sizeof (int));
FOR_EACH_BB (bb)
{
- rtx last_insn = bb->end;
+ rtx last_insn = BB_END (bb);
rtx condition, earliest, reg;
/* We only want conditional branches. */
continue;
/* LAST_INSN is a conditional jump. Get its condition. */
- condition = get_condition (last_insn, &earliest);
+ condition = get_condition (last_insn, &earliest, false);
/* If we were unable to get the condition, or it is not an equality
comparison against zero then there's nothing we can do. */
/* Hoistable expressions. */
static sbitmap *hoist_exprs;
-/* Dominator bitmaps. */
-dominance_info dominators;
-
/* ??? We could compute post dominators and run this algorithm in
reverse to perform tail merging, doing so would probably be
more effective than the tail merging code in jump.c.
sbitmap_vector_free (hoist_exprs);
sbitmap_vector_free (transpout);
- free_dominance_info (dominators);
+ free_dominance_info (CDI_DOMINATORS);
}
/* Compute the very busy expressions at entry/exit from each block.
compute_local_properties (transp, comp, antloc, &expr_hash_table);
compute_transpout ();
compute_code_hoist_vbeinout ();
- dominators = calculate_dominance_info (CDI_DOMINATORS);
+ calculate_dominance_info (CDI_DOMINATORS);
if (gcse_file)
fprintf (gcse_file, "\n");
}
int found = 0;
int insn_inserted_p;
- domby_len = get_dominated_by (dominators, bb, &domby);
+ domby_len = get_dominated_by (CDI_DOMINATORS, bb, &domby);
/* Examine each expression that is very busy at the exit of this
block. These are the potentially hoistable expressions. */
for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
static struct ls_expr *
ldst_entry (rtx x)
{
+ int do_not_record_p = 0;
struct ls_expr * ptr;
+ unsigned int hash;
- for (ptr = first_ls_expr(); ptr != NULL; ptr = next_ls_expr (ptr))
- if (expr_equiv_p (ptr->pattern, x))
- break;
+ hash = hash_expr_1 (x, GET_MODE (x), & do_not_record_p);
- if (!ptr)
- {
- ptr = xmalloc (sizeof (struct ls_expr));
+ for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
+ if (ptr->hash_index == hash && expr_equiv_p (ptr->pattern, x))
+ return ptr;
- ptr->next = pre_ldst_mems;
- ptr->expr = NULL;
- ptr->pattern = x;
- ptr->pattern_regs = NULL_RTX;
- ptr->loads = NULL_RTX;
- ptr->stores = NULL_RTX;
- ptr->reaching_reg = NULL_RTX;
- ptr->invalid = 0;
- ptr->index = 0;
- ptr->hash_index = 0;
- pre_ldst_mems = ptr;
- }
+ ptr = xmalloc (sizeof (struct ls_expr));
+
+ ptr->next = pre_ldst_mems;
+ ptr->expr = NULL;
+ ptr->pattern = x;
+ ptr->pattern_regs = NULL_RTX;
+ ptr->loads = NULL_RTX;
+ ptr->stores = NULL_RTX;
+ ptr->reaching_reg = NULL_RTX;
+ ptr->invalid = 0;
+ ptr->index = 0;
+ ptr->hash_index = hash;
+ pre_ldst_mems = ptr;
return ptr;
}
FOR_EACH_BB (bb)
{
- for (insn = bb->head;
- insn && insn != NEXT_INSN (bb->end);
+ for (insn = BB_HEAD (bb);
+ insn && insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
{
if (INSN_P (insn))
static void
trim_ld_motion_mems (void)
{
- struct ls_expr * last = NULL;
- struct ls_expr * ptr = first_ls_expr ();
+ struct ls_expr * * last = & pre_ldst_mems;
+ struct ls_expr * ptr = pre_ldst_mems;
while (ptr != NULL)
{
- int del = ptr->invalid;
- struct expr * expr = NULL;
+ struct expr * expr;
/* Delete if entry has been made invalid. */
- if (!del)
+ if (! ptr->invalid)
{
- unsigned int i;
-
- del = 1;
/* Delete if we cannot find this mem in the expression list. */
- for (i = 0; i < expr_hash_table.size && del; i++)
- {
- for (expr = expr_hash_table.table[i];
- expr != NULL;
- expr = expr->next_same_hash)
- if (expr_equiv_p (expr->expr, ptr->pattern))
- {
- del = 0;
- break;
- }
- }
- }
+ unsigned int hash = ptr->hash_index % expr_hash_table.size;
- if (del)
- {
- if (last != NULL)
- {
- last->next = ptr->next;
- free_ldst_entry (ptr);
- ptr = last->next;
- }
- else
- {
- pre_ldst_mems = pre_ldst_mems->next;
- free_ldst_entry (ptr);
- ptr = pre_ldst_mems;
- }
+ for (expr = expr_hash_table.table[hash];
+ expr != NULL;
+ expr = expr->next_same_hash)
+ if (expr_equiv_p (expr->expr, ptr->pattern))
+ break;
}
else
+ expr = (struct expr *) 0;
+
+ if (expr)
{
/* Set the expression field if we are keeping it. */
- last = ptr;
ptr->expr = expr;
+ last = & ptr->next;
ptr = ptr->next;
}
+ else
+ {
+ *last = ptr->next;
+ free_ldst_entry (ptr);
+ ptr = * last;
+ }
}
/* Show the world what we've found. */
/* Global holding the number of store expressions we are dealing with. */
static int num_stores;
-/* Checks to set if we need to mark a register set. Called from note_stores. */
+/* Checks to set if we need to mark a register set. Called from
+ note_stores. */
static void
reg_set_info (rtx dest, rtx setter ATTRIBUTE_UNUSED,
- void *data ATTRIBUTE_UNUSED)
+ void *data)
{
+ sbitmap bb_reg = data;
+
if (GET_CODE (dest) == SUBREG)
dest = SUBREG_REG (dest);
if (GET_CODE (dest) == REG)
- regvec[REGNO (dest)] = INSN_UID (compute_store_table_current_insn);
+ {
+ regvec[REGNO (dest)] = INSN_UID (compute_store_table_current_insn);
+ if (bb_reg)
+ SET_BIT (bb_reg, REGNO (dest));
+ }
+}
+
+/* Clear any mark that says that this insn sets dest. Called from
+ note_stores. */
+
+static void
+reg_clear_last_set (rtx dest, rtx setter ATTRIBUTE_UNUSED,
+ void *data)
+{
+ int *dead_vec = data;
+
+ if (GET_CODE (dest) == SUBREG)
+ dest = SUBREG_REG (dest);
+
+ if (GET_CODE (dest) == REG &&
+ dead_vec[REGNO (dest)] == INSN_UID (compute_store_table_current_insn))
+ dead_vec[REGNO (dest)] = 0;
}
/* Return zero if some of the registers in list X are killed
failed last time. */
if (LAST_AVAIL_CHECK_FAILURE (ptr))
{
- for (tmp = bb->end;
+ for (tmp = BB_END (bb);
tmp != insn && tmp != LAST_AVAIL_CHECK_FAILURE (ptr);
tmp = PREV_INSN (tmp))
continue;
max_gcse_regno);
sbitmap_vector_zero (reg_set_in_block, last_basic_block);
pre_ldst_mems = 0;
- last_set_in = xmalloc (sizeof (int) * max_gcse_regno);
+ last_set_in = xcalloc (max_gcse_regno, sizeof (int));
already_set = xmalloc (sizeof (int) * max_gcse_regno);
/* Find all the stores we care about. */
FOR_EACH_BB (bb)
{
/* First compute the registers set in this block. */
- memset (last_set_in, 0, sizeof (int) * max_gcse_regno);
regvec = last_set_in;
- for (insn = bb->head;
- insn != NEXT_INSN (bb->end);
+ for (insn = BB_HEAD (bb);
+ insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
{
if (! INSN_P (insn))
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (clobbers_all
|| TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
- last_set_in[regno] = INSN_UID (insn);
+ {
+ last_set_in[regno] = INSN_UID (insn);
+ SET_BIT (reg_set_in_block[bb->index], regno);
+ }
}
pat = PATTERN (insn);
compute_store_table_current_insn = insn;
- note_stores (pat, reg_set_info, NULL);
+ note_stores (pat, reg_set_info, reg_set_in_block[bb->index]);
}
- /* Record the set registers. */
- for (regno = 0; regno < max_gcse_regno; regno++)
- if (last_set_in[regno])
- SET_BIT (reg_set_in_block[bb->index], regno);
-
/* Now find the stores. */
memset (already_set, 0, sizeof (int) * max_gcse_regno);
regvec = already_set;
- for (insn = bb->head;
- insn != NEXT_INSN (bb->end);
+ for (insn = BB_HEAD (bb);
+ insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
{
if (! INSN_P (insn))
find_moveable_store (insn, already_set, last_set_in);
/* Unmark regs that are no longer set. */
- for (regno = 0; regno < max_gcse_regno; regno++)
- if (last_set_in[regno] == INSN_UID (insn))
- last_set_in[regno] = 0;
+ compute_store_table_current_insn = insn;
+ note_stores (pat, reg_clear_last_set, last_set_in);
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ bool clobbers_all = false;
+#ifdef NON_SAVING_SETJMP
+ if (NON_SAVING_SETJMP
+ && find_reg_note (insn, REG_SETJMP, NULL_RTX))
+ clobbers_all = true;
+#endif
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if ((clobbers_all
+ || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
+ && last_set_in[regno] == INSN_UID (insn))
+ last_set_in[regno] = 0;
+ }
}
+#ifdef ENABLE_CHECKING
+ /* last_set_in should now be all-zero. */
+ for (regno = 0; regno < max_gcse_regno; regno++)
+ if (last_set_in[regno] != 0)
+ abort ();
+#endif
+
/* Clear temporary marks. */
for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
{
static bool
store_killed_in_insn (rtx x, rtx x_regs, rtx insn, int after)
{
- rtx reg, base;
+ rtx reg, base, note;
if (!INSN_P (insn))
return false;
return true;
}
}
- return find_loads (SET_SRC (pat), x, after);
+ if (find_loads (SET_SRC (pat), x, after))
+ return true;
}
- else
- return find_loads (PATTERN (insn), x, after);
+ else if (find_loads (PATTERN (insn), x, after))
+ return true;
+
+ /* If this insn has a REG_EQUAL or REG_EQUIV note referencing a memory
+ location aliased with X, then this insn kills X. */
+ note = find_reg_equal_equiv_note (insn);
+ if (! note)
+ return false;
+ note = XEXP (note, 0);
+
+ /* However, if the note represents a must alias rather than a may
+ alias relationship, then it does not kill X. */
+ if (expr_equiv_p (note, x))
+ return false;
+
+ /* See if there are any aliased loads in the note. */
+ return find_loads (note, x, after);
}
/* Returns true if the expression X is loaded or clobbered on or after INSN
store_killed_after (rtx x, rtx x_regs, rtx insn, basic_block bb,
int *regs_set_after, rtx *fail_insn)
{
- rtx last = bb->end, act;
+ rtx last = BB_END (bb), act;
if (!store_ops_ok (x_regs, regs_set_after))
{
store_killed_before (rtx x, rtx x_regs, rtx insn, basic_block bb,
int *regs_set_before)
{
- rtx first = bb->head;
+ rtx first = BB_HEAD (bb);
if (!store_ops_ok (x_regs, regs_set_before))
return true;
rtx r = gen_reg_rtx (GET_MODE (ptr->pattern));
if (gcse_file)
fprintf (gcse_file, "Removing redundant store:\n");
- replace_store_insn (r, XEXP (st, 0), bb);
+ replace_store_insn (r, XEXP (st, 0), bb, ptr);
continue;
}
SET_BIT (ae_gen[bb->index], ptr->index);
for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
{
- if (store_killed_after (ptr->pattern, ptr->pattern_regs, bb->head,
+ if (store_killed_after (ptr->pattern, ptr->pattern_regs, BB_HEAD (bb),
bb, regs_set_in_block, NULL))
{
/* It should not be necessary to consider the expression
}
/* Insert an instruction at the beginning of a basic block, and update
- the BLOCK_HEAD if needed. */
+ the BB_HEAD if needed. */
static void
insert_insn_start_bb (rtx insn, basic_block bb)
{
/* Insert at start of successor block. */
- rtx prev = PREV_INSN (bb->head);
- rtx before = bb->head;
+ rtx prev = PREV_INSN (BB_HEAD (bb));
+ rtx before = BB_HEAD (bb);
while (before != 0)
{
if (GET_CODE (before) != CODE_LABEL
|| NOTE_LINE_NUMBER (before) != NOTE_INSN_BASIC_BLOCK))
break;
prev = before;
- if (prev == bb->end)
+ if (prev == BB_END (bb))
break;
before = NEXT_INSN (before);
}
return 1;
}
+/* Remove any REG_EQUAL or REG_EQUIV notes containing a reference to the
+ memory location in SMEXPR set in basic block BB.
+
+ This could be rather expensive. */
+
+static void
+remove_reachable_equiv_notes (basic_block bb, struct ls_expr *smexpr)
+{
+ edge *stack = xmalloc (sizeof (edge) * n_basic_blocks), act;
+ sbitmap visited = sbitmap_alloc (last_basic_block);
+ int stack_top = 0;
+ rtx last, insn, note;
+ rtx mem = smexpr->pattern;
+
+ sbitmap_zero (visited);
+ act = bb->succ;
+
+ while (1)
+ {
+ if (!act)
+ {
+ if (!stack_top)
+ {
+ free (stack);
+ sbitmap_free (visited);
+ return;
+ }
+ act = stack[--stack_top];
+ }
+ bb = act->dest;
+
+ if (bb == EXIT_BLOCK_PTR
+ || TEST_BIT (visited, bb->index)
+ || TEST_BIT (ae_kill[bb->index], smexpr->index))
+ {
+ act = act->succ_next;
+ continue;
+ }
+ SET_BIT (visited, bb->index);
+
+ if (TEST_BIT (st_antloc[bb->index], smexpr->index))
+ {
+ for (last = ANTIC_STORE_LIST (smexpr);
+ BLOCK_FOR_INSN (XEXP (last, 0)) != bb;
+ last = XEXP (last, 1))
+ continue;
+ last = XEXP (last, 0);
+ }
+ else
+ last = NEXT_INSN (BB_END (bb));
+
+ for (insn = BB_HEAD (bb); insn != last; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ {
+ note = find_reg_equal_equiv_note (insn);
+ if (!note || !expr_equiv_p (XEXP (note, 0), mem))
+ continue;
+
+ if (gcse_file)
+ fprintf (gcse_file, "STORE_MOTION drop REG_EQUAL note at insn %d:\n",
+ INSN_UID (insn));
+ remove_note (insn, note);
+ }
+ act = act->succ_next;
+ if (bb->succ)
+ {
+ if (act)
+ stack[stack_top++] = act;
+ act = bb->succ;
+ }
+ }
+}
+
/* This routine will replace a store with a SET to a specified register. */
static void
-replace_store_insn (rtx reg, rtx del, basic_block bb)
+replace_store_insn (rtx reg, rtx del, basic_block bb, struct ls_expr *smexpr)
{
- rtx insn;
+ rtx insn, mem, note, set, ptr;
+ mem = smexpr->pattern;
insn = gen_move_insn (reg, SET_SRC (single_set (del)));
insn = emit_insn_after (insn, del);
fprintf (gcse_file, "\n");
}
+ for (ptr = ANTIC_STORE_LIST (smexpr); ptr; ptr = XEXP (ptr, 1))
+ if (XEXP (ptr, 0) == del)
+ {
+ XEXP (ptr, 0) = insn;
+ break;
+ }
delete_insn (del);
+
+ /* Now we must handle REG_EQUAL notes whose contents is equal to the mem;
+ they are no longer accurate provided that they are reached by this
+ definition, so drop them. */
+ for (; insn != NEXT_INSN (BB_END (bb)); insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ {
+ set = single_set (insn);
+ if (!set)
+ continue;
+ if (expr_equiv_p (SET_DEST (set), mem))
+ return;
+ note = find_reg_equal_equiv_note (insn);
+ if (!note || !expr_equiv_p (XEXP (note, 0), mem))
+ continue;
+
+ if (gcse_file)
+ fprintf (gcse_file, "STORE_MOTION drop REG_EQUAL note at insn %d:\n",
+ INSN_UID (insn));
+ remove_note (insn, note);
+ }
+ remove_reachable_equiv_notes (bb, smexpr);
}
{
/* We know there is only one since we deleted redundant
ones during the available computation. */
- replace_store_insn (reg, del, bb);
+ replace_store_insn (reg, del, bb, expr);
break;
}
}
if (file)
dump_flow_info (file);
- /* Return if there's nothing to do. */
- if (n_basic_blocks <= 1)
+ /* Return if there's nothing to do, or it is too expensive. */
+ if (n_basic_blocks <= 1 || is_too_expensive (_ ("jump bypassing disabled")))
return 0;
- /* Trying to perform global optimizations on flow graphs which have
- a high connectivity will take a long time and is unlikely to be
- particularly useful.
-
- In normal circumstances a cfg should have about twice as many edges
- as blocks. But we do not want to punish small functions which have
- a couple switch statements. So we require a relatively large number
- of basic blocks and the ratio of edges to blocks to be high. */
- if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
- {
- if (warn_disabled_optimization)
- warning ("BYPASS disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
- n_basic_blocks, n_edges / n_basic_blocks);
- return 0;
- }
-
- /* If allocating memory for the cprop bitmap would take up too much
- storage it's better just to disable the optimization. */
- if ((n_basic_blocks
- * SBITMAP_SET_SIZE (max_gcse_regno)
- * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
- {
- if (warn_disabled_optimization)
- warning ("GCSE disabled: %d basic blocks and %d registers",
- n_basic_blocks, max_gcse_regno);
-
- return 0;
- }
-
gcc_obstack_init (&gcse_obstack);
bytes_used = 0;
if (file)
{
fprintf (file, "BYPASS of %s: %d basic blocks, ",
- current_function_name, n_basic_blocks);
+ current_function_name (), n_basic_blocks);
fprintf (file, "%d bytes\n\n", bytes_used);
}
return changed;
}
+/* Return true if the graph is too expensive to optimize.  PASS is the
+ optimization about to be performed, used only in the warning message. */
+
+static bool
+is_too_expensive (const char *pass)
+{
+ /* Trying to perform global optimizations on flow graphs which have
+ a high connectivity will take a long time and is unlikely to be
+ particularly useful.
+
+ In normal circumstances a cfg should have about twice as many
+ edges as blocks. But we do not want to punish small functions
+ which have a couple switch statements. Rather than simply
+ threshold the number of blocks, use something with a more
+ graceful degradation. */
+ if (n_edges > 20000 + n_basic_blocks * 4)
+ {
+ /* The division is safe here: callers bail out before calling us
+ when n_basic_blocks <= 1.  */
+ if (warn_disabled_optimization)
+ warning ("%s: %d basic blocks and %d edges/basic block",
+ pass, n_basic_blocks, n_edges / n_basic_blocks);
+
+ return true;
+ }
+
+ /* If allocating memory for the cprop bitmap would take up too much
+ storage it's better just to disable the optimization. */
+ if ((n_basic_blocks
+ * SBITMAP_SET_SIZE (max_reg_num ())
+ * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
+ {
+ if (warn_disabled_optimization)
+ warning ("%s: %d basic blocks and %d registers",
+ pass, n_basic_blocks, max_reg_num ());
+
+ return true;
+ }
+
+ return false;
+}
+
#include "gt-gcse.h"