/* Thread edges through blocks and update the control flow and SSA graphs.
- Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
This file is part of GCC.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
#include "config.h"
#include "system.h"
#include "ggc.h"
#include "basic-block.h"
#include "output.h"
-#include "errors.h"
#include "expr.h"
#include "function.h"
#include "diagnostic.h"
/* Main data structure to hold information for duplicates of BB. */
static htab_t redirection_data;
-bool rediscover_loops_after_threading;
-
/* Data structure of information to pass to hash table traversal routines. */
struct local_info
{
bool jumps_threaded;
};
+/* Passes which use the jump threading code register jump threading
+ opportunities as they are discovered. We keep the registered
+ jump threading opportunities in this vector as edge pairs
+ (original_edge, target_edge). */
+DEF_VEC_ALLOC_P(edge,heap);
+static VEC(edge,heap) *threaded_edges;
+
+
+/* Jump threading statistics. */
+
+struct thread_stats_d
+{
+ unsigned long num_threaded_edges;
+};
+
+struct thread_stats_d thread_stats;
+
+
/* Remove the last statement in block BB if it is a control statement.
Also remove all outgoing edges except the edge which reaches DEST_BB.
If DEST_BB is NULL, then remove all outgoing edges. */
&& (TREE_CODE (bsi_stmt (bsi)) == COND_EXPR
|| TREE_CODE (bsi_stmt (bsi)) == GOTO_EXPR
|| TREE_CODE (bsi_stmt (bsi)) == SWITCH_EXPR))
- bsi_remove (&bsi);
+ bsi_remove (&bsi, true);
for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
{
{
/* We can use the generic block duplication code and simply remove
the stuff we do not need. */
- rd->dup_block = duplicate_block (bb, NULL);
+ rd->dup_block = duplicate_block (bb, NULL, NULL);
/* Zero out the profile, since the block is unreachable for now. */
rd->dup_block->frequency = 0;
/* Build a hash table element so we can see if E is already
in the table. */
- elt = xmalloc (sizeof (struct redirection_data));
+ elt = XNEW (struct redirection_data);
elt->outgoing_edge = e;
elt->dup_block = NULL;
elt->do_not_duplicate = false;
if (*slot == NULL)
{
*slot = (void *)elt;
- elt->incoming_edges = xmalloc (sizeof (struct el));
+ elt->incoming_edges = XNEW (struct el);
elt->incoming_edges->e = incoming_edge;
elt->incoming_edges->next = NULL;
return elt;
to the list of incoming edges associated with E. */
if (insert)
{
- struct el *el = xmalloc (sizeof (struct el));
+ struct el *el = XNEW (struct el);
el->next = elt->incoming_edges;
el->e = incoming_edge;
elt->incoming_edges = el;
edge e = make_edge (rd->dup_block, rd->outgoing_edge->dest, EDGE_FALLTHRU);
tree phi;
+ e->probability = REG_BR_PROB_BASE;
+ e->count = rd->dup_block->count;
+
/* If there are any PHI nodes at the destination of the outgoing edge
from the duplicate block, then we will need to add a new argument
to them. The argument should have the same value as the argument
for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
{
int indx = rd->outgoing_edge->dest_idx;
- add_phi_arg (phi, PHI_ARG_DEF_TREE (phi, indx), e);
+ add_phi_arg (phi, PHI_ARG_DEF (phi, indx), e);
}
}
to clear it will cause all kinds of unpleasant problems later. */
e->aux = NULL;
+ thread_stats.num_threaded_edges++;
+
if (rd->dup_block)
{
edge e2;
fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
e->src->index, e->dest->index, rd->dup_block->index);
+ rd->dup_block->count += e->count;
+ rd->dup_block->frequency += EDGE_FREQUENCY (e);
+ EDGE_SUCC (rd->dup_block, 0)->count += e->count;
/* Redirect the incoming edge to the appropriate duplicate
block. */
e2 = redirect_edge_and_branch (e, rd->dup_block);
return 1;
}
+/* Return true if block BB has no executable statements other than
+   a simple control flow instruction.  When the number of outgoing
+   edges is one, this is equivalent to a "forwarder" block.  */
+
+static bool
+redirection_block_p (basic_block bb)
+{
+  block_stmt_iterator bsi;
+
+  /* Advance past labels and empty statements to the first executable
+     statement, if any.  */
+  bsi = bsi_start (bb);
+  while (!bsi_end_p (bsi)
+         && (TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR
+             || IS_EMPTY_STMT (bsi_stmt (bsi))))
+    bsi_next (&bsi);
+
+  /* A block with no executable statements at all trivially qualifies.  */
+  if (bsi_end_p (bsi))
+    return true;
+
+  /* Test that we've reached the terminating control statement.  */
+  return bsi_stmt (bsi)
+         && (TREE_CODE (bsi_stmt (bsi)) == COND_EXPR
+             || TREE_CODE (bsi_stmt (bsi)) == GOTO_EXPR
+             || TREE_CODE (bsi_stmt (bsi)) == SWITCH_EXPR);
+}
+
/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
is reached via one or more specific incoming edges, we know which
outgoing edge from BB will be traversed.
the appropriate duplicate of BB.
BB and its duplicates will have assignments to the same set of
- SSA_NAMEs. Right now, we just call into rewrite_ssa_into_ssa
- to update the SSA graph for those names.
+ SSA_NAMEs. Right now, we just call into update_ssa to update the
+ SSA graph for those names.
We are also going to experiment with a true incremental update
scheme for the duplicated resources. One of the interesting
be threaded to a duplicate of BB. */
bool all = true;
+ /* If optimizing for size, only thread this block if we don't have
+ to duplicate it or it's an otherwise empty redirection block. */
+ if (optimize_size
+ && EDGE_COUNT (bb->preds) > 1
+ && !redirection_block_p (bb))
+ {
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ e->aux = NULL;
+ return false;
+ }
+
/* To avoid scanning a linear array for the element we need we instead
use a hash table. For normal code there should be no noticeable
difference. However, if we have a block with a large number of
else
{
edge e2 = e->aux;
-
- /* If we thread to a loop exit edge, then we will need to
- rediscover the loop exit edges. While it may seem that
- the new edge is a loop exit edge, that is not the case.
- Consider threading the edge (5,6) to E in the CFG on the
- left which creates the CFG on the right:
-
-
- 0<--+ 0<---+
- / \ | / \ |
- 1 2 | 1 2 |
- / \ | | / \ | |
- 3 4 | | 3 4 6--+
- \ / | | \ /
- 5 | | 5
- \ / | |
- 6---+ E
- |
- E
-
- After threading, the edge (0, 1) is the loop exit edge and
- the nodes 0, 2, 6 are the only nodes in the loop. */
- if (e2->flags & EDGE_LOOP_EXIT)
- rediscover_loops_after_threading = true;
+ update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
+ e->count, e->aux);
/* Insert the outgoing edge into the hash table if it is not
already in the hash table. */
return local_info.jumps_threaded;
}
-/* Walk through all blocks and thread incoming edges to the block's
- destinations as requested. This is the only entry point into this
- file.
+/* Walk through the registered jump threads and convert them into a
+ form convenient for this pass.
- Blocks which have one or more incoming edges have INCOMING_EDGE_THREADED
- set in the block's annotation.
+ Any block which has incoming edges threaded to outgoing edges
+ will have its entry in THREADED_BLOCK set.
- Each edge that should be threaded has the new destination edge stored in
- the original edge's AUX field.
+ Any threaded edge will have its new outgoing edge stored in the
+ original edge's AUX field.
- This routine (or one of its callees) will clear INCOMING_EDGE_THREADED
- in the block annotations and the AUX field in the edges.
+ This form avoids the need to walk all the edges in the CFG to
+ discover blocks which need processing and avoids unnecessary
+ hash table lookups to map from threaded edge to new target. */
+
+static void
+mark_threaded_blocks (bitmap threaded_blocks)
+{
+  unsigned int i;
+
+  /* THREADED_EDGES stores registered jump threads as consecutive
+     (original_edge, target_edge) pairs, so walk it two entries at
+     a time.  */
+  for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
+    {
+      edge e = VEC_index (edge, threaded_edges, i);
+      edge e2 = VEC_index (edge, threaded_edges, i + 1);
+
+      /* Stash the target edge in the original edge's AUX field and
+         record that E's destination block has threading work, so the
+         main loop need not rediscover either.  */
+      e->aux = e2;
+      bitmap_set_bit (threaded_blocks, e->dest->index);
+    }
+}
+
+
+/* Walk through all blocks and thread incoming edges to the appropriate
+ outgoing edge for each edge pair recorded in THREADED_EDGES.
It is the caller's responsibility to fix the dominance information
and rewrite duplicated SSA_NAMEs back into SSA form.
bool
thread_through_all_blocks (void)
{
-  basic_block bb;
  bool retval = false;
+  unsigned int i;
+  bitmap_iterator bi;
+  bitmap threaded_blocks;
+
+  /* Nothing to do if no jump threading opportunities were registered.  */
+  if (threaded_edges == NULL)
+    return false;
-  rediscover_loops_after_threading = false;
+  threaded_blocks = BITMAP_ALLOC (NULL);
+  memset (&thread_stats, 0, sizeof (thread_stats));
-  FOR_EACH_BB (bb)
+  /* Move the queued (original, target) edge pairs into the edges'
+     AUX fields and collect the affected destination blocks.  */
+  mark_threaded_blocks (threaded_blocks);
+
+  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
    {
-      if (bb_ann (bb)->incoming_edge_threaded)
-	{
-	  retval |= thread_block (bb);
-	  bb_ann (bb)->incoming_edge_threaded = false;
-
-	}
+      basic_block bb = BASIC_BLOCK (i);
+
+      /* Threading earlier blocks may have left BB with no incoming
+	 edges; skip it in that case.  */
+      if (EDGE_COUNT (bb->preds) > 0)
+	retval |= thread_block (bb);
    }
+  if (dump_file && (dump_flags & TDF_STATS))
+    fprintf (dump_file, "\nJumps threaded: %lu\n",
+	     thread_stats.num_threaded_edges);
+
+  /* Release per-invocation state; a later pass re-registers its own
+     jump threads via register_jump_thread.  */
+  BITMAP_FREE (threaded_blocks);
+  threaded_blocks = NULL;
+  VEC_free (edge, heap, threaded_edges);
+  threaded_edges = NULL;
  return retval;
}
+
+/* Register a jump threading opportunity.  We queue up all the jump
+   threading opportunities discovered by a pass and update the CFG
+   and SSA form all at once.
+
+   E is the edge we can thread, E2 is the new target edge.  I.e., we
+   are effectively recording that E->dest can be changed to E2->dest
+   after fixing the SSA graph.  */
+
+void
+register_jump_thread (edge e, edge e2)
+{
+  /* Allocate the vector lazily on the first registration.  */
+  if (threaded_edges == NULL)
+    threaded_edges = VEC_alloc (edge, heap, 10);
+
+  /* Opportunities are stored as consecutive (E, E2) pairs; consumers
+     such as mark_threaded_blocks rely on this layout.  */
+  VEC_safe_push (edge, heap, threaded_edges, e);
+  VEC_safe_push (edge, heap, threaded_edges, e2);
+}