Steven Muchnick
Morgan Kaufmann, 1997
+ Building an Optimizing Compiler
+ Robert Morgan
+ Digital Press, 1998
+
People wishing to speed up the code here should read:
Elimination Algorithms for Data Flow Analysis
B.G. Ryder, M.C. Paull
static int run_jump_opt_after_gcse;
-/* Element I is a list of I's predecessors/successors. */
-static int_list_ptr *s_preds;
-static int_list_ptr *s_succs;
-
-/* Element I is the number of predecessors/successors of basic block I. */
-static int *num_preds;
-static int *num_succs;
-
/* Bitmaps are normally not included in debugging dumps.
However it's useful to be able to print them from GDB.
We could create special functions for this, but it's simpler to
static int pre_expr_reaches_here_p PROTO ((int, struct expr *,
int, int, char *));
static void insert_insn_end_bb PROTO ((struct expr *, int, int));
-static void pre_insert PROTO ((struct expr **));
static void pre_insert_copy_insn PROTO ((struct expr *, rtx));
static void pre_insert_copies PROTO ((void));
static int pre_delete PROTO ((void));
static void free_avail_expr_mem PROTO ((void));
static void compute_ae_gen PROTO ((void));
static int expr_killed_p PROTO ((rtx, int));
-static void compute_ae_kill PROTO ((void));
-static void compute_available PROTO ((void));
+static void compute_ae_kill PROTO ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PROTO ((struct occr *, struct expr *,
int, int, char *));
static rtx computing_insn PROTO ((struct expr *, rtx));
max_gcse_regno = max_reg_num ();
find_basic_blocks (f, max_gcse_regno, file, 1);
+ if (file)
+ dump_flow_info (file);
+
/* Return if there's nothing to do. */
if (n_basic_blocks <= 1)
{
}
gcc_obstack_init (&gcse_obstack);
-
- /* Allocate and compute predecessors/successors. */
-
- s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
- s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
- num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
- num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
- bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr);
- compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
-
- if (file)
- dump_bb_data (file, s_preds, s_succs, 0);
+ bytes_used = 0;
/* Record where pseudo-registers are set.
This data is kept accurate during each pass.
if (optimize_size)
changed |= one_classic_gcse_pass (pass + 1);
else
- changed |= one_pre_gcse_pass (pass + 1);
+ {
+ changed |= one_pre_gcse_pass (pass + 1);
+ free_reg_set_mem ();
+ alloc_reg_set_mem (max_reg_num ());
+ compute_sets (f);
+ run_jump_opt_after_gcse = 1;
+ }
if (max_pass_bytes < bytes_used)
max_pass_bytes = bytes_used;
/* Compute the set of available expressions killed in each basic block. */
static void
-compute_ae_kill ()
+compute_ae_kill (ae_gen, ae_kill)
+ sbitmap *ae_gen, *ae_kill;
{
int bb,i;
}
}
}
-
-/* Compute available expressions.
-
- Implement the algorithm to find available expressions
- as given in the Aho Sethi Ullman book, pages 627-631. */
-
-static void
-compute_available ()
-{
- int bb, changed, passes;
-
- sbitmap_zero (ae_in[0]);
-
- sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/);
-
- for (bb = 1; bb < n_basic_blocks; bb++)
- sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]);
-
- passes = 0;
- changed = 1;
- while (changed)
- {
- changed = 0;
- for (bb = 1; bb < n_basic_blocks; bb++)
- {
- sbitmap_intersection_of_preds (ae_in[bb], ae_out, bb);
- changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb],
- ae_in[bb], ae_kill[bb]);
- }
- passes++;
- }
-
- if (gcse_file)
- fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
-}
\f
/* Actually perform the Classic GCSE optimizations. */
expr_hash_table_size, n_exprs);
if (n_exprs > 0)
{
+ int passes;
compute_kill_rd ();
compute_rd ();
alloc_avail_expr_mem (n_basic_blocks, n_exprs);
compute_ae_gen ();
- compute_ae_kill ();
- compute_available ();
+ compute_ae_kill (ae_gen, ae_kill);
+ passes = compute_available (ae_gen, ae_kill, ae_out, ae_in);
+ if (gcse_file)
+ fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
changed = classic_gcse ();
free_avail_expr_mem ();
}
/* Nonzero for expressions which are redundant in a particular block. */
static sbitmap *pre_redundant;
+/* Nonzero for expressions which should be inserted on a specific edge. */
+static sbitmap *pre_insert_map;
+
+/* Nonzero for expressions which should be deleted in a specific block. */
+static sbitmap *pre_delete_map;
+
+/* Contains the edge_list returned by pre_edge_lcm. */
+static struct edge_list *edge_list;
+
static sbitmap *temp_bitmap;
/* Redundant insns. */
antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
- pre_optimal = sbitmap_vector_alloc (n_blocks, n_exprs);
- pre_redundant = sbitmap_vector_alloc (n_blocks, n_exprs);
+ pre_optimal = NULL;
+ pre_redundant = NULL;
+ pre_insert_map = NULL;
+ pre_delete_map = NULL;
+ ae_in = NULL;
+ ae_out = NULL;
+ u_bitmap = NULL;
transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
+ ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
+ /* pre_insert and pre_delete are allocated later. */
}
/* Free vars used for PRE analysis. */
free (comp);
free (antloc);
- free (temp_bitmap);
- free (pre_optimal);
- free (pre_redundant);
- free (transpout);
+ if (pre_optimal)
+ free (pre_optimal);
+ if (pre_redundant)
+ free (pre_redundant);
+ if (pre_insert_map)
+ free (pre_insert_map);
+ if (pre_delete_map)
+ free (pre_delete_map);
+ if (transpout)
+ free (transpout);
+
+ if (ae_in)
+ free (ae_in);
+ if (ae_out)
+ free (ae_out);
+ if (ae_kill)
+ free (ae_kill);
+ if (u_bitmap)
+ free (u_bitmap);
+
+ transp = comp = antloc = NULL;
+ pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
+ transpout = ae_in = ae_out = ae_kill = NULL;
+ u_bitmap = NULL;
+
}
/* Top level routine to do the dataflow analysis needed by PRE. */
{
compute_local_properties (transp, comp, antloc, 0);
compute_transpout ();
- pre_lcm (n_basic_blocks, n_exprs, s_preds, s_succs, transp,
- antloc, pre_redundant, pre_optimal);
+ sbitmap_vector_zero (ae_kill, n_basic_blocks);
+ compute_ae_kill (comp, ae_kill);
+ edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
+ ae_kill, &pre_insert_map, &pre_delete_map);
}
\f
return 0;
}
\f
+
+/* Given an expr, generate RTL which we can insert at the end of a BB,
+ or on an edge. Set the block number of any insns generated to
+ the value of BB. */
+
+static rtx
+process_insert_insn (expr)
+ struct expr *expr;
+{
+ rtx reg = expr->reaching_reg;
+ rtx pat, copied_expr;
+ rtx first_new_insn;
+
+ start_sequence ();
+ copied_expr = copy_rtx (expr->expr);
+ emit_move_insn (reg, copied_expr);
+ first_new_insn = get_insns ();
+ pat = gen_sequence ();
+ end_sequence ();
+
+ return pat;
+}
+
/* Add EXPR to the end of basic block BB.
This is used by both the PRE and code hoisting.
rtx new_insn;
rtx reg = expr->reaching_reg;
int regno = REGNO (reg);
- rtx pat, copied_expr;
- rtx first_new_insn;
+ rtx pat;
- start_sequence ();
- copied_expr = copy_rtx (expr->expr);
- emit_move_insn (reg, copied_expr);
- first_new_insn = get_insns ();
- pat = gen_sequence ();
- end_sequence ();
+ pat = process_insert_insn (expr);
/* If the last insn is a jump, insert EXPR in front [taking care to
handle cc0, etc. properly]. */
}
}
-/* Insert partially redundant expressions at the ends of appropriate basic
- blocks making them fully redundant. */
+/* Insert partially redundant expressions on edges in the CFG to make
+ the expressions fully redundant. */
-static void
-pre_insert (index_map)
+static int
+pre_edge_insert (edge_list, index_map)
+ struct edge_list *edge_list;
struct expr **index_map;
{
- int bb, i, set_size;
+ int e, i, num_edges, set_size, did_insert = 0;
sbitmap *inserted;
- /* Compute INSERT = PRE_OPTIMAL & ~PRE_REDUNDANT.
- Where INSERT is nonzero, we add the expression at the end of the basic
- block if it reaches any of the deleted expressions. */
+ /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
+ if it reaches any of the deleted expressions. */
- set_size = pre_optimal[0]->size;
- inserted = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
- sbitmap_vector_zero (inserted, n_basic_blocks);
+ set_size = pre_insert_map[0]->size;
+ num_edges = NUM_EDGES (edge_list);
+ inserted = sbitmap_vector_alloc (num_edges, n_exprs);
+ sbitmap_vector_zero (inserted, num_edges);
- for (bb = 0; bb < n_basic_blocks; bb++)
+ for (e = 0; e < num_edges; e++)
{
int indx;
+ basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
+ int bb = pred->index;
- /* This computes the number of potential insertions we need. */
- sbitmap_not (temp_bitmap[bb], pre_redundant[bb]);
- sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_optimal[bb]);
-
- /* TEMP_BITMAP[bb] now contains a bitmap of the expressions that we need
- to insert at the end of this basic block. */
for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
{
- SBITMAP_ELT_TYPE insert = temp_bitmap[bb]->elms[i];
+ SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
int j;
for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
if (! occr->deleted_p)
continue;
- /* Insert this expression at the end of BB if it would
- reach the deleted occurence. */
- if (!TEST_BIT (inserted[bb], j)
- && pre_expr_reaches_here_p (bb, expr,
+ /* Insert this expression on this edge if if it would
+ reach the deleted occurence in BB. */
+ if (!TEST_BIT (inserted[e], j)
+ && (bb == ENTRY_BLOCK
+ || pre_expr_reaches_here_p (bb, expr,
BLOCK_NUM (occr->insn), 0,
- NULL))
+ NULL)))
{
- SET_BIT (inserted[bb], j);
- insert_insn_end_bb (index_map[j], bb, 1);
+ rtx insn;
+ edge eg = INDEX_EDGE (edge_list, e);
+ /* We can't insert anything on an abnormal
+ and critical edge, so we insert the
+ insn at the end of the previous block. There
+ are several alternatives detailed in
+ Morgans book P277 (sec 10.5) for handling
+ this situation. This one is easiest for now. */
+
+ if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
+ {
+ insert_insn_end_bb (index_map[j], bb, 0);
+ }
+ else
+ {
+ insn = process_insert_insn (index_map[j]);
+ insert_insn_on_edge (insn, eg);
+ }
+ if (gcse_file)
+ {
+ fprintf (gcse_file,
+ "PRE/HOIST: edge (%d,%d), copy expression %d\n",
+ bb,
+ INDEX_EDGE_SUCC_BB (edge_list, e)->index, expr->bitmap_index);
+ }
+ SET_BIT (inserted[e], j);
+ did_insert = 1;
+ gcse_create_count++;
}
}
}
}
}
}
-
- sbitmap_vector_free (inserted);
+ return did_insert;
}
/* Copy the result of INSN to REG.
int indx = expr->bitmap_index;
rtx set = single_set (insn);
rtx new_insn;
+ int bb = BLOCK_NUM (insn);
if (!set)
abort ();
new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
insn);
/* Keep block number table up to date. */
- set_block_num (new_insn, BLOCK_NUM (insn));
+ set_block_num (new_insn, bb);
/* Keep register set table up to date. */
record_one_set (regno, new_insn);
+ if (insn == BLOCK_END (bb))
+ BLOCK_END (bb) = new_insn;
gcse_create_count++;
if (gcse_file)
- {
- fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n",
- BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno);
- }
+ fprintf (gcse_file,
+ "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
+ BLOCK_NUM (insn), INSN_UID (new_insn), indx,
+ INSN_UID (insn), regno);
}
/* Copy available expressions that reach the redundant expression
{
int i, bb;
- for (bb = 0; bb < n_basic_blocks; bb++)
- {
- sbitmap_a_and_b (temp_bitmap[bb], pre_optimal[bb], pre_redundant[bb]);
- }
-
/* For each available expression in the table, copy the result to
`reaching_reg' if the expression reaches a deleted one.
for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
{
rtx insn = avail->insn;
- int bb = BLOCK_NUM (insn);
-
- if (!TEST_BIT (temp_bitmap[bb], expr->bitmap_index))
- continue;
/* No need to handle this one if handled already. */
if (avail->copied_p)
/* Compute the expressions which are redundant and need to be replaced by
copies from the reaching reg to the target reg. */
for (bb = 0; bb < n_basic_blocks; bb++)
- {
- sbitmap_not (temp_bitmap[bb], pre_optimal[bb]);
- sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_redundant[bb]);
- }
+ sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
changed = 0;
for (i = 0; i < expr_hash_table_size; i++)
static int
pre_gcse ()
{
- int i;
+ int i, did_insert;
int changed;
struct expr **index_map;
- we know which insns are redundant when we go to create copies */
changed = pre_delete ();
- /* Insert insns in places that make partially redundant expressions
- fully redundant. */
- pre_insert (index_map);
-
+ did_insert = pre_edge_insert (edge_list, index_map);
/* In other places with reaching expressions, copy the expression to the
- specially allocated pseudo-reg that reaches the redundant expression. */
+ specially allocated pseudo-reg that reaches the redundant expr. */
pre_insert_copies ();
+ if (did_insert)
+ {
+ commit_edge_insertions ();
+ changed = 1;
+ }
free (pre_redundant_insns);
gcse_create_count = 0;
alloc_expr_hash_table (max_cuid);
+ add_noreturn_fake_exit_edges ();
compute_expr_hash_table ();
if (gcse_file)
dump_hash_table (gcse_file, "Expression", expr_hash_table,
alloc_pre_mem (n_basic_blocks, n_exprs);
compute_pre_data ();
changed |= pre_gcse ();
+ free_edge_list (edge_list);
free_pre_mem ();
}
+ remove_fake_edges ();
free_expr_hash_table ();
if (gcse_file)
changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
hoist_vbeout[bb], transp[bb]);
if (bb != n_basic_blocks - 1)
- sbitmap_intersect_of_successors (hoist_vbeout[bb], hoist_vbein,
- bb, s_succs);
+ sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
}
passes++;
}
#include "recog.h"
#include "basic-block.h"
-static void compute_antinout PROTO ((int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *, sbitmap *));
-static void compute_earlyinout PROTO ((int, int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *, sbitmap *));
-static void compute_delayinout PROTO ((int, int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *,
- sbitmap *, sbitmap *));
-static void compute_latein PROTO ((int, int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *));
-static void compute_isoinout PROTO ((int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *, sbitmap *));
-static void compute_optimal PROTO ((int, sbitmap *,
- sbitmap *, sbitmap *));
-static void compute_redundant PROTO ((int, int, sbitmap *,
- sbitmap *, sbitmap *, sbitmap *));
-
-/* Similarly, but for the reversed flowgraph. */
-static void compute_avinout PROTO ((int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *, sbitmap *));
-static void compute_fartherinout PROTO ((int, int, int_list_ptr *,
- sbitmap *, sbitmap *,
- sbitmap *, sbitmap *));
-static void compute_earlierinout PROTO ((int, int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *,
- sbitmap *, sbitmap *));
-static void compute_firstout PROTO ((int, int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *));
-static void compute_rev_isoinout PROTO ((int, int_list_ptr *, sbitmap *,
- sbitmap *, sbitmap *, sbitmap *));
-
-/* Given local properties TRANSP, ANTLOC, return the redundant and optimal
- computation points for expressions.
-
- To reduce overall memory consumption, we allocate memory immediately
- before its needed and deallocate it as soon as possible. */
-void
-pre_lcm (n_blocks, n_exprs, s_preds, s_succs, transp,
- antloc, redundant, optimal)
- int n_blocks;
- int n_exprs;
- int_list_ptr *s_preds;
- int_list_ptr *s_succs;
- sbitmap *transp;
- sbitmap *antloc;
- sbitmap *redundant;
- sbitmap *optimal;
-{
- sbitmap *antin, *antout, *earlyin, *earlyout, *delayin, *delayout;
- sbitmap *latein, *isoin, *isoout;
-
- /* Compute global anticipatability. ANTOUT is not needed except to
- compute ANTIN, so free its memory as soon as we return from
- compute_antinout. */
- antin = sbitmap_vector_alloc (n_blocks, n_exprs);
- antout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_antinout (n_blocks, s_succs, antloc,
- transp, antin, antout);
- free (antout);
- antout = NULL;
-
- /* Compute earliestness. EARLYOUT is not needed except to compute
- EARLYIN, so free its memory as soon as we return from
- compute_earlyinout. */
- earlyin = sbitmap_vector_alloc (n_blocks, n_exprs);
- earlyout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_earlyinout (n_blocks, n_exprs, s_preds, transp, antin,
- earlyin, earlyout);
- free (earlyout);
- earlyout = NULL;
-
- /* Compute delayedness. DELAYOUT is not needed except to compute
- DELAYIN, so free its memory as soon as we return from
- compute_delayinout. We also no longer need ANTIN and EARLYIN. */
- delayin = sbitmap_vector_alloc (n_blocks, n_exprs);
- delayout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_delayinout (n_blocks, n_exprs, s_preds, antloc,
- antin, earlyin, delayin, delayout);
- free (delayout);
- delayout = NULL;
- free (antin);
- antin = NULL;
- free (earlyin);
- earlyin = NULL;
-
- /* Compute latestness. We no longer need DELAYIN after we compute
- LATEIN. */
- latein = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_latein (n_blocks, n_exprs, s_succs, antloc, delayin, latein);
- free (delayin);
- delayin = NULL;
-
- /* Compute isolatedness. ISOIN is not needed except to compute
- ISOOUT, so free its memory as soon as we return from
- compute_isoinout. */
- isoin = sbitmap_vector_alloc (n_blocks, n_exprs);
- isoout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_isoinout (n_blocks, s_succs, antloc, latein, isoin, isoout);
- free (isoin);
- isoin = NULL;
-
- /* Now compute optimal placement points and the redundant expressions. */
- compute_optimal (n_blocks, latein, isoout, optimal);
- compute_redundant (n_blocks, n_exprs, antloc, latein, isoout, redundant);
- free (latein);
- latein = NULL;
- free (isoout);
- isoout = NULL;
-}
-
-/* Given local properties TRANSP, AVLOC, return the redundant and optimal
- computation points for expressions on the reverse flowgraph.
-
- To reduce overall memory consumption, we allocate memory immediately
- before its needed and deallocate it as soon as possible. */
-
-void
-pre_rev_lcm (n_blocks, n_exprs, s_preds, s_succs, transp,
- avloc, redundant, optimal)
- int n_blocks;
- int n_exprs;
- int_list_ptr *s_preds;
- int_list_ptr *s_succs;
- sbitmap *transp;
- sbitmap *avloc;
- sbitmap *redundant;
- sbitmap *optimal;
-{
- sbitmap *avin, *avout, *fartherin, *fartherout, *earlierin, *earlierout;
- sbitmap *firstout, *rev_isoin, *rev_isoout;
-
- /* Compute global availability. AVIN is not needed except to
- compute AVOUT, so free its memory as soon as we return from
- compute_avinout. */
- avin = sbitmap_vector_alloc (n_blocks, n_exprs);
- avout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_avinout (n_blocks, s_preds, avloc, transp, avin, avout);
- free (avin);
- avin = NULL;
-
- /* Compute fartherness. FARTHERIN is not needed except to compute
- FARTHEROUT, so free its memory as soon as we return from
- compute_earlyinout. */
- fartherin = sbitmap_vector_alloc (n_blocks, n_exprs);
- fartherout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_fartherinout (n_blocks, n_exprs, s_succs, transp,
- avout, fartherin, fartherout);
- free (fartherin);
- fartherin = NULL;
-
- /* Compute earlierness. EARLIERIN is not needed except to compute
- EARLIEROUT, so free its memory as soon as we return from
- compute_delayinout. We also no longer need AVOUT and FARTHEROUT. */
- earlierin = sbitmap_vector_alloc (n_blocks, n_exprs);
- earlierout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_earlierinout (n_blocks, n_exprs, s_succs, avloc,
- avout, fartherout, earlierin, earlierout);
- free (earlierin);
- earlierin = NULL;
- free (avout);
- avout = NULL;
- free (fartherout);
- fartherout = NULL;
-
- /* Compute firstness. We no longer need EARLIEROUT after we compute
- FIRSTOUT. */
- firstout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_firstout (n_blocks, n_exprs, s_preds, avloc, earlierout, firstout);
- free (earlierout);
- earlierout = NULL;
-
- /* Compute rev_isolatedness. ISOIN is not needed except to compute
- ISOOUT, so free its memory as soon as we return from
- compute_isoinout. */
- rev_isoin = sbitmap_vector_alloc (n_blocks, n_exprs);
- rev_isoout = sbitmap_vector_alloc (n_blocks, n_exprs);
- compute_rev_isoinout (n_blocks, s_preds, avloc, firstout,
- rev_isoin, rev_isoout);
- free (rev_isoout);
- rev_isoout = NULL;
-
- /* Now compute optimal placement points and the redundant expressions. */
- compute_optimal (n_blocks, firstout, rev_isoin, optimal);
- compute_redundant (n_blocks, n_exprs, avloc, firstout, rev_isoin, redundant);
- free (firstout);
- firstout = NULL;
- free (rev_isoin);
- rev_isoin = NULL;
-}
-
-/* Compute expression anticipatability at entrance and exit of each block. */
+/* Edge based LCM routines. */
+static void compute_antinout_edge PROTO ((sbitmap *, sbitmap *,
+ sbitmap *, sbitmap *));
+static void compute_earliest PROTO((struct edge_list *, int, sbitmap *,
+ sbitmap *, sbitmap *, sbitmap *,
+ sbitmap *));
+static void compute_laterin PROTO((struct edge_list *, int, sbitmap *,
+ sbitmap *, sbitmap *, sbitmap *));
+static void compute_insert_delete PROTO ((struct edge_list *edge_list,
+ sbitmap *, sbitmap *, sbitmap *,
+ sbitmap *, sbitmap *));
+
+/* Edge based LCM routines on a reverse flowgraph. */
+static void compute_farthest PROTO ((struct edge_list *, int, sbitmap *,
+ sbitmap *, sbitmap*, sbitmap *,
+ sbitmap *));
+static void compute_nearerout PROTO((struct edge_list *, int, sbitmap *,
+ sbitmap *, sbitmap *, sbitmap *));
+static void compute_rev_insert_delete PROTO ((struct edge_list *edge_list,
+ sbitmap *, sbitmap *, sbitmap *,
+ sbitmap *, sbitmap *));
+
+\f
+/* Edge based lcm routines. */
+
+/* Compute expression anticipatability at entrance and exit of each block.
+ This is done based on the flow graph, and not on the pred-succ lists.
+ Other than that, its pretty much identical to compute_antinout. */
static void
-compute_antinout (n_blocks, s_succs, antloc, transp, antin, antout)
- int n_blocks;
- int_list_ptr *s_succs;
+compute_antinout_edge (antloc, transp, antin, antout)
sbitmap *antloc;
sbitmap *transp;
sbitmap *antin;
sbitmap *antout;
{
- int bb, changed, passes;
+ int i, changed, passes;
sbitmap old_changed, new_changed;
+ edge e;
- sbitmap_zero (antout[n_blocks - 1]);
- sbitmap_vector_ones (antin, n_blocks);
+ sbitmap_vector_zero (antout, n_basic_blocks);
+ sbitmap_vector_ones (antin, n_basic_blocks);
- old_changed = sbitmap_alloc (n_blocks);
- new_changed = sbitmap_alloc (n_blocks);
+ old_changed = sbitmap_alloc (n_basic_blocks);
+ new_changed = sbitmap_alloc (n_basic_blocks);
sbitmap_ones (old_changed);
passes = 0;
{
changed = 0;
sbitmap_zero (new_changed);
+
/* We scan the blocks in the reverse order to speed up
the convergence. */
- for (bb = n_blocks - 1; bb >= 0; bb--)
+ for (i = n_basic_blocks - 1; i >= 0; i--)
{
- int_list_ptr ps;
-
+ basic_block bb = BASIC_BLOCK (i);
/* If none of the successors of this block have changed,
then this block is not going to change. */
- for (ps = s_succs[bb] ; ps; ps = ps->next)
+ for (e = bb->succ ; e; e = e->succ_next)
{
- if (INT_LIST_VAL (ps) == EXIT_BLOCK
- || INT_LIST_VAL (ps) == ENTRY_BLOCK)
+ if (e->dest == EXIT_BLOCK_PTR)
break;
- if (TEST_BIT (old_changed, INT_LIST_VAL (ps))
- || TEST_BIT (new_changed, INT_LIST_VAL (ps)))
+ if (TEST_BIT (old_changed, e->dest->index)
+ || TEST_BIT (new_changed, e->dest->index))
break;
}
- if (!ps)
+ if (!e)
continue;
- if (bb != n_blocks - 1)
- sbitmap_intersect_of_successors (antout[bb], antin,
- bb, s_succs);
- if (sbitmap_a_or_b_and_c (antin[bb], antloc[bb],
- transp[bb], antout[bb]))
+ /* If an Exit blocks is the ONLY successor, its has a zero ANTIN,
+ which is the opposite of the default definition for an
+ intersection of succs definition. */
+ if (e->dest == EXIT_BLOCK_PTR && e->succ_next == NULL
+ && e->src->succ == e)
+ sbitmap_zero (antout[bb->index]);
+ else
+ {
+ sbitmap_intersection_of_succs (antout[bb->index],
+ antin,
+ bb->index);
+ }
+
+ if (sbitmap_a_or_b_and_c (antin[bb->index], antloc[bb->index],
+ transp[bb->index], antout[bb->index]))
{
changed = 1;
- SET_BIT (new_changed, bb);
+ SET_BIT (new_changed, bb->index);
}
}
sbitmap_copy (old_changed, new_changed);
passes++;
}
+
free (old_changed);
free (new_changed);
}
-/* Compute expression earliestness at entrance and exit of each block.
-
- From Advanced Compiler Design and Implementation pp411.
-
- An expression is earliest at the entrance to basic block BB if no
- block from entry to block BB both evaluates the expression and
- produces the same value as evaluating it at the entry to block BB
- does. Similarly for earlistness at basic block BB exit. */
-
+/* Compute the earliest vector for edge based lcm. */
static void
-compute_earlyinout (n_blocks, n_exprs, s_preds, transp, antin,
- earlyin, earlyout)
- int n_blocks;
+compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest)
+ struct edge_list *edge_list;
int n_exprs;
- int_list_ptr *s_preds;
- sbitmap *transp;
- sbitmap *antin;
- sbitmap *earlyin;
- sbitmap *earlyout;
+ sbitmap *antin, *antout, *avout, *kill, *earliest;
{
- int bb, changed, passes;
- sbitmap temp_bitmap;
- sbitmap old_changed, new_changed;
-
- temp_bitmap = sbitmap_alloc (n_exprs);
+ sbitmap difference, temp_bitmap;
+ int x, num_edges;
+ basic_block pred, succ;
- sbitmap_vector_zero (earlyout, n_blocks);
- sbitmap_ones (earlyin[0]);
+ num_edges = NUM_EDGES (edge_list);
- old_changed = sbitmap_alloc (n_blocks);
- new_changed = sbitmap_alloc (n_blocks);
- sbitmap_ones (old_changed);
+ difference = sbitmap_alloc (n_exprs);
+ temp_bitmap = sbitmap_alloc (n_exprs);
- passes = 0;
- changed = 1;
- while (changed)
+ for (x = 0; x < num_edges; x++)
{
- changed = 0;
- sbitmap_zero (new_changed);
- for (bb = 0; bb < n_blocks; bb++)
- {
- int_list_ptr ps;
-
- /* If none of the predecessors of this block have changed,
- then this block is not going to change. */
- for (ps = s_preds[bb] ; ps; ps = ps->next)
+ pred = INDEX_EDGE_PRED_BB (edge_list, x);
+ succ = INDEX_EDGE_SUCC_BB (edge_list, x);
+ if (pred == ENTRY_BLOCK_PTR)
+ sbitmap_copy (earliest[x], antin[succ->index]);
+ else
+ {
+ if (succ == EXIT_BLOCK_PTR)
{
- if (INT_LIST_VAL (ps) == EXIT_BLOCK
- || INT_LIST_VAL (ps) == ENTRY_BLOCK)
- break;
-
- if (TEST_BIT (old_changed, INT_LIST_VAL (ps))
- || TEST_BIT (new_changed, INT_LIST_VAL (ps)))
- break;
+ sbitmap_zero (earliest[x]);
}
-
- if (!ps)
- continue;
-
- if (bb != 0)
- sbitmap_union_of_predecessors (earlyin[bb], earlyout,
- bb, s_preds);
- sbitmap_not (temp_bitmap, transp[bb]);
- if (sbitmap_union_of_diff (earlyout[bb], temp_bitmap,
- earlyin[bb], antin[bb]))
+ else
{
- changed = 1;
- SET_BIT (new_changed, bb);
+ sbitmap_difference (difference, antin[succ->index],
+ avout[pred->index]);
+ sbitmap_not (temp_bitmap, antout[pred->index]);
+ sbitmap_a_and_b_or_c (earliest[x], difference, kill[pred->index],
+ temp_bitmap);
}
}
- sbitmap_copy (old_changed, new_changed);
- passes++;
}
- free (old_changed);
- free (new_changed);
free (temp_bitmap);
+ free (difference);
}
-/* Compute expression delayedness at entrance and exit of each block.
-
- From Advanced Compiler Design and Implementation pp411.
-
- An expression is delayed at the entrance to BB if it is anticipatable
- and earliest at that point and if all subsequent computations of
- the expression are in block BB. */
-
+/* Compute later and laterin vectors for edge based lcm. */
static void
-compute_delayinout (n_blocks, n_exprs, s_preds, antloc,
- antin, earlyin, delayin, delayout)
- int n_blocks;
+compute_laterin (edge_list, n_exprs,
+ earliest, antloc, later, laterin)
+ struct edge_list *edge_list;
int n_exprs;
- int_list_ptr *s_preds;
- sbitmap *antloc;
- sbitmap *antin;
- sbitmap *earlyin;
- sbitmap *delayin;
- sbitmap *delayout;
+ sbitmap *earliest, *antloc, *later, *laterin;
{
- int bb, changed, passes;
- sbitmap *anti_and_early;
- sbitmap temp_bitmap;
+ sbitmap difference, temp_bitmap;
+ int x, num_edges;
+ basic_block pred, succ;
+ int done = 0;
- temp_bitmap = sbitmap_alloc (n_exprs);
+ num_edges = NUM_EDGES (edge_list);
- /* This is constant throughout the flow equations below, so compute
- it once to save time. */
- anti_and_early = sbitmap_vector_alloc (n_blocks, n_exprs);
- for (bb = 0; bb < n_blocks; bb++)
- sbitmap_a_and_b (anti_and_early[bb], antin[bb], earlyin[bb]);
-
- sbitmap_vector_zero (delayout, n_blocks);
- sbitmap_copy (delayin[0], anti_and_early[0]);
+ /* Laterin has an extra block allocated for the exit block. */
+ sbitmap_vector_ones (laterin, n_basic_blocks + 1);
+ sbitmap_vector_zero (later, num_edges);
- passes = 0;
- changed = 1;
- while (changed)
+ /* Initialize laterin to the intersection of EARLIEST for all edges
+ from predecessors to this block. */
+
+ for (x = 0; x < num_edges; x++)
{
- changed = 0;
- for (bb = 0; bb < n_blocks; bb++)
+ succ = INDEX_EDGE_SUCC_BB (edge_list, x);
+ pred = INDEX_EDGE_PRED_BB (edge_list, x);
+ if (succ != EXIT_BLOCK_PTR)
+ sbitmap_a_and_b (laterin[succ->index], laterin[succ->index],
+ earliest[x]);
+ /* We already know the correct value of later for edges from
+ the entry node, so set it now. */
+ if (pred == ENTRY_BLOCK_PTR)
+ sbitmap_copy (later[x], earliest[x]);
+ }
+
+ difference = sbitmap_alloc (n_exprs);
+
+ while (!done)
+ {
+ done = 1;
+ for (x = 0; x < num_edges; x++)
{
- if (bb != 0)
+ pred = INDEX_EDGE_PRED_BB (edge_list, x);
+ if (pred != ENTRY_BLOCK_PTR)
{
- sbitmap_intersect_of_predecessors (temp_bitmap, delayout,
- bb, s_preds);
- changed |= sbitmap_a_or_b (delayin[bb],
- anti_and_early[bb],
- temp_bitmap);
+ sbitmap_difference (difference, laterin[pred->index],
+ antloc[pred->index]);
+ if (sbitmap_a_or_b (later[x], difference, earliest[x]))
+ done = 0;
}
- sbitmap_not (temp_bitmap, antloc[bb]);
- changed |= sbitmap_a_and_b (delayout[bb],
- temp_bitmap,
- delayin[bb]);
}
- passes++;
+ if (done)
+ break;
+
+ sbitmap_vector_ones (laterin, n_basic_blocks);
+
+ for (x = 0; x < num_edges; x++)
+ {
+ succ = INDEX_EDGE_SUCC_BB (edge_list, x);
+ if (succ != EXIT_BLOCK_PTR)
+ sbitmap_a_and_b (laterin[succ->index], laterin[succ->index],
+ later[x]);
+ else
+ /* We allocated an extra block for the exit node. */
+ sbitmap_a_and_b (laterin[n_basic_blocks], laterin[n_basic_blocks],
+ later[x]);
+ }
}
- /* We're done with this, so go ahead and free it's memory now instead
- of waiting until the end of pre. */
- free (anti_and_early);
- free (temp_bitmap);
+ free (difference);
}
-/* Compute latestness.
+/* Compute the insertion and deletion points for edge based LCM. */
+static void
+compute_insert_delete (edge_list, antloc, later, laterin,
+ insert, delete)
+ struct edge_list *edge_list;
+ sbitmap *antloc, *later, *laterin, *insert, *delete;
+{
+ int x;
- From Advanced Compiler Design and Implementation pp412.
+ for (x = 0; x < n_basic_blocks; x++)
+ sbitmap_difference (delete[x], antloc[x], laterin[x]);
+
+ for (x = 0; x < NUM_EDGES (edge_list); x++)
+ {
+ basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);
+ if (b == EXIT_BLOCK_PTR)
+ sbitmap_difference (insert[x], later[x], laterin[n_basic_blocks]);
+ else
+ sbitmap_difference (insert[x], later[x], laterin[b->index]);
+ }
+}
- An expression is latest at the entrance to block BB if that is an optimal
- point for computing the expression and if on every path from block BB's
- entrance to the exit block, any optimal computation point for the
- expression occurs after one of the points at which the expression was
- computed in the original flowgraph. */
+/* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the
+ insert and delete vectors for edge based LCM. Returns an
+ edgelist which is used to map the insert vector to what edge
+ an expression should be inserted on. */
-static void
-compute_latein (n_blocks, n_exprs, s_succs, antloc, delayin, latein)
- int n_blocks;
+struct edge_list *
+pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete)
+ FILE *file;
int n_exprs;
- int_list_ptr *s_succs;
+ sbitmap *transp;
+ sbitmap *avloc;
sbitmap *antloc;
- sbitmap *delayin;
- sbitmap *latein;
+ sbitmap *kill;
+ sbitmap **insert;
+ sbitmap **delete;
{
- int bb;
- sbitmap temp_bitmap;
+ sbitmap *antin, *antout, *earliest;
+ sbitmap *avin, *avout;
+ sbitmap *later, *laterin;
+ struct edge_list *edge_list;
+ int num_edges;
- temp_bitmap = sbitmap_alloc (n_exprs);
+ edge_list = create_edge_list ();
+ num_edges = NUM_EDGES (edge_list);
- for (bb = 0; bb < n_blocks; bb++)
+#ifdef LCM_DEBUG_INFO
+ if (file)
{
- /* The last block is succeeded only by the exit block; therefore,
- temp_bitmap will not be set by the following call! */
- if (bb == n_blocks - 1)
- {
- sbitmap_intersect_of_successors (temp_bitmap, delayin,
- bb, s_succs);
- sbitmap_not (temp_bitmap, temp_bitmap);
- }
- else
- sbitmap_ones (temp_bitmap);
- sbitmap_a_and_b_or_c (latein[bb], delayin[bb],
- antloc[bb], temp_bitmap);
+ fprintf (file, "Edge List:\n");
+ verify_edge_list (file, edge_list);
+ print_edge_list (file, edge_list);
+ dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
+ dump_sbitmap_vector (file, "antloc", "", antloc, n_basic_blocks);
+ dump_sbitmap_vector (file, "avloc", "", avloc, n_basic_blocks);
+ dump_sbitmap_vector (file, "kill", "", kill, n_basic_blocks);
}
- free (temp_bitmap);
-}
-
-/* Compute isolated.
+#endif
- From Advanced Compiler Design and Implementation pp413.
+ /* Compute global availability. */
+ avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ compute_available (avloc, kill, avout, avin);
- A computationally optimal placement for the evaluation of an expression
- is defined to be isolated if and only if on every path from a successor
- of the block in which it is computed to the exit block, every original
- computation of the expression is preceded by the optimal placement point. */
-
-static void
-compute_isoinout (n_blocks, s_succs, antloc, latein, isoin, isoout)
- int n_blocks;
- int_list_ptr *s_succs;
- sbitmap *antloc;
- sbitmap *latein;
- sbitmap *isoin;
- sbitmap *isoout;
-{
- int bb, changed, passes;
+ free (avin);
- sbitmap_vector_zero (isoin, n_blocks);
- sbitmap_zero (isoout[n_blocks - 1]);
+ /* Compute global anticipatability. */
+ antin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ antout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ compute_antinout_edge (antloc, transp, antin, antout);
- passes = 0;
- changed = 1;
- while (changed)
+#ifdef LCM_DEBUG_INFO
+ if (file)
{
- changed = 0;
- for (bb = n_blocks - 1; bb >= 0; bb--)
- {
- if (bb != n_blocks - 1)
- sbitmap_intersect_of_successors (isoout[bb], isoin,
- bb, s_succs);
- changed |= sbitmap_union_of_diff (isoin[bb], latein[bb],
- isoout[bb], antloc[bb]);
- }
- passes++;
+ dump_sbitmap_vector (file, "antin", "", antin, n_basic_blocks);
+ dump_sbitmap_vector (file, "antout", "", antout, n_basic_blocks);
}
-}
+#endif
-/* Compute the set of expressions which have optimal computational points
- in each basic block. This is the set of expressions that are latest, but
- that are not isolated in the block. */
+ /* Compute earliestness. */
+ earliest = sbitmap_vector_alloc (num_edges, n_exprs);
+ compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);
-static void
-compute_optimal (n_blocks, latein, isoout, optimal)
- int n_blocks;
- sbitmap *latein;
- sbitmap *isoout;
- sbitmap *optimal;
-{
- int bb;
+#ifdef LCM_DEBUG_INFO
+ if (file)
+ dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
+#endif
- for (bb = 0; bb < n_blocks; bb++)
- sbitmap_difference (optimal[bb], latein[bb], isoout[bb]);
-}
+ free (antout);
+ free (antin);
+ free (avout);
-/* Compute the set of expressions that are redundant in a block. They are
- the expressions that are used in the block and that are neither isolated
- or latest. */
+ later = sbitmap_vector_alloc (num_edges, n_exprs);
+ /* Allocate an extra element for the exit block in the laterin vector. */
+ laterin = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
+ compute_laterin (edge_list, n_exprs, earliest, antloc, later, laterin);
-static void
-compute_redundant (n_blocks, n_exprs, antloc, latein, isoout, redundant)
- int n_blocks;
- int n_exprs;
- sbitmap *antloc;
- sbitmap *latein;
- sbitmap *isoout;
- sbitmap *redundant;
-{
- int bb;
- sbitmap temp_bitmap;
+#ifdef LCM_DEBUG_INFO
+ if (file)
+ {
+ dump_sbitmap_vector (file, "laterin", "", laterin, n_basic_blocks + 1);
+ dump_sbitmap_vector (file, "later", "", later, num_edges);
+ }
+#endif
- temp_bitmap = sbitmap_alloc (n_exprs);
+ free (earliest);
+
+ *insert = sbitmap_vector_alloc (num_edges, n_exprs);
+ *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);
- for (bb = 0; bb < n_blocks; bb++)
+ free (laterin);
+ free (later);
+
+#ifdef LCM_DEBUG_INFO
+ if (file)
{
- sbitmap_a_or_b (temp_bitmap, latein[bb], isoout[bb]);
- sbitmap_difference (redundant[bb], antloc[bb], temp_bitmap);
+ dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
+ dump_sbitmap_vector (file, "pre_delete_map", "", *delete, n_basic_blocks);
}
- free (temp_bitmap);
-}
+#endif
-/* Compute expression availability at entrance and exit of each block. */
+ return edge_list;
+}
-static void
-compute_avinout (n_blocks, s_preds, avloc, transp, avin, avout)
- int n_blocks;
- int_list_ptr *s_preds;
- sbitmap *avloc;
- sbitmap *transp;
- sbitmap *avin;
- sbitmap *avout;
+/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors.
+ Return the number of passes we performed to iterate to a solution. */
+int
+compute_available (avloc, kill, avout, avin)
+ sbitmap *avloc, *kill, *avout, *avin;
{
int bb, changed, passes;
+ int last = n_basic_blocks - 1;
sbitmap_zero (avin[0]);
- sbitmap_vector_ones (avout, n_blocks);
+ sbitmap_copy (avout[0] /*dst*/, avloc[0] /*src*/);
+ for (bb = 1; bb < n_basic_blocks; bb++)
+ sbitmap_not (avout[bb], kill[bb]);
+
passes = 0;
changed = 1;
while (changed)
{
changed = 0;
- for (bb = 0; bb < n_blocks; bb++)
- {
- if (bb != 0)
- sbitmap_intersect_of_predecessors (avin[bb], avout,
- bb, s_preds);
- changed |= sbitmap_a_or_b_and_c (avout[bb], avloc[bb],
- transp[bb], avin[bb]);
- }
+ for (bb = 1; bb < n_basic_blocks; bb++)
+ {
+ sbitmap_intersection_of_preds (avin[bb], avout, bb);
+ changed |= sbitmap_union_of_diff (avout[bb], avloc[bb],
+ avin[bb], kill[bb]);
+ }
passes++;
}
+ return passes;
}
-/* Compute expression latestness.
-
- This is effectively the same as earliestness computed on the reverse
- flow graph. */
-
+/* Compute the farthest vector for edge based lcm. */
static void
-compute_fartherinout (n_blocks, n_exprs, s_succs,
- transp, avout, fartherin, fartherout)
- int n_blocks;
+compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
+ kill, farthest)
+ struct edge_list *edge_list;
int n_exprs;
- int_list_ptr *s_succs;
- sbitmap *transp;
- sbitmap *avout;
- sbitmap *fartherin;
- sbitmap *fartherout;
+ sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest;
{
- int bb, changed, passes;
- sbitmap temp_bitmap;
+ sbitmap difference, temp_bitmap;
+ int x, num_edges;
+ basic_block pred, succ;
- temp_bitmap = sbitmap_alloc (n_exprs);
+ num_edges = NUM_EDGES (edge_list);
- sbitmap_vector_zero (fartherin, n_blocks);
- sbitmap_ones (fartherout[n_blocks - 1]);
+ difference = sbitmap_alloc (n_exprs);
+ temp_bitmap = sbitmap_alloc (n_exprs);
- passes = 0;
- changed = 1;
- while (changed)
+ for (x = 0; x < num_edges; x++)
{
- changed = 0;
- for (bb = n_blocks - 1; bb >= 0; bb--)
+ pred = INDEX_EDGE_PRED_BB (edge_list, x);
+ succ = INDEX_EDGE_SUCC_BB (edge_list, x);
+ if (succ == EXIT_BLOCK_PTR)
+ sbitmap_copy (farthest[x], st_avout[pred->index]);
+ else
{
- if (bb != n_blocks - 1)
- sbitmap_union_of_successors (fartherout[bb], fartherin,
- bb, s_succs);
- sbitmap_not (temp_bitmap, transp[bb]);
- changed |= sbitmap_union_of_diff (fartherin[bb], temp_bitmap,
- fartherout[bb], avout[bb]);
+ if (pred == ENTRY_BLOCK_PTR)
+ {
+ sbitmap_zero (farthest[x]);
+ }
+ else
+ {
+ sbitmap_difference (difference, st_avout[pred->index],
+ st_antin[succ->index]);
+ sbitmap_not (temp_bitmap, st_avin[succ->index]);
+ sbitmap_a_and_b_or_c (farthest[x], difference,
+ kill[succ->index], temp_bitmap);
+ }
}
- passes++;
}
-
free (temp_bitmap);
+ free (difference);
}
-/* Compute expression earlierness at entrance and exit of each block.
-
- This is effectively the same as delayedness computed on the reverse
- flow graph. */
-
+/* Compute nearer and nearerout vectors for edge based lcm. */
static void
-compute_earlierinout (n_blocks, n_exprs, s_succs, avloc,
- avout, fartherout, earlierin, earlierout)
- int n_blocks;
+compute_nearerout (edge_list, n_exprs,
+ farthest, st_avloc, nearer, nearerout)
+ struct edge_list *edge_list;
int n_exprs;
- int_list_ptr *s_succs;
- sbitmap *avloc;
- sbitmap *avout;
- sbitmap *fartherout;
- sbitmap *earlierin;
- sbitmap *earlierout;
+ sbitmap *farthest, *st_avloc, *nearer, *nearerout;
{
- int bb, changed, passes;
- sbitmap *av_and_farther;
- sbitmap temp_bitmap;
+ sbitmap difference, temp_bitmap;
+ int x, num_edges;
+ basic_block pred, succ;
+ int done = 0;
- temp_bitmap = sbitmap_alloc (n_exprs);
+ num_edges = NUM_EDGES (edge_list);
- /* This is constant throughout the flow equations below, so compute
- it once to save time. */
- av_and_farther = sbitmap_vector_alloc (n_blocks, n_exprs);
- for (bb = 0; bb < n_blocks; bb++)
- sbitmap_a_and_b (av_and_farther[bb], avout[bb], fartherout[bb]);
-
- sbitmap_vector_zero (earlierin, n_blocks);
- sbitmap_copy (earlierout[n_blocks - 1], av_and_farther[n_blocks - 1]);
+ /* nearout has an extra block allocated for the entry block. */
+ sbitmap_vector_ones (nearerout, n_basic_blocks + 1);
+ sbitmap_vector_zero (nearer, num_edges);
- passes = 0;
- changed = 1;
- while (changed)
+ /* Initialize nearerout to the intersection of FARTHEST for all edges
+ from predecessors to this block. */
+
+ for (x = 0; x < num_edges; x++)
{
- changed = 0;
- for (bb = n_blocks - 1; bb >= 0; bb--)
+ succ = INDEX_EDGE_SUCC_BB (edge_list, x);
+ pred = INDEX_EDGE_PRED_BB (edge_list, x);
+ if (pred != ENTRY_BLOCK_PTR)
+ {
+ sbitmap_a_and_b (nearerout[pred->index], nearerout[pred->index],
+ farthest[x]);
+ }
+ /* We already know the correct value of nearer for edges to
+ the exit node. */
+ if (succ == EXIT_BLOCK_PTR)
+ sbitmap_copy (nearer[x], farthest[x]);
+ }
+
+ difference = sbitmap_alloc (n_exprs);
+
+ while (!done)
+ {
+ done = 1;
+ for (x = 0; x < num_edges; x++)
{
- if (bb != n_blocks - 1)
+ succ = INDEX_EDGE_SUCC_BB (edge_list, x);
+ if (succ != EXIT_BLOCK_PTR)
{
- sbitmap_intersect_of_successors (temp_bitmap, earlierin,
- bb, s_succs);
- changed |= sbitmap_a_or_b (earlierout[bb],
- av_and_farther[bb],
- temp_bitmap);
+ sbitmap_difference (difference, nearerout[succ->index],
+ st_avloc[succ->index]);
+ if (sbitmap_a_or_b (nearer[x], difference, farthest[x]))
+ done = 0;
}
- sbitmap_not (temp_bitmap, avloc[bb]);
- changed |= sbitmap_a_and_b (earlierin[bb],
- temp_bitmap,
- earlierout[bb]);
}
- passes++;
- }
- /* We're done with this, so go ahead and free it's memory now instead
- of waiting until the end of pre. */
- free (av_and_farther);
- free (temp_bitmap);
-}
+ if (done)
+ break;
-/* Compute firstness.
+ sbitmap_vector_zero (nearerout, n_basic_blocks);
+
+ for (x = 0; x < num_edges; x++)
+ {
+ pred = INDEX_EDGE_PRED_BB (edge_list, x);
+ if (pred != ENTRY_BLOCK_PTR)
+ sbitmap_a_and_b (nearerout[pred->index],
+ nearerout[pred->index], nearer[x]);
+ else
+ sbitmap_a_and_b (nearerout[n_basic_blocks],
+ nearerout[n_basic_blocks], nearer[x]);
+ }
+ }
- This is effectively the same as latestness computed on the reverse
- flow graph. */
+ free (difference);
+}
+/* Compute the insertion and deletion points for edge based LCM. */
static void
-compute_firstout (n_blocks, n_exprs, s_preds, avloc, earlierout, firstout)
- int n_blocks;
- int n_exprs;
- int_list_ptr *s_preds;
- sbitmap *avloc;
- sbitmap *earlierout;
- sbitmap *firstout;
+compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
+ insert, delete)
+ struct edge_list *edge_list;
+ sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete;
{
- int bb;
- sbitmap temp_bitmap;
+ int x;
- temp_bitmap = sbitmap_alloc (n_exprs);
-
- for (bb = 0; bb < n_blocks; bb++)
+ for (x = 0; x < n_basic_blocks; x++)
+ sbitmap_difference (delete[x], st_avloc[x], nearerout[x]);
+
+ for (x = 0; x < NUM_EDGES (edge_list); x++)
{
- /* The first block is preceded only by the entry block; therefore,
- temp_bitmap will not be set by the following call! */
- if (bb != 0)
- {
- sbitmap_intersect_of_predecessors (temp_bitmap, earlierout,
- bb, s_preds);
- sbitmap_not (temp_bitmap, temp_bitmap);
- }
+ basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
+ if (b == ENTRY_BLOCK_PTR)
+ sbitmap_difference (insert[x], nearer[x], nearerout[n_basic_blocks]);
else
- {
- sbitmap_ones (temp_bitmap);
- }
- sbitmap_a_and_b_or_c (firstout[bb], earlierout[bb],
- avloc[bb], temp_bitmap);
+ sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
}
- free (temp_bitmap);
}
-/* Compute reverse isolated.
-
- This is effectively the same as isolatedness computed on the reverse
- flow graph. */
+/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
+ insert and delete vectors for edge based reverse LCM. Returns an
+ edgelist which is used to map the insert vector to what edge
+ an expression should be inserted on. */
-static void
-compute_rev_isoinout (n_blocks, s_preds, avloc, firstout,
- rev_isoin, rev_isoout)
- int n_blocks;
- int_list_ptr *s_preds;
- sbitmap *avloc;
- sbitmap *firstout;
- sbitmap *rev_isoin;
- sbitmap *rev_isoout;
+struct edge_list *
+pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill,
+ insert, delete)
+ FILE *file;
+ int n_exprs;
+ sbitmap *transp;
+ sbitmap *st_avloc;
+ sbitmap *st_antloc;
+ sbitmap *kill;
+ sbitmap **insert;
+ sbitmap **delete;
{
- int bb, changed, passes;
+ sbitmap *st_antin, *st_antout;
+ sbitmap *st_avout, *st_avin, *farthest;
+ sbitmap *nearer, *nearerout;
+ struct edge_list *edge_list;
+ int x,num_edges;
+
+ edge_list = create_edge_list ();
+ num_edges = NUM_EDGES (edge_list);
+
+ st_antin = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ st_antout = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ sbitmap_vector_zero (st_antin, n_basic_blocks);
+ sbitmap_vector_zero (st_antout, n_basic_blocks);
+ compute_antinout_edge (st_antloc, transp, st_antin, st_antout);
+
+ /* Compute global anticipatability. */
+ st_avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ st_avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ compute_available (st_avloc, kill, st_avout, st_avin);
+
+#ifdef LCM_DEBUG_INFO
+ if (file)
+ {
+ fprintf (file, "Edge List:\n");
+ verify_edge_list (file, edge_list);
+ print_edge_list (file, edge_list);
+ dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
+ dump_sbitmap_vector (file, "st_avloc", "", st_avloc, n_basic_blocks);
+ dump_sbitmap_vector (file, "st_antloc", "", st_antloc, n_basic_blocks);
+ dump_sbitmap_vector (file, "st_antin", "", st_antin, n_basic_blocks);
+ dump_sbitmap_vector (file, "st_antout", "", st_antout, n_basic_blocks);
+ dump_sbitmap_vector (file, "st_kill", "", kill, n_basic_blocks);
+ }
+#endif
- sbitmap_vector_zero (rev_isoout, n_blocks);
- sbitmap_zero (rev_isoin[0]);
+#ifdef LCM_DEBUG_INFO
+ if (file)
+ {
+ dump_sbitmap_vector (file, "st_avout", "", st_avout, n_basic_blocks);
+ dump_sbitmap_vector (file, "st_avin", "", st_avin, n_basic_blocks);
+ }
+#endif
- passes = 0;
- changed = 1;
- while (changed)
+ /* Compute farthestness. */
+ farthest = sbitmap_vector_alloc (num_edges, n_exprs);
+ compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
+ kill, farthest);
+
+#ifdef LCM_DEBUG_INFO
+ if (file)
+ dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
+#endif
+
+ free (st_avin);
+ free (st_avout);
+
+ nearer = sbitmap_vector_alloc (num_edges, n_exprs);
+ /* Allocate an extra element for the entry block. */
+ nearerout = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
+ compute_nearerout (edge_list, n_exprs, farthest, st_avloc, nearer, nearerout);
+
+#ifdef LCM_DEBUG_INFO
+ if (file)
{
- changed = 0;
- for (bb = 0; bb < n_blocks; bb++)
- {
- if (bb != 0)
- sbitmap_intersect_of_predecessors (rev_isoin[bb], rev_isoout,
- bb, s_preds);
- changed |= sbitmap_union_of_diff (rev_isoout[bb], firstout[bb],
- rev_isoin[bb], avloc[bb]);
- }
- passes++;
+ dump_sbitmap_vector (file, "nearerout", "", nearerout,
+ n_basic_blocks + 1);
+ dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
}
+#endif
+
+ free (farthest);
+
+ *insert = sbitmap_vector_alloc (num_edges, n_exprs);
+ *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
+ compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout, *insert, *delete);
+
+ free (nearerout);
+ free (nearer);
+
+#ifdef LCM_DEBUG_INFO
+ if (file)
+ {
+ dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
+ dump_sbitmap_vector (file, "pre_delete_map", "", *delete, n_basic_blocks);
+ }
+#endif
+
+ return edge_list;
}