From: law Date: Sun, 17 Oct 1999 09:21:25 +0000 (+0000) Subject: * basic-block.h (pre_edge_lcm, pre_edge_rev_lcm, compute_available): X-Git-Url: http://git.sourceforge.jp/view?a=commitdiff_plain;h=7bcd381b5359683da930aca2042e18bc98d71909;p=pf3gnuchains%2Fgcc-fork.git * basic-block.h (pre_edge_lcm, pre_edge_rev_lcm, compute_available): Prototype for exported functions. (pre_lcm, pre_rev_lcm): Remove prototypes. * gcse.c (compute_ae_kill): Add ae_gen and ae_kill as parameters. (compute_available): Move to lcm.c, and change parameter order. (one_classic_gcse_pass): Call compute_ae_kill with parameters. (pre_insert, s_preds, s_succs, num_preds, num_succs): Delete. (gcse_main): No longer call compute_preds_succs. Rebuild the set table after reach pre pass. (pre_insert_map, pre_delete_map, edge_list): New. (alloc_pre_mem): Allocate edge vectors. (free_pre_mem): Delete edge vectors. (compute_pre_data): Call new edge based lcm routines. (process_insert_insn): New function. (insert_insn_end_bb): Use it. (pre_edge_insert): New function. (pre_insert_copy_insn): Formatting fixes. Update BLOCK_END as needed. (pre_insert_copies): Revamp using new edge based lcm outputs. (pre_delete): Likewise. (one_pre_gcse_pass): Insert & remove fake edges to the exit block. (compute_code_hoist_vbeinout): New new edge based routines. * lcm.c: Remove all the old LCM functions. Replace with new ones that work with the new cfg datastructures and work with edges instead of blocks. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@30055 138bc75d-0d04-0410-961f-82ee72b054a4 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b413b027a14..de1764764c3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -6,8 +6,23 @@ Sun Oct 17 02:09:50 1999 Andrew MacLeod * gcse.c (compute_ae_kill): Add ae_gen and ae_kill as parameters. (compute_available): Move to lcm.c, and change parameter order. (one_classic_gcse_pass): Call compute_ae_kill with parameters. - (pre_insert): Delete. - + (pre_insert, s_preds, s_succs, num_preds, num_succs): Delete. + (gcse_main): No longer call compute_preds_succs. Rebuild the + set table after reach pre pass. + (pre_insert_map, pre_delete_map, edge_list): New. + (alloc_pre_mem): Allocate edge vectors. + (free_pre_mem): Delete edge vectors. + (compute_pre_data): Call new edge based lcm routines. + (process_insert_insn): New function. + (insert_insn_end_bb): Use it. + (pre_edge_insert): New function. + (pre_insert_copy_insn): Formatting fixes. Update BLOCK_END as + needed. + (pre_insert_copies): Revamp using new edge based lcm outputs. + (pre_delete): Likewise. + (one_pre_gcse_pass): Insert & remove fake edges to the exit + block. + (compute_code_hoist_vbeinout): New new edge based routines. * lcm.c: Remove all the old LCM functions. Replace with new ones that work with the new cfg datastructures and work with edges instead of blocks. diff --git a/gcc/basic-block.h b/gcc/basic-block.h index e7c5dfc6308..693726c4678 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -310,11 +310,13 @@ extern void update_life_info PROTO ((sbitmap, enum update_life_extent)); extern int count_or_remove_death_notes PROTO ((sbitmap, int)); /* In lcm.c */ -extern void pre_lcm PROTO ((int, int, int_list_ptr *, - int_list_ptr *, - sbitmap *, sbitmap *, - sbitmap *, sbitmap *)); -extern void pre_rev_lcm PROTO ((int, int, int_list_ptr *, - int_list_ptr *, - sbitmap *, sbitmap *, +extern struct edge_list *pre_edge_lcm PROTO ((FILE *, int, sbitmap *, + sbitmap *, sbitmap *, + sbitmap *, sbitmap **, + sbitmap **)); +extern struct edge_list *pre_edge_rev_lcm PROTO ((FILE *, int, sbitmap *, + sbitmap *, sbitmap *, + sbitmap *, sbitmap **, + sbitmap **)); +extern int compute_available PROTO ((sbitmap *, sbitmap *, sbitmap *, sbitmap *)); diff --git a/gcc/gcse.c b/gcc/gcse.c index bb6dfd82267..eb564c0826c 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -126,6 +126,10 @@ Boston, MA 02111-1307, USA. */ Steven Muchnick Morgan Kaufmann, 1997 + Building an Optimizing Compiler + Robert Morgan + Digital Press, 1998 + People wishing to speed up the code here should read: Elimination Algorithms for Data Flow Analysis B.G. Ryder, M.C. Paull @@ -285,14 +289,6 @@ static FILE *gcse_file; static int run_jump_opt_after_gcse; -/* Element I is a list of I's predecessors/successors. */ -static int_list_ptr *s_preds; -static int_list_ptr *s_succs; - -/* Element I is the number of predecessors/successors of basic block I. */ -static int *num_preds; -static int *num_succs; - /* Bitmaps are normally not included in debugging dumps. However it's useful to be able to print them from GDB. We could create special functions for this, but it's simpler to @@ -591,7 +587,6 @@ static void compute_pre_data PROTO ((void)); static int pre_expr_reaches_here_p PROTO ((int, struct expr *, int, int, char *)); static void insert_insn_end_bb PROTO ((struct expr *, int, int)); -static void pre_insert PROTO ((struct expr **)); static void pre_insert_copy_insn PROTO ((struct expr *, rtx)); static void pre_insert_copies PROTO ((void)); static int pre_delete PROTO ((void)); @@ -617,8 +612,7 @@ static void alloc_avail_expr_mem PROTO ((int, int)); static void free_avail_expr_mem PROTO ((void)); static void compute_ae_gen PROTO ((void)); static int expr_killed_p PROTO ((rtx, int)); -static void compute_ae_kill PROTO ((void)); -static void compute_available PROTO ((void)); +static void compute_ae_kill PROTO ((sbitmap *, sbitmap *)); static int expr_reaches_here_p PROTO ((struct occr *, struct expr *, int, int, char *)); static rtx computing_insn PROTO ((struct expr *, rtx)); @@ -664,6 +658,9 @@ gcse_main (f, file) max_gcse_regno = max_reg_num (); find_basic_blocks (f, max_gcse_regno, file, 1); + if (file) + dump_flow_info (file); + /* Return if there's nothing to do. */ if (n_basic_blocks <= 1) { @@ -695,18 +692,7 @@ gcse_main (f, file) } gcc_obstack_init (&gcse_obstack); - - /* Allocate and compute predecessors/successors. */ - - s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr)); - s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr)); - num_preds = (int *) alloca (n_basic_blocks * sizeof (int)); - num_succs = (int *) alloca (n_basic_blocks * sizeof (int)); - bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr); - compute_preds_succs (s_preds, s_succs, num_preds, num_succs); - - if (file) - dump_bb_data (file, s_preds, s_succs, 0); + bytes_used = 0; /* Record where pseudo-registers are set. This data is kept accurate during each pass. @@ -748,7 +734,13 @@ gcse_main (f, file) if (optimize_size) changed |= one_classic_gcse_pass (pass + 1); else - changed |= one_pre_gcse_pass (pass + 1); + { + changed |= one_pre_gcse_pass (pass + 1); + free_reg_set_mem (); + alloc_reg_set_mem (max_reg_num ()); + compute_sets (f); + run_jump_opt_after_gcse = 1; + } if (max_pass_bytes < bytes_used) max_pass_bytes = bytes_used; @@ -2846,7 +2838,8 @@ expr_killed_p (x, bb) /* Compute the set of available expressions killed in each basic block. */ static void -compute_ae_kill () +compute_ae_kill (ae_gen, ae_kill) + sbitmap *ae_gen, *ae_kill; { int bb,i; @@ -2868,41 +2861,6 @@ compute_ae_kill () } } } - -/* Compute available expressions. - - Implement the algorithm to find available expressions - as given in the Aho Sethi Ullman book, pages 627-631. */ - -static void -compute_available () -{ - int bb, changed, passes; - - sbitmap_zero (ae_in[0]); - - sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/); - - for (bb = 1; bb < n_basic_blocks; bb++) - sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]); - - passes = 0; - changed = 1; - while (changed) - { - changed = 0; - for (bb = 1; bb < n_basic_blocks; bb++) - { - sbitmap_intersection_of_preds (ae_in[bb], ae_out, bb); - changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb], - ae_in[bb], ae_kill[bb]); - } - passes++; - } - - if (gcse_file) - fprintf (gcse_file, "avail expr computation: %d passes\n", passes); -} /* Actually perform the Classic GCSE optimizations. */ @@ -3364,12 +3322,15 @@ one_classic_gcse_pass (pass) expr_hash_table_size, n_exprs); if (n_exprs > 0) { + int passes; compute_kill_rd (); compute_rd (); alloc_avail_expr_mem (n_basic_blocks, n_exprs); compute_ae_gen (); - compute_ae_kill (); - compute_available (); + compute_ae_kill (ae_gen, ae_kill); + passes = compute_available (ae_gen, ae_kill, ae_out, ae_in); + if (gcse_file) + fprintf (gcse_file, "avail expr computation: %d passes\n", passes); changed = classic_gcse (); free_avail_expr_mem (); } @@ -4111,6 +4072,15 @@ static sbitmap *pre_optimal; /* Nonzero for expressions which are redundant in a particular block. */ static sbitmap *pre_redundant; +/* Nonzero for expressions which should be inserted on a specific edge. */ +static sbitmap *pre_insert_map; + +/* Nonzero for expressions which should be deleted in a specific block. */ +static sbitmap *pre_delete_map; + +/* Contains the edge_list returned by pre_edge_lcm. */ +static struct edge_list *edge_list; + static sbitmap *temp_bitmap; /* Redundant insns. */ @@ -4127,9 +4097,16 @@ alloc_pre_mem (n_blocks, n_exprs) antloc = sbitmap_vector_alloc (n_blocks, n_exprs); temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs); - pre_optimal = sbitmap_vector_alloc (n_blocks, n_exprs); - pre_redundant = sbitmap_vector_alloc (n_blocks, n_exprs); + pre_optimal = NULL; + pre_redundant = NULL; + pre_insert_map = NULL; + pre_delete_map = NULL; + ae_in = NULL; + ae_out = NULL; + u_bitmap = NULL; transpout = sbitmap_vector_alloc (n_blocks, n_exprs); + ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs); + /* pre_insert and pre_delete are allocated later. */ } /* Free vars used for PRE analysis. */ @@ -4141,10 +4118,31 @@ free_pre_mem () free (comp); free (antloc); - free (temp_bitmap); - free (pre_optimal); - free (pre_redundant); - free (transpout); + if (pre_optimal) + free (pre_optimal); + if (pre_redundant) + free (pre_redundant); + if (pre_insert_map) + free (pre_insert_map); + if (pre_delete_map) + free (pre_delete_map); + if (transpout) + free (transpout); + + if (ae_in) + free (ae_in); + if (ae_out) + free (ae_out); + if (ae_kill) + free (ae_kill); + if (u_bitmap) + free (u_bitmap); + + transp = comp = antloc = NULL; + pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; + transpout = ae_in = ae_out = ae_kill = NULL; + u_bitmap = NULL; + } /* Top level routine to do the dataflow analysis needed by PRE. */ @@ -4154,8 +4152,10 @@ compute_pre_data () { compute_local_properties (transp, comp, antloc, 0); compute_transpout (); - pre_lcm (n_basic_blocks, n_exprs, s_preds, s_succs, transp, - antloc, pre_redundant, pre_optimal); + sbitmap_vector_zero (ae_kill, n_basic_blocks); + compute_ae_kill (comp, ae_kill); + edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc, + ae_kill, &pre_insert_map, &pre_delete_map); } @@ -4231,6 +4231,29 @@ pre_expr_reaches_here_p (occr_bb, expr, bb, check_pre_comp, visited) return 0; } + +/* Given an expr, generate RTL which we can insert at the end of a BB, + or on an edge. Set the block number of any insns generated to + the value of BB. */ + +static rtx +process_insert_insn (expr) + struct expr *expr; +{ + rtx reg = expr->reaching_reg; + rtx pat, copied_expr; + rtx first_new_insn; + + start_sequence (); + copied_expr = copy_rtx (expr->expr); + emit_move_insn (reg, copied_expr); + first_new_insn = get_insns (); + pat = gen_sequence (); + end_sequence (); + + return pat; +} + /* Add EXPR to the end of basic block BB. This is used by both the PRE and code hoisting. @@ -4249,15 +4272,9 @@ insert_insn_end_bb (expr, bb, pre) rtx new_insn; rtx reg = expr->reaching_reg; int regno = REGNO (reg); - rtx pat, copied_expr; - rtx first_new_insn; + rtx pat; - start_sequence (); - copied_expr = copy_rtx (expr->expr); - emit_move_insn (reg, copied_expr); - first_new_insn = get_insns (); - pat = gen_sequence (); - end_sequence (); + pat = process_insert_insn (expr); /* If the last insn is a jump, insert EXPR in front [taking care to handle cc0, etc. properly]. */ @@ -4407,37 +4424,34 @@ insert_insn_end_bb (expr, bb, pre) } } -/* Insert partially redundant expressions at the ends of appropriate basic - blocks making them fully redundant. */ +/* Insert partially redundant expressions on edges in the CFG to make + the expressions fully redundant. */ -static void -pre_insert (index_map) +static int +pre_edge_insert (edge_list, index_map) + struct edge_list *edge_list; struct expr **index_map; { - int bb, i, set_size; + int e, i, num_edges, set_size, did_insert = 0; sbitmap *inserted; - /* Compute INSERT = PRE_OPTIMAL & ~PRE_REDUNDANT. - Where INSERT is nonzero, we add the expression at the end of the basic - block if it reaches any of the deleted expressions. */ + /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge + if it reaches any of the deleted expressions. */ - set_size = pre_optimal[0]->size; - inserted = sbitmap_vector_alloc (n_basic_blocks, n_exprs); - sbitmap_vector_zero (inserted, n_basic_blocks); + set_size = pre_insert_map[0]->size; + num_edges = NUM_EDGES (edge_list); + inserted = sbitmap_vector_alloc (num_edges, n_exprs); + sbitmap_vector_zero (inserted, num_edges); - for (bb = 0; bb < n_basic_blocks; bb++) + for (e = 0; e < num_edges; e++) { int indx; + basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e); + int bb = pred->index; - /* This computes the number of potential insertions we need. */ - sbitmap_not (temp_bitmap[bb], pre_redundant[bb]); - sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_optimal[bb]); - - /* TEMP_BITMAP[bb] now contains a bitmap of the expressions that we need - to insert at the end of this basic block. */ for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS) { - SBITMAP_ELT_TYPE insert = temp_bitmap[bb]->elms[i]; + SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i]; int j; for (j = indx; insert && j < n_exprs; j++, insert >>= 1) @@ -4453,23 +4467,49 @@ pre_insert (index_map) if (! occr->deleted_p) continue; - /* Insert this expression at the end of BB if it would - reach the deleted occurence. */ - if (!TEST_BIT (inserted[bb], j) - && pre_expr_reaches_here_p (bb, expr, + /* Insert this expression on this edge if if it would + reach the deleted occurence in BB. */ + if (!TEST_BIT (inserted[e], j) + && (bb == ENTRY_BLOCK + || pre_expr_reaches_here_p (bb, expr, BLOCK_NUM (occr->insn), 0, - NULL)) + NULL))) { - SET_BIT (inserted[bb], j); - insert_insn_end_bb (index_map[j], bb, 1); + rtx insn; + edge eg = INDEX_EDGE (edge_list, e); + /* We can't insert anything on an abnormal + and critical edge, so we insert the + insn at the end of the previous block. There + are several alternatives detailed in + Morgans book P277 (sec 10.5) for handling + this situation. This one is easiest for now. */ + + if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL) + { + insert_insn_end_bb (index_map[j], bb, 0); + } + else + { + insn = process_insert_insn (index_map[j]); + insert_insn_on_edge (insn, eg); + } + if (gcse_file) + { + fprintf (gcse_file, + "PRE/HOIST: edge (%d,%d), copy expression %d\n", + bb, + INDEX_EDGE_SUCC_BB (edge_list, e)->index, expr->bitmap_index); + } + SET_BIT (inserted[e], j); + did_insert = 1; + gcse_create_count++; } } } } } } - - sbitmap_vector_free (inserted); + return did_insert; } /* Copy the result of INSN to REG. @@ -4485,23 +4525,26 @@ pre_insert_copy_insn (expr, insn) int indx = expr->bitmap_index; rtx set = single_set (insn); rtx new_insn; + int bb = BLOCK_NUM (insn); if (!set) abort (); new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)), insn); /* Keep block number table up to date. */ - set_block_num (new_insn, BLOCK_NUM (insn)); + set_block_num (new_insn, bb); /* Keep register set table up to date. */ record_one_set (regno, new_insn); + if (insn == BLOCK_END (bb)) + BLOCK_END (bb) = new_insn; gcse_create_count++; if (gcse_file) - { - fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n", - BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno); - } + fprintf (gcse_file, + "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n", + BLOCK_NUM (insn), INSN_UID (new_insn), indx, + INSN_UID (insn), regno); } /* Copy available expressions that reach the redundant expression @@ -4512,11 +4555,6 @@ pre_insert_copies () { int i, bb; - for (bb = 0; bb < n_basic_blocks; bb++) - { - sbitmap_a_and_b (temp_bitmap[bb], pre_optimal[bb], pre_redundant[bb]); - } - /* For each available expression in the table, copy the result to `reaching_reg' if the expression reaches a deleted one. @@ -4550,10 +4588,6 @@ pre_insert_copies () for (avail = expr->avail_occr; avail != NULL; avail = avail->next) { rtx insn = avail->insn; - int bb = BLOCK_NUM (insn); - - if (!TEST_BIT (temp_bitmap[bb], expr->bitmap_index)) - continue; /* No need to handle this one if handled already. */ if (avail->copied_p) @@ -4591,10 +4625,7 @@ pre_delete () /* Compute the expressions which are redundant and need to be replaced by copies from the reaching reg to the target reg. */ for (bb = 0; bb < n_basic_blocks; bb++) - { - sbitmap_not (temp_bitmap[bb], pre_optimal[bb]); - sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_redundant[bb]); - } + sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]); changed = 0; for (i = 0; i < expr_hash_table_size; i++) @@ -4681,7 +4712,7 @@ pre_delete () static int pre_gcse () { - int i; + int i, did_insert; int changed; struct expr **index_map; @@ -4708,13 +4739,15 @@ pre_gcse () - we know which insns are redundant when we go to create copies */ changed = pre_delete (); - /* Insert insns in places that make partially redundant expressions - fully redundant. */ - pre_insert (index_map); - + did_insert = pre_edge_insert (edge_list, index_map); /* In other places with reaching expressions, copy the expression to the - specially allocated pseudo-reg that reaches the redundant expression. */ + specially allocated pseudo-reg that reaches the redundant expr. */ pre_insert_copies (); + if (did_insert) + { + commit_edge_insertions (); + changed = 1; + } free (pre_redundant_insns); @@ -4735,6 +4768,7 @@ one_pre_gcse_pass (pass) gcse_create_count = 0; alloc_expr_hash_table (max_cuid); + add_noreturn_fake_exit_edges (); compute_expr_hash_table (); if (gcse_file) dump_hash_table (gcse_file, "Expression", expr_hash_table, @@ -4744,8 +4778,10 @@ one_pre_gcse_pass (pass) alloc_pre_mem (n_basic_blocks, n_exprs); compute_pre_data (); changed |= pre_gcse (); + free_edge_list (edge_list); free_pre_mem (); } + remove_fake_edges (); free_expr_hash_table (); if (gcse_file) @@ -5194,8 +5230,7 @@ compute_code_hoist_vbeinout () changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb], hoist_vbeout[bb], transp[bb]); if (bb != n_basic_blocks - 1) - sbitmap_intersect_of_successors (hoist_vbeout[bb], hoist_vbein, - bb, s_succs); + sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb); } passes++; } diff --git a/gcc/lcm.c b/gcc/lcm.c index 01367e36d5c..c0e272107bb 100644 --- a/gcc/lcm.c +++ b/gcc/lcm.c @@ -62,214 +62,51 @@ Boston, MA 02111-1307, USA. */ #include "recog.h" #include "basic-block.h" -static void compute_antinout PROTO ((int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); -static void compute_earlyinout PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); -static void compute_delayinout PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_latein PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_isoinout PROTO ((int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); -static void compute_optimal PROTO ((int, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_redundant PROTO ((int, int, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); - -/* Similarly, but for the reversed flowgraph. */ -static void compute_avinout PROTO ((int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); -static void compute_fartherinout PROTO ((int, int, int_list_ptr *, - sbitmap *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_earlierinout PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_firstout PROTO ((int, int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *)); -static void compute_rev_isoinout PROTO ((int, int_list_ptr *, sbitmap *, - sbitmap *, sbitmap *, sbitmap *)); - -/* Given local properties TRANSP, ANTLOC, return the redundant and optimal - computation points for expressions. - - To reduce overall memory consumption, we allocate memory immediately - before its needed and deallocate it as soon as possible. */ -void -pre_lcm (n_blocks, n_exprs, s_preds, s_succs, transp, - antloc, redundant, optimal) - int n_blocks; - int n_exprs; - int_list_ptr *s_preds; - int_list_ptr *s_succs; - sbitmap *transp; - sbitmap *antloc; - sbitmap *redundant; - sbitmap *optimal; -{ - sbitmap *antin, *antout, *earlyin, *earlyout, *delayin, *delayout; - sbitmap *latein, *isoin, *isoout; - - /* Compute global anticipatability. ANTOUT is not needed except to - compute ANTIN, so free its memory as soon as we return from - compute_antinout. */ - antin = sbitmap_vector_alloc (n_blocks, n_exprs); - antout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_antinout (n_blocks, s_succs, antloc, - transp, antin, antout); - free (antout); - antout = NULL; - - /* Compute earliestness. EARLYOUT is not needed except to compute - EARLYIN, so free its memory as soon as we return from - compute_earlyinout. */ - earlyin = sbitmap_vector_alloc (n_blocks, n_exprs); - earlyout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_earlyinout (n_blocks, n_exprs, s_preds, transp, antin, - earlyin, earlyout); - free (earlyout); - earlyout = NULL; - - /* Compute delayedness. DELAYOUT is not needed except to compute - DELAYIN, so free its memory as soon as we return from - compute_delayinout. We also no longer need ANTIN and EARLYIN. */ - delayin = sbitmap_vector_alloc (n_blocks, n_exprs); - delayout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_delayinout (n_blocks, n_exprs, s_preds, antloc, - antin, earlyin, delayin, delayout); - free (delayout); - delayout = NULL; - free (antin); - antin = NULL; - free (earlyin); - earlyin = NULL; - - /* Compute latestness. We no longer need DELAYIN after we compute - LATEIN. */ - latein = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_latein (n_blocks, n_exprs, s_succs, antloc, delayin, latein); - free (delayin); - delayin = NULL; - - /* Compute isolatedness. ISOIN is not needed except to compute - ISOOUT, so free its memory as soon as we return from - compute_isoinout. */ - isoin = sbitmap_vector_alloc (n_blocks, n_exprs); - isoout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_isoinout (n_blocks, s_succs, antloc, latein, isoin, isoout); - free (isoin); - isoin = NULL; - - /* Now compute optimal placement points and the redundant expressions. */ - compute_optimal (n_blocks, latein, isoout, optimal); - compute_redundant (n_blocks, n_exprs, antloc, latein, isoout, redundant); - free (latein); - latein = NULL; - free (isoout); - isoout = NULL; -} - -/* Given local properties TRANSP, AVLOC, return the redundant and optimal - computation points for expressions on the reverse flowgraph. - - To reduce overall memory consumption, we allocate memory immediately - before its needed and deallocate it as soon as possible. */ - -void -pre_rev_lcm (n_blocks, n_exprs, s_preds, s_succs, transp, - avloc, redundant, optimal) - int n_blocks; - int n_exprs; - int_list_ptr *s_preds; - int_list_ptr *s_succs; - sbitmap *transp; - sbitmap *avloc; - sbitmap *redundant; - sbitmap *optimal; -{ - sbitmap *avin, *avout, *fartherin, *fartherout, *earlierin, *earlierout; - sbitmap *firstout, *rev_isoin, *rev_isoout; - - /* Compute global availability. AVIN is not needed except to - compute AVOUT, so free its memory as soon as we return from - compute_avinout. */ - avin = sbitmap_vector_alloc (n_blocks, n_exprs); - avout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_avinout (n_blocks, s_preds, avloc, transp, avin, avout); - free (avin); - avin = NULL; - - /* Compute fartherness. FARTHERIN is not needed except to compute - FARTHEROUT, so free its memory as soon as we return from - compute_earlyinout. */ - fartherin = sbitmap_vector_alloc (n_blocks, n_exprs); - fartherout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_fartherinout (n_blocks, n_exprs, s_succs, transp, - avout, fartherin, fartherout); - free (fartherin); - fartherin = NULL; - - /* Compute earlierness. EARLIERIN is not needed except to compute - EARLIEROUT, so free its memory as soon as we return from - compute_delayinout. We also no longer need AVOUT and FARTHEROUT. */ - earlierin = sbitmap_vector_alloc (n_blocks, n_exprs); - earlierout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_earlierinout (n_blocks, n_exprs, s_succs, avloc, - avout, fartherout, earlierin, earlierout); - free (earlierin); - earlierin = NULL; - free (avout); - avout = NULL; - free (fartherout); - fartherout = NULL; - - /* Compute firstness. We no longer need EARLIEROUT after we compute - FIRSTOUT. */ - firstout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_firstout (n_blocks, n_exprs, s_preds, avloc, earlierout, firstout); - free (earlierout); - earlierout = NULL; - - /* Compute rev_isolatedness. ISOIN is not needed except to compute - ISOOUT, so free its memory as soon as we return from - compute_isoinout. */ - rev_isoin = sbitmap_vector_alloc (n_blocks, n_exprs); - rev_isoout = sbitmap_vector_alloc (n_blocks, n_exprs); - compute_rev_isoinout (n_blocks, s_preds, avloc, firstout, - rev_isoin, rev_isoout); - free (rev_isoout); - rev_isoout = NULL; - - /* Now compute optimal placement points and the redundant expressions. */ - compute_optimal (n_blocks, firstout, rev_isoin, optimal); - compute_redundant (n_blocks, n_exprs, avloc, firstout, rev_isoin, redundant); - free (firstout); - firstout = NULL; - free (rev_isoin); - rev_isoin = NULL; -} - -/* Compute expression anticipatability at entrance and exit of each block. */ +/* Edge based LCM routines. */ +static void compute_antinout_edge PROTO ((sbitmap *, sbitmap *, + sbitmap *, sbitmap *)); +static void compute_earliest PROTO((struct edge_list *, int, sbitmap *, + sbitmap *, sbitmap *, sbitmap *, + sbitmap *)); +static void compute_laterin PROTO((struct edge_list *, int, sbitmap *, + sbitmap *, sbitmap *, sbitmap *)); +static void compute_insert_delete PROTO ((struct edge_list *edge_list, + sbitmap *, sbitmap *, sbitmap *, + sbitmap *, sbitmap *)); + +/* Edge based LCM routines on a reverse flowgraph. */ +static void compute_farthest PROTO ((struct edge_list *, int, sbitmap *, + sbitmap *, sbitmap*, sbitmap *, + sbitmap *)); +static void compute_nearerout PROTO((struct edge_list *, int, sbitmap *, + sbitmap *, sbitmap *, sbitmap *)); +static void compute_rev_insert_delete PROTO ((struct edge_list *edge_list, + sbitmap *, sbitmap *, sbitmap *, + sbitmap *, sbitmap *)); + + +/* Edge based lcm routines. */ + +/* Compute expression anticipatability at entrance and exit of each block. + This is done based on the flow graph, and not on the pred-succ lists. + Other than that, its pretty much identical to compute_antinout. */ static void -compute_antinout (n_blocks, s_succs, antloc, transp, antin, antout) - int n_blocks; - int_list_ptr *s_succs; +compute_antinout_edge (antloc, transp, antin, antout) sbitmap *antloc; sbitmap *transp; sbitmap *antin; sbitmap *antout; { - int bb, changed, passes; + int i, changed, passes; sbitmap old_changed, new_changed; + edge e; - sbitmap_zero (antout[n_blocks - 1]); - sbitmap_vector_ones (antin, n_blocks); + sbitmap_vector_zero (antout, n_basic_blocks); + sbitmap_vector_ones (antin, n_basic_blocks); - old_changed = sbitmap_alloc (n_blocks); - new_changed = sbitmap_alloc (n_blocks); + old_changed = sbitmap_alloc (n_basic_blocks); + new_changed = sbitmap_alloc (n_basic_blocks); sbitmap_ones (old_changed); passes = 0; @@ -278,522 +115,568 @@ compute_antinout (n_blocks, s_succs, antloc, transp, antin, antout) { changed = 0; sbitmap_zero (new_changed); + /* We scan the blocks in the reverse order to speed up the convergence. */ - for (bb = n_blocks - 1; bb >= 0; bb--) + for (i = n_basic_blocks - 1; i >= 0; i--) { - int_list_ptr ps; - + basic_block bb = BASIC_BLOCK (i); /* If none of the successors of this block have changed, then this block is not going to change. */ - for (ps = s_succs[bb] ; ps; ps = ps->next) + for (e = bb->succ ; e; e = e->succ_next) { - if (INT_LIST_VAL (ps) == EXIT_BLOCK - || INT_LIST_VAL (ps) == ENTRY_BLOCK) + if (e->dest == EXIT_BLOCK_PTR) break; - if (TEST_BIT (old_changed, INT_LIST_VAL (ps)) - || TEST_BIT (new_changed, INT_LIST_VAL (ps))) + if (TEST_BIT (old_changed, e->dest->index) + || TEST_BIT (new_changed, e->dest->index)) break; } - if (!ps) + if (!e) continue; - if (bb != n_blocks - 1) - sbitmap_intersect_of_successors (antout[bb], antin, - bb, s_succs); - if (sbitmap_a_or_b_and_c (antin[bb], antloc[bb], - transp[bb], antout[bb])) + /* If an Exit blocks is the ONLY successor, its has a zero ANTIN, + which is the opposite of the default definition for an + intersection of succs definition. */ + if (e->dest == EXIT_BLOCK_PTR && e->succ_next == NULL + && e->src->succ == e) + sbitmap_zero (antout[bb->index]); + else + { + sbitmap_intersection_of_succs (antout[bb->index], + antin, + bb->index); + } + + if (sbitmap_a_or_b_and_c (antin[bb->index], antloc[bb->index], + transp[bb->index], antout[bb->index])) { changed = 1; - SET_BIT (new_changed, bb); + SET_BIT (new_changed, bb->index); } } sbitmap_copy (old_changed, new_changed); passes++; } + free (old_changed); free (new_changed); } -/* Compute expression earliestness at entrance and exit of each block. - - From Advanced Compiler Design and Implementation pp411. - - An expression is earliest at the entrance to basic block BB if no - block from entry to block BB both evaluates the expression and - produces the same value as evaluating it at the entry to block BB - does. Similarly for earlistness at basic block BB exit. */ - +/* Compute the earliest vector for edge based lcm. */ static void -compute_earlyinout (n_blocks, n_exprs, s_preds, transp, antin, - earlyin, earlyout) - int n_blocks; +compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest) + struct edge_list *edge_list; int n_exprs; - int_list_ptr *s_preds; - sbitmap *transp; - sbitmap *antin; - sbitmap *earlyin; - sbitmap *earlyout; + sbitmap *antin, *antout, *avout, *kill, *earliest; { - int bb, changed, passes; - sbitmap temp_bitmap; - sbitmap old_changed, new_changed; - - temp_bitmap = sbitmap_alloc (n_exprs); + sbitmap difference, temp_bitmap; + int x, num_edges; + basic_block pred, succ; - sbitmap_vector_zero (earlyout, n_blocks); - sbitmap_ones (earlyin[0]); + num_edges = NUM_EDGES (edge_list); - old_changed = sbitmap_alloc (n_blocks); - new_changed = sbitmap_alloc (n_blocks); - sbitmap_ones (old_changed); + difference = sbitmap_alloc (n_exprs); + temp_bitmap = sbitmap_alloc (n_exprs); - passes = 0; - changed = 1; - while (changed) + for (x = 0; x < num_edges; x++) { - changed = 0; - sbitmap_zero (new_changed); - for (bb = 0; bb < n_blocks; bb++) - { - int_list_ptr ps; - - /* If none of the predecessors of this block have changed, - then this block is not going to change. */ - for (ps = s_preds[bb] ; ps; ps = ps->next) + pred = INDEX_EDGE_PRED_BB (edge_list, x); + succ = INDEX_EDGE_SUCC_BB (edge_list, x); + if (pred == ENTRY_BLOCK_PTR) + sbitmap_copy (earliest[x], antin[succ->index]); + else + { + if (succ == EXIT_BLOCK_PTR) { - if (INT_LIST_VAL (ps) == EXIT_BLOCK - || INT_LIST_VAL (ps) == ENTRY_BLOCK) - break; - - if (TEST_BIT (old_changed, INT_LIST_VAL (ps)) - || TEST_BIT (new_changed, INT_LIST_VAL (ps))) - break; + sbitmap_zero (earliest[x]); } - - if (!ps) - continue; - - if (bb != 0) - sbitmap_union_of_predecessors (earlyin[bb], earlyout, - bb, s_preds); - sbitmap_not (temp_bitmap, transp[bb]); - if (sbitmap_union_of_diff (earlyout[bb], temp_bitmap, - earlyin[bb], antin[bb])) + else { - changed = 1; - SET_BIT (new_changed, bb); + sbitmap_difference (difference, antin[succ->index], + avout[pred->index]); + sbitmap_not (temp_bitmap, antout[pred->index]); + sbitmap_a_and_b_or_c (earliest[x], difference, kill[pred->index], + temp_bitmap); } } - sbitmap_copy (old_changed, new_changed); - passes++; } - free (old_changed); - free (new_changed); free (temp_bitmap); + free (difference); } -/* Compute expression delayedness at entrance and exit of each block. - - From Advanced Compiler Design and Implementation pp411. - - An expression is delayed at the entrance to BB if it is anticipatable - and earliest at that point and if all subsequent computations of - the expression are in block BB. */ - +/* Compute later and laterin vectors for edge based lcm. */ static void -compute_delayinout (n_blocks, n_exprs, s_preds, antloc, - antin, earlyin, delayin, delayout) - int n_blocks; +compute_laterin (edge_list, n_exprs, + earliest, antloc, later, laterin) + struct edge_list *edge_list; int n_exprs; - int_list_ptr *s_preds; - sbitmap *antloc; - sbitmap *antin; - sbitmap *earlyin; - sbitmap *delayin; - sbitmap *delayout; + sbitmap *earliest, *antloc, *later, *laterin; { - int bb, changed, passes; - sbitmap *anti_and_early; - sbitmap temp_bitmap; + sbitmap difference, temp_bitmap; + int x, num_edges; + basic_block pred, succ; + int done = 0; - temp_bitmap = sbitmap_alloc (n_exprs); + num_edges = NUM_EDGES (edge_list); - /* This is constant throughout the flow equations below, so compute - it once to save time. */ - anti_and_early = sbitmap_vector_alloc (n_blocks, n_exprs); - for (bb = 0; bb < n_blocks; bb++) - sbitmap_a_and_b (anti_and_early[bb], antin[bb], earlyin[bb]); - - sbitmap_vector_zero (delayout, n_blocks); - sbitmap_copy (delayin[0], anti_and_early[0]); + /* Laterin has an extra block allocated for the exit block. */ + sbitmap_vector_ones (laterin, n_basic_blocks + 1); + sbitmap_vector_zero (later, num_edges); - passes = 0; - changed = 1; - while (changed) + /* Initialize laterin to the intersection of EARLIEST for all edges + from predecessors to this block. */ + + for (x = 0; x < num_edges; x++) { - changed = 0; - for (bb = 0; bb < n_blocks; bb++) + succ = INDEX_EDGE_SUCC_BB (edge_list, x); + pred = INDEX_EDGE_PRED_BB (edge_list, x); + if (succ != EXIT_BLOCK_PTR) + sbitmap_a_and_b (laterin[succ->index], laterin[succ->index], + earliest[x]); + /* We already know the correct value of later for edges from + the entry node, so set it now. */ + if (pred == ENTRY_BLOCK_PTR) + sbitmap_copy (later[x], earliest[x]); + } + + difference = sbitmap_alloc (n_exprs); + + while (!done) + { + done = 1; + for (x = 0; x < num_edges; x++) { - if (bb != 0) + pred = INDEX_EDGE_PRED_BB (edge_list, x); + if (pred != ENTRY_BLOCK_PTR) { - sbitmap_intersect_of_predecessors (temp_bitmap, delayout, - bb, s_preds); - changed |= sbitmap_a_or_b (delayin[bb], - anti_and_early[bb], - temp_bitmap); + sbitmap_difference (difference, laterin[pred->index], + antloc[pred->index]); + if (sbitmap_a_or_b (later[x], difference, earliest[x])) + done = 0; } - sbitmap_not (temp_bitmap, antloc[bb]); - changed |= sbitmap_a_and_b (delayout[bb], - temp_bitmap, - delayin[bb]); } - passes++; + if (done) + break; + + sbitmap_vector_ones (laterin, n_basic_blocks); + + for (x = 0; x < num_edges; x++) + { + succ = INDEX_EDGE_SUCC_BB (edge_list, x); + if (succ != EXIT_BLOCK_PTR) + sbitmap_a_and_b (laterin[succ->index], laterin[succ->index], + later[x]); + else + /* We allocated an extra block for the exit node. */ + sbitmap_a_and_b (laterin[n_basic_blocks], laterin[n_basic_blocks], + later[x]); + } } - /* We're done with this, so go ahead and free it's memory now instead - of waiting until the end of pre. */ - free (anti_and_early); - free (temp_bitmap); + free (difference); } -/* Compute latestness. +/* Compute the insertion and deletion points for edge based LCM. */ +static void +compute_insert_delete (edge_list, antloc, later, laterin, + insert, delete) + struct edge_list *edge_list; + sbitmap *antloc, *later, *laterin, *insert, *delete; +{ + int x; - From Advanced Compiler Design and Implementation pp412. + for (x = 0; x < n_basic_blocks; x++) + sbitmap_difference (delete[x], antloc[x], laterin[x]); + + for (x = 0; x < NUM_EDGES (edge_list); x++) + { + basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x); + if (b == EXIT_BLOCK_PTR) + sbitmap_difference (insert[x], later[x], laterin[n_basic_blocks]); + else + sbitmap_difference (insert[x], later[x], laterin[b->index]); + } +} - An expression is latest at the entrance to block BB if that is an optimal - point for computing the expression and if on every path from block BB's - entrance to the exit block, any optimal computation point for the - expression occurs after one of the points at which the expression was - computed in the original flowgraph. */ +/* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the + insert and delete vectors for edge based LCM. Returns an + edgelist which is used to map the insert vector to what edge + an expression should be inserted on. */ -static void -compute_latein (n_blocks, n_exprs, s_succs, antloc, delayin, latein) - int n_blocks; +struct edge_list * +pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete) + FILE *file; int n_exprs; - int_list_ptr *s_succs; + sbitmap *transp; + sbitmap *avloc; sbitmap *antloc; - sbitmap *delayin; - sbitmap *latein; + sbitmap *kill; + sbitmap **insert; + sbitmap **delete; { - int bb; - sbitmap temp_bitmap; + sbitmap *antin, *antout, *earliest; + sbitmap *avin, *avout; + sbitmap *later, *laterin; + struct edge_list *edge_list; + int num_edges; - temp_bitmap = sbitmap_alloc (n_exprs); + edge_list = create_edge_list (); + num_edges = NUM_EDGES (edge_list); - for (bb = 0; bb < n_blocks; bb++) +#ifdef LCM_DEBUG_INFO + if (file) { - /* The last block is succeeded only by the exit block; therefore, - temp_bitmap will not be set by the following call! */ - if (bb == n_blocks - 1) - { - sbitmap_intersect_of_successors (temp_bitmap, delayin, - bb, s_succs); - sbitmap_not (temp_bitmap, temp_bitmap); - } - else - sbitmap_ones (temp_bitmap); - sbitmap_a_and_b_or_c (latein[bb], delayin[bb], - antloc[bb], temp_bitmap); + fprintf (file, "Edge List:\n"); + verify_edge_list (file, edge_list); + print_edge_list (file, edge_list); + dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks); + dump_sbitmap_vector (file, "antloc", "", antloc, n_basic_blocks); + dump_sbitmap_vector (file, "avloc", "", avloc, n_basic_blocks); + dump_sbitmap_vector (file, "kill", "", kill, n_basic_blocks); } - free (temp_bitmap); -} - -/* Compute isolated. +#endif - From Advanced Compiler Design and Implementation pp413. + /* Compute global availability. */ + avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_available (avloc, kill, avout, avin); - A computationally optimal placement for the evaluation of an expression - is defined to be isolated if and only if on every path from a successor - of the block in which it is computed to the exit block, every original - computation of the expression is preceded by the optimal placement point. */ - -static void -compute_isoinout (n_blocks, s_succs, antloc, latein, isoin, isoout) - int n_blocks; - int_list_ptr *s_succs; - sbitmap *antloc; - sbitmap *latein; - sbitmap *isoin; - sbitmap *isoout; -{ - int bb, changed, passes; + free (avin); - sbitmap_vector_zero (isoin, n_blocks); - sbitmap_zero (isoout[n_blocks - 1]); + /* Compute global anticipatability. */ + antin = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + antout = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_antinout_edge (antloc, transp, antin, antout); - passes = 0; - changed = 1; - while (changed) +#ifdef LCM_DEBUG_INFO + if (file) { - changed = 0; - for (bb = n_blocks - 1; bb >= 0; bb--) - { - if (bb != n_blocks - 1) - sbitmap_intersect_of_successors (isoout[bb], isoin, - bb, s_succs); - changed |= sbitmap_union_of_diff (isoin[bb], latein[bb], - isoout[bb], antloc[bb]); - } - passes++; + dump_sbitmap_vector (file, "antin", "", antin, n_basic_blocks); + dump_sbitmap_vector (file, "antout", "", antout, n_basic_blocks); } -} +#endif -/* Compute the set of expressions which have optimal computational points - in each basic block. This is the set of expressions that are latest, but - that are not isolated in the block. */ + /* Compute earliestness. */ + earliest = sbitmap_vector_alloc (num_edges, n_exprs); + compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest); -static void -compute_optimal (n_blocks, latein, isoout, optimal) - int n_blocks; - sbitmap *latein; - sbitmap *isoout; - sbitmap *optimal; -{ - int bb; +#ifdef LCM_DEBUG_INFO + if (file) + dump_sbitmap_vector (file, "earliest", "", earliest, num_edges); +#endif - for (bb = 0; bb < n_blocks; bb++) - sbitmap_difference (optimal[bb], latein[bb], isoout[bb]); -} + free (antout); + free (antin); + free (avout); -/* Compute the set of expressions that are redundant in a block. They are - the expressions that are used in the block and that are neither isolated - or latest. */ + later = sbitmap_vector_alloc (num_edges, n_exprs); + /* Allocate an extra element for the exit block in the laterin vector. */ + laterin = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs); + compute_laterin (edge_list, n_exprs, earliest, antloc, later, laterin); -static void -compute_redundant (n_blocks, n_exprs, antloc, latein, isoout, redundant) - int n_blocks; - int n_exprs; - sbitmap *antloc; - sbitmap *latein; - sbitmap *isoout; - sbitmap *redundant; -{ - int bb; - sbitmap temp_bitmap; +#ifdef LCM_DEBUG_INFO + if (file) + { + dump_sbitmap_vector (file, "laterin", "", laterin, n_basic_blocks + 1); + dump_sbitmap_vector (file, "later", "", later, num_edges); + } +#endif - temp_bitmap = sbitmap_alloc (n_exprs); + free (earliest); + + *insert = sbitmap_vector_alloc (num_edges, n_exprs); + *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete); - for (bb = 0; bb < n_blocks; bb++) + free (laterin); + free (later); + +#ifdef LCM_DEBUG_INFO + if (file) { - sbitmap_a_or_b (temp_bitmap, latein[bb], isoout[bb]); - sbitmap_difference (redundant[bb], antloc[bb], temp_bitmap); + dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges); + dump_sbitmap_vector (file, "pre_delete_map", "", *delete, n_basic_blocks); } - free (temp_bitmap); -} +#endif -/* Compute expression availability at entrance and exit of each block. */ + return edge_list; +} -static void -compute_avinout (n_blocks, s_preds, avloc, transp, avin, avout) - int n_blocks; - int_list_ptr *s_preds; - sbitmap *avloc; - sbitmap *transp; - sbitmap *avin; - sbitmap *avout; +/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors. + Return the number of passes we performed to iterate to a solution. */ +int +compute_available (avloc, kill, avout, avin) + sbitmap *avloc, *kill, *avout, *avin; { int bb, changed, passes; + int last = n_basic_blocks - 1; sbitmap_zero (avin[0]); - sbitmap_vector_ones (avout, n_blocks); + sbitmap_copy (avout[0] /*dst*/, avloc[0] /*src*/); + for (bb = 1; bb < n_basic_blocks; bb++) + sbitmap_not (avout[bb], kill[bb]); + passes = 0; changed = 1; while (changed) { changed = 0; - for (bb = 0; bb < n_blocks; bb++) - { - if (bb != 0) - sbitmap_intersect_of_predecessors (avin[bb], avout, - bb, s_preds); - changed |= sbitmap_a_or_b_and_c (avout[bb], avloc[bb], - transp[bb], avin[bb]); - } + for (bb = 1; bb < n_basic_blocks; bb++) + { + sbitmap_intersection_of_preds (avin[bb], avout, bb); + changed |= sbitmap_union_of_diff (avout[bb], avloc[bb], + avin[bb], kill[bb]); + } passes++; } + return passes; } -/* Compute expression latestness. - - This is effectively the same as earliestness computed on the reverse - flow graph. */ - +/* Compute the farthest vector for edge based lcm. */ static void -compute_fartherinout (n_blocks, n_exprs, s_succs, - transp, avout, fartherin, fartherout) - int n_blocks; +compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin, + kill, farthest) + struct edge_list *edge_list; int n_exprs; - int_list_ptr *s_succs; - sbitmap *transp; - sbitmap *avout; - sbitmap *fartherin; - sbitmap *fartherout; + sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest; { - int bb, changed, passes; - sbitmap temp_bitmap; + sbitmap difference, temp_bitmap; + int x, num_edges; + basic_block pred, succ; - temp_bitmap = sbitmap_alloc (n_exprs); + num_edges = NUM_EDGES (edge_list); - sbitmap_vector_zero (fartherin, n_blocks); - sbitmap_ones (fartherout[n_blocks - 1]); + difference = sbitmap_alloc (n_exprs); + temp_bitmap = sbitmap_alloc (n_exprs); - passes = 0; - changed = 1; - while (changed) + for (x = 0; x < num_edges; x++) { - changed = 0; - for (bb = n_blocks - 1; bb >= 0; bb--) + pred = INDEX_EDGE_PRED_BB (edge_list, x); + succ = INDEX_EDGE_SUCC_BB (edge_list, x); + if (succ == EXIT_BLOCK_PTR) + sbitmap_copy (farthest[x], st_avout[pred->index]); + else { - if (bb != n_blocks - 1) - sbitmap_union_of_successors (fartherout[bb], fartherin, - bb, s_succs); - sbitmap_not (temp_bitmap, transp[bb]); - changed |= sbitmap_union_of_diff (fartherin[bb], temp_bitmap, - fartherout[bb], avout[bb]); + if (pred == ENTRY_BLOCK_PTR) + { + sbitmap_zero (farthest[x]); + } + else + { + sbitmap_difference (difference, st_avout[pred->index], + st_antin[succ->index]); + sbitmap_not (temp_bitmap, st_avin[succ->index]); + sbitmap_a_and_b_or_c (farthest[x], difference, + kill[succ->index], temp_bitmap); + } } - passes++; } - free (temp_bitmap); + free (difference); } -/* Compute expression earlierness at entrance and exit of each block. - - This is effectively the same as delayedness computed on the reverse - flow graph. */ - +/* Compute nearer and nearerout vectors for edge based lcm. */ static void -compute_earlierinout (n_blocks, n_exprs, s_succs, avloc, - avout, fartherout, earlierin, earlierout) - int n_blocks; +compute_nearerout (edge_list, n_exprs, + farthest, st_avloc, nearer, nearerout) + struct edge_list *edge_list; int n_exprs; - int_list_ptr *s_succs; - sbitmap *avloc; - sbitmap *avout; - sbitmap *fartherout; - sbitmap *earlierin; - sbitmap *earlierout; + sbitmap *farthest, *st_avloc, *nearer, *nearerout; { - int bb, changed, passes; - sbitmap *av_and_farther; - sbitmap temp_bitmap; + sbitmap difference, temp_bitmap; + int x, num_edges; + basic_block pred, succ; + int done = 0; - temp_bitmap = sbitmap_alloc (n_exprs); + num_edges = NUM_EDGES (edge_list); - /* This is constant throughout the flow equations below, so compute - it once to save time. */ - av_and_farther = sbitmap_vector_alloc (n_blocks, n_exprs); - for (bb = 0; bb < n_blocks; bb++) - sbitmap_a_and_b (av_and_farther[bb], avout[bb], fartherout[bb]); - - sbitmap_vector_zero (earlierin, n_blocks); - sbitmap_copy (earlierout[n_blocks - 1], av_and_farther[n_blocks - 1]); + /* nearout has an extra block allocated for the entry block. */ + sbitmap_vector_ones (nearerout, n_basic_blocks + 1); + sbitmap_vector_zero (nearer, num_edges); - passes = 0; - changed = 1; - while (changed) + /* Initialize nearerout to the intersection of FARTHEST for all edges + from predecessors to this block. */ + + for (x = 0; x < num_edges; x++) { - changed = 0; - for (bb = n_blocks - 1; bb >= 0; bb--) + succ = INDEX_EDGE_SUCC_BB (edge_list, x); + pred = INDEX_EDGE_PRED_BB (edge_list, x); + if (pred != ENTRY_BLOCK_PTR) + { + sbitmap_a_and_b (nearerout[pred->index], nearerout[pred->index], + farthest[x]); + } + /* We already know the correct value of nearer for edges to + the exit node. */ + if (succ == EXIT_BLOCK_PTR) + sbitmap_copy (nearer[x], farthest[x]); + } + + difference = sbitmap_alloc (n_exprs); + + while (!done) + { + done = 1; + for (x = 0; x < num_edges; x++) { - if (bb != n_blocks - 1) + succ = INDEX_EDGE_SUCC_BB (edge_list, x); + if (succ != EXIT_BLOCK_PTR) { - sbitmap_intersect_of_successors (temp_bitmap, earlierin, - bb, s_succs); - changed |= sbitmap_a_or_b (earlierout[bb], - av_and_farther[bb], - temp_bitmap); + sbitmap_difference (difference, nearerout[succ->index], + st_avloc[succ->index]); + if (sbitmap_a_or_b (nearer[x], difference, farthest[x])) + done = 0; } - sbitmap_not (temp_bitmap, avloc[bb]); - changed |= sbitmap_a_and_b (earlierin[bb], - temp_bitmap, - earlierout[bb]); } - passes++; - } - /* We're done with this, so go ahead and free it's memory now instead - of waiting until the end of pre. */ - free (av_and_farther); - free (temp_bitmap); -} + if (done) + break; -/* Compute firstness. + sbitmap_vector_zero (nearerout, n_basic_blocks); + + for (x = 0; x < num_edges; x++) + { + pred = INDEX_EDGE_PRED_BB (edge_list, x); + if (pred != ENTRY_BLOCK_PTR) + sbitmap_a_and_b (nearerout[pred->index], + nearerout[pred->index], nearer[x]); + else + sbitmap_a_and_b (nearerout[n_basic_blocks], + nearerout[n_basic_blocks], nearer[x]); + } + } - This is effectively the same as latestness computed on the reverse - flow graph. */ + free (difference); +} +/* Compute the insertion and deletion points for edge based LCM. */ static void -compute_firstout (n_blocks, n_exprs, s_preds, avloc, earlierout, firstout) - int n_blocks; - int n_exprs; - int_list_ptr *s_preds; - sbitmap *avloc; - sbitmap *earlierout; - sbitmap *firstout; +compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout, + insert, delete) + struct edge_list *edge_list; + sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete; { - int bb; - sbitmap temp_bitmap; + int x; - temp_bitmap = sbitmap_alloc (n_exprs); - - for (bb = 0; bb < n_blocks; bb++) + for (x = 0; x < n_basic_blocks; x++) + sbitmap_difference (delete[x], st_avloc[x], nearerout[x]); + + for (x = 0; x < NUM_EDGES (edge_list); x++) { - /* The first block is preceded only by the entry block; therefore, - temp_bitmap will not be set by the following call! */ - if (bb != 0) - { - sbitmap_intersect_of_predecessors (temp_bitmap, earlierout, - bb, s_preds); - sbitmap_not (temp_bitmap, temp_bitmap); - } + basic_block b = INDEX_EDGE_PRED_BB (edge_list, x); + if (b == ENTRY_BLOCK_PTR) + sbitmap_difference (insert[x], nearer[x], nearerout[n_basic_blocks]); else - { - sbitmap_ones (temp_bitmap); - } - sbitmap_a_and_b_or_c (firstout[bb], earlierout[bb], - avloc[bb], temp_bitmap); + sbitmap_difference (insert[x], nearer[x], nearerout[b->index]); } - free (temp_bitmap); } -/* Compute reverse isolated. - - This is effectively the same as isolatedness computed on the reverse - flow graph. */ +/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the + insert and delete vectors for edge based reverse LCM. Returns an + edgelist which is used to map the insert vector to what edge + an expression should be inserted on. */ -static void -compute_rev_isoinout (n_blocks, s_preds, avloc, firstout, - rev_isoin, rev_isoout) - int n_blocks; - int_list_ptr *s_preds; - sbitmap *avloc; - sbitmap *firstout; - sbitmap *rev_isoin; - sbitmap *rev_isoout; +struct edge_list * +pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill, + insert, delete) + FILE *file; + int n_exprs; + sbitmap *transp; + sbitmap *st_avloc; + sbitmap *st_antloc; + sbitmap *kill; + sbitmap **insert; + sbitmap **delete; { - int bb, changed, passes; + sbitmap *st_antin, *st_antout; + sbitmap *st_avout, *st_avin, *farthest; + sbitmap *nearer, *nearerout; + struct edge_list *edge_list; + int x,num_edges; + + edge_list = create_edge_list (); + num_edges = NUM_EDGES (edge_list); + + st_antin = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs); + st_antout = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs); + sbitmap_vector_zero (st_antin, n_basic_blocks); + sbitmap_vector_zero (st_antout, n_basic_blocks); + compute_antinout_edge (st_antloc, transp, st_antin, st_antout); + + /* Compute global anticipatability. */ + st_avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + st_avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_available (st_avloc, kill, st_avout, st_avin); + +#ifdef LCM_DEBUG_INFO + if (file) + { + fprintf (file, "Edge List:\n"); + verify_edge_list (file, edge_list); + print_edge_list (file, edge_list); + dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks); + dump_sbitmap_vector (file, "st_avloc", "", st_avloc, n_basic_blocks); + dump_sbitmap_vector (file, "st_antloc", "", st_antloc, n_basic_blocks); + dump_sbitmap_vector (file, "st_antin", "", st_antin, n_basic_blocks); + dump_sbitmap_vector (file, "st_antout", "", st_antout, n_basic_blocks); + dump_sbitmap_vector (file, "st_kill", "", kill, n_basic_blocks); + } +#endif - sbitmap_vector_zero (rev_isoout, n_blocks); - sbitmap_zero (rev_isoin[0]); +#ifdef LCM_DEBUG_INFO + if (file) + { + dump_sbitmap_vector (file, "st_avout", "", st_avout, n_basic_blocks); + dump_sbitmap_vector (file, "st_avin", "", st_avin, n_basic_blocks); + } +#endif - passes = 0; - changed = 1; - while (changed) + /* Compute farthestness. */ + farthest = sbitmap_vector_alloc (num_edges, n_exprs); + compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin, + kill, farthest); + +#ifdef LCM_DEBUG_INFO + if (file) + dump_sbitmap_vector (file, "farthest", "", farthest, num_edges); +#endif + + free (st_avin); + free (st_avout); + + nearer = sbitmap_vector_alloc (num_edges, n_exprs); + /* Allocate an extra element for the entry block. */ + nearerout = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs); + compute_nearerout (edge_list, n_exprs, farthest, st_avloc, nearer, nearerout); + +#ifdef LCM_DEBUG_INFO + if (file) { - changed = 0; - for (bb = 0; bb < n_blocks; bb++) - { - if (bb != 0) - sbitmap_intersect_of_predecessors (rev_isoin[bb], rev_isoout, - bb, s_preds); - changed |= sbitmap_union_of_diff (rev_isoout[bb], firstout[bb], - rev_isoin[bb], avloc[bb]); - } - passes++; + dump_sbitmap_vector (file, "nearerout", "", nearerout, + n_basic_blocks + 1); + dump_sbitmap_vector (file, "nearer", "", nearer, num_edges); } +#endif + + free (farthest); + + *insert = sbitmap_vector_alloc (num_edges, n_exprs); + *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs); + compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout, *insert, *delete); + + free (nearerout); + free (nearer); + +#ifdef LCM_DEBUG_INFO + if (file) + { + dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges); + dump_sbitmap_vector (file, "pre_delete_map", "", *delete, n_basic_blocks); + } +#endif + + return edge_list; }