1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
27 - dead store elimination
28 - a store to the same address as a load does not kill the load if the
29 source of the store is also the destination of the load. Handling this
30 allows more load motion, particularly out of loops.
31 - ability to realloc sbitmap vectors would allow one initial computation
32 of reg_set_in_block with only subsequent additions, rather than
33 recomputing it for each pass
37 /* References searched while implementing this.
39 Compilers Principles, Techniques and Tools
43 Global Optimization by Suppression of Partial Redundancies
45 Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
47 A Portable Machine-Independent Global Optimizer - Design and Measurements
49 Stanford Ph.D. thesis, Dec. 1983
51 A Fast Algorithm for Code Movement Optimization
53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
55 A Solution to a Problem with Morel and Renvoise's
56 Global Optimization by Suppression of Partial Redundancies
57 K-H Drechsler, M.P. Stadel
58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
60 Practical Adaptation of the Global Optimization
61 Algorithm of Morel and Renvoise
63 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
65 Efficiently Computing Static Single Assignment Form and the Control
67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
71 J. Knoop, O. Ruthing, B. Steffen
72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
75 Time for Reducible Flow Control
77 ACM Letters on Programming Languages and Systems,
78 Vol. 2, Num. 1-4, Mar-Dec 1993
80 An Efficient Representation for Sparse Sets
81 Preston Briggs, Linda Torczon
82 ACM Letters on Programming Languages and Systems,
83 Vol. 2, Num. 1-4, Mar-Dec 1993
85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
86 K-H Drechsler, M.P. Stadel
87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
89 Partial Dead Code Elimination
90 J. Knoop, O. Ruthing, B. Steffen
91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
93 Effective Partial Redundancy Elimination
94 P. Briggs, K.D. Cooper
95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
97 The Program Structure Tree: Computing Control Regions in Linear Time
98 R. Johnson, D. Pearson, K. Pingali
99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
101 Optimal Code Motion: Theory and Practice
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
105 The power of assignment motion
106 J. Knoop, O. Ruthing, B. Steffen
107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
109 Global code motion / global value numbering
111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
113 Value Driven Redundancy Elimination
115 Rice University Ph.D. thesis, Apr. 1996
119 Massively Scalar Compiler Project, Rice University, Sep. 1996
121 High Performance Compilers for Parallel Computing
125 Advanced Compiler Design and Implementation
127 Morgan Kaufmann, 1997
129 People wishing to speed up the code here should read:
130 Elimination Algorithms for Data Flow Analysis
131 B.G. Ryder, M.C. Paull
132 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
134 How to Analyze Large Programs Efficiently and Informatively
135 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
136 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
138 People wishing to do something different can find various possibilities
139 in the above papers and elsewhere.
149 #include "hard-reg-set.h"
152 #include "insn-config.h"
154 #include "basic-block.h"
156 #include "function.h"
160 #define obstack_chunk_alloc gmalloc
161 #define obstack_chunk_free free
163 /* Maximum number of passes to perform. */
166 /* Propagate flow information through back edges and thus enable PRE's
167 moving loop invariant calculations out of loops.
169 Originally this tended to create worse overall code, but several
170 improvements during the development of PRE seem to have made following
171 back edges generally a win.
173 Note much of the loop invariant code motion done here would normally
174 be done by loop.c, which has more heuristics for when to move invariants
175 out of loops. At some point we might need to move some of those
176 heuristics into gcse.c. */
177 #define FOLLOW_BACK_EDGES 1
179 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
180 are a superset of those done by GCSE.
182 We perform the following steps:
184 1) Compute basic block information.
186 2) Compute table of places where registers are set.
188 3) Perform copy/constant propagation.
190 4) Perform global cse.
192 5) Perform another pass of copy/constant propagation.
194 Two passes of copy/constant propagation are done because the first one
195 enables more GCSE and the second one helps to clean up the copies that
196 GCSE creates. This is needed more for PRE than for Classic because Classic
197 GCSE will try to use an existing register containing the common
198 subexpression rather than create a new one. This is harder to do for PRE
199 because of the code motion (which Classic GCSE doesn't do).
201 Expressions we are interested in GCSE-ing are of the form
202 (set (pseudo-reg) (expression)).
203 Function want_to_gcse_p says what these are.
205 PRE handles moving invariant expressions out of loops (by treating them as
206 partially redundant).
208 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
209 assignment) based GVN (global value numbering). L. T. Simpson's paper
210 (Rice University) on value numbering is a useful reference for this.
212 **********************
214 We used to support multiple passes but there are diminishing returns in
215 doing so. The first pass usually makes 90% of the changes that are doable.
216 A second pass can make a few more changes made possible by the first pass.
217 Experiments show any further passes don't make enough changes to justify
220 A study of spec92 using an unlimited number of passes:
221 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
222 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
223 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
225 It was found doing copy propagation between each pass enables further
228 PRE is quite expensive in complicated functions because the DFA can take
229 a while to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
230 be modified if one wants to experiment.
232 **********************
234 The steps for PRE are:
236 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
238 2) Perform the data flow analysis for PRE.
240 3) Delete the redundant instructions
242 4) Insert the required copies [if any] that make the partially
243 redundant instructions fully redundant.
245 5) For other reaching expressions, insert an instruction to copy the value
246 to a newly created pseudo that will reach the redundant instruction.
248 The deletion is done first so that when we do insertions we
249 know which pseudo reg to use.
251 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
252 argue it is not. The number of iterations for the algorithm to converge
253 is typically 2-4 so I don't view it as that expensive (relatively speaking).
255 PRE GCSE depends heavily on the second CSE pass to clean up the copies
256 we create. To make an expression reach the place where it's redundant,
257 the result of the expression is copied to a new register, and the redundant
258 expression is deleted by replacing it with this new register. Classic GCSE
259 doesn't have this problem as much as it computes the reaching defs of
260 each register in each block and thus can try to use an existing register.
262 **********************
264 A fair bit of simplicity is created by creating small functions for simple
265 tasks, even when the function is only called in one place. This may
266 measurably slow things down [or may not] by creating more function call
267 overhead than is necessary. The source is laid out so that it's trivial
268 to make the affected functions inline so that one can measure what speed
269 up, if any, can be achieved, and maybe later when things settle things can
272 Help stamp out big monolithic functions! */
274 /* GCSE global vars. */
277 static FILE *gcse_file;
279 /* Note whether or not we should run jump optimization after gcse. We
280 want to do this for two cases.
282 * If we changed any jumps via cprop.
284 * If we added any labels via edge splitting. */
286 static int run_jump_opt_after_gcse;
288 /* Element I is a list of I's predecessors/successors. */
289 static int_list_ptr *s_preds;
290 static int_list_ptr *s_succs;
292 /* Element I is the number of predecessors/successors of basic block I. */
293 static int *num_preds;
294 static int *num_succs;
296 /* Bitmaps are normally not included in debugging dumps.
297 However it's useful to be able to print them from GDB.
298 We could create special functions for this, but it's simpler to
299 just allow passing stderr to the dump_foo fns. Since stderr can
300 be a macro, we store a copy here. */
301 static FILE *debug_stderr;
303 /* An obstack for our working variables. */
304 static struct obstack gcse_obstack;
306 /* Non-zero for each mode that supports (set (reg) (reg)).
307 This is trivially true for integer and floating point values.
308 It may or may not be true for condition codes. */
309 static char can_copy_p[(int) NUM_MACHINE_MODES];
311 /* Non-zero if can_copy_p has been initialized. */
312 static int can_copy_init_p;
318 /* Hash table of expressions. */
322 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
324 /* Index in the available expression bitmaps. */
326 /* Next entry with the same hash. */
327 struct expr *next_same_hash;
328 /* List of anticipatable occurrences in basic blocks in the function.
329 An "anticipatable occurrence" is one that is the first occurrence in the
330 basic block, the operands are not modified in the basic block prior
331 to the occurrence and the output is not used between the start of
332 the block and the occurrence. */
333 struct occr *antic_occr;
334 /* List of available occurrences in basic blocks in the function.
335 An "available occurrence" is one that is the last occurrence in the
336 basic block and the operands are not modified by following statements in
337 the basic block [including this insn]. */
338 struct occr *avail_occr;
339 /* Non-null if the computation is PRE redundant.
340 The value is the newly created pseudo-reg to record a copy of the
341 expression in all the places that reach the redundant copy. */
345 /* Occurrence of an expression.
346 There is one per basic block. If a pattern appears more than once the
347 last appearance is used [or first for anticipatable expressions]. */
351 /* Next occurrence of this expression. */
353 /* The insn that computes the expression. */
355 /* Non-zero if this [anticipatable] occurrence has been deleted. */
357 /* Non-zero if this [available] occurrence has been copied to
359 /* ??? This is mutually exclusive with deleted_p, so they could share
364 /* Expression and copy propagation hash tables.
365 Each hash table is an array of buckets.
366 ??? It is known that if it were an array of entries, structure elements
367 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
368 not clear whether in the final analysis a sufficient amount of memory would
369 be saved as the size of the available expression bitmaps would be larger
370 [one could build a mapping table without holes afterwards though].
371 Someday I'll perform the computation and figure it out.
374 /* Total size of the expression hash table, in elements. */
375 static int expr_hash_table_size;
377 This is an array of `expr_hash_table_size' elements. */
378 static struct expr **expr_hash_table;
380 /* Total size of the copy propagation hash table, in elements. */
381 static int set_hash_table_size;
383 This is an array of `set_hash_table_size' elements. */
384 static struct expr **set_hash_table;
386 /* Mapping of uids to cuids.
387 Only real insns get cuids. */
388 static int *uid_cuid;
390 /* Highest UID in UID_CUID. */
393 /* Get the cuid of an insn. */
394 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
396 /* Number of cuids. */
399 /* Mapping of cuids to insns. */
400 static rtx *cuid_insn;
402 /* Get insn from cuid. */
403 #define CUID_INSN(CUID) (cuid_insn[CUID])
405 /* Maximum register number in function prior to doing gcse + 1.
406 Registers created during this pass have regno >= max_gcse_regno.
407 This is named with "gcse" to not collide with global of same name. */
408 static int max_gcse_regno;
410 /* Maximum number of cse-able expressions found. */
412 /* Maximum number of assignments for copy propagation found. */
415 /* Table of registers that are modified.
416 For each register, each element is a list of places where the pseudo-reg
419 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
420 requires knowledge of which blocks kill which regs [and thus could use
421 a bitmap instead of the lists `reg_set_table' uses].
423 `reg_set_table' and could be turned into an array of bitmaps
425 [however perhaps it may be useful to keep the data as is].
426 One advantage of recording things this way is that `reg_set_table' is
427 fairly sparse with respect to pseudo regs but for hard regs could be
428 fairly dense [relatively speaking].
429 And recording sets of pseudo-regs in lists speeds
430 up functions like compute_transp since in the case of pseudo-regs we only
431 need to iterate over the number of times a pseudo-reg is set, not over the
432 number of basic blocks [clearly there is a bit of a slow down in the cases
433 where a pseudo is set more than once in a block, however it is believed
434 that the net effect is to speed things up]. This isn't done for hard-regs
435 because recording call-clobbered hard-regs in `reg_set_table' at each
436 function call can consume a fair bit of memory, and iterating over hard-regs
437 stored this way in compute_transp will be more expensive. */
439 typedef struct reg_set {
440 /* The next setting of this register. */
441 struct reg_set *next;
442 /* The insn where it was set. */
445 static reg_set **reg_set_table;
446 /* Size of `reg_set_table'.
447 The table starts out at max_gcse_regno + slop, and is enlarged as
449 static int reg_set_table_size;
450 /* Amount to grow `reg_set_table' by when it's full. */
451 #define REG_SET_TABLE_SLOP 100
453 /* Bitmap containing one bit for each register in the program.
454 Used when performing GCSE to track which registers have been set since
455 the start of the basic block. */
456 static sbitmap reg_set_bitmap;
458 /* For each block, a bitmap of registers set in the block.
459 This is used by expr_killed_p and compute_transp.
460 It is computed during hash table computation and not by compute_sets
461 as it includes registers added since the last pass (or between cprop and
462 gcse) and it's currently not easy to realloc sbitmap vectors. */
463 static sbitmap *reg_set_in_block;
465 /* For each block, non-zero if memory is set in that block.
466 This is computed during hash table computation and is used by
467 expr_killed_p and compute_transp.
468 ??? Handling of memory is very simple, we don't make any attempt
469 to optimize things (later).
470 ??? This can be computed by compute_sets since the information
472 static char *mem_set_in_block;
474 /* Various variables for statistics gathering. */
476 /* Memory used in a pass.
477 This isn't intended to be absolutely precise. Its intent is only
478 to keep an eye on memory usage. */
479 static int bytes_used;
480 /* GCSE substitutions made. */
481 static int gcse_subst_count;
482 /* Number of copy instructions created. */
483 static int gcse_create_count;
484 /* Number of constants propagated. */
485 static int const_prop_count;
486 /* Number of copies propagated. */
487 static int copy_prop_count;
489 /* These variables are used by classic GCSE.
490 Normally they'd be defined a bit later, but `rd_gen' needs to
491 be declared sooner. */
493 /* A bitmap of all ones for implementing the algorithm for available
494 expressions and reaching definitions. */
495 /* ??? Available expression bitmaps have a different size than reaching
496 definition bitmaps. This should be the larger of the two, however, it
497 is not currently used for reaching definitions. */
498 static sbitmap u_bitmap;
500 /* Each block has a bitmap of each type.
501 The length of each block's bitmap is:
503 max_cuid - for reaching definitions
504 n_exprs - for available expressions
506 Thus we view the bitmaps as 2 dimensional arrays. i.e.
507 rd_kill[block_num][cuid_num]
508 ae_kill[block_num][expr_num]
511 /* For reaching defs */
512 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
514 /* for available exprs */
515 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
518 static void compute_can_copy PROTO ((void));
520 static char *gmalloc PROTO ((unsigned int));
521 static char *grealloc PROTO ((char *, unsigned int));
522 static char *gcse_alloc PROTO ((unsigned long));
523 static void alloc_gcse_mem PROTO ((rtx));
524 static void free_gcse_mem PROTO ((void));
525 static void alloc_reg_set_mem PROTO ((int));
526 static void free_reg_set_mem PROTO ((void));
527 static void record_one_set PROTO ((int, rtx));
528 static void record_set_info PROTO ((rtx, rtx));
529 static void compute_sets PROTO ((rtx));
531 static void hash_scan_insn PROTO ((rtx, int, int));
532 static void hash_scan_set PROTO ((rtx, rtx, int));
533 static void hash_scan_clobber PROTO ((rtx, rtx));
534 static void hash_scan_call PROTO ((rtx, rtx));
535 static int want_to_gcse_p PROTO ((rtx));
536 static int oprs_unchanged_p PROTO ((rtx, rtx, int));
537 static int oprs_anticipatable_p PROTO ((rtx, rtx));
538 static int oprs_available_p PROTO ((rtx, rtx));
539 static void insert_expr_in_table PROTO ((rtx, enum machine_mode,
541 static void insert_set_in_table PROTO ((rtx, rtx));
542 static unsigned int hash_expr PROTO ((rtx, enum machine_mode,
544 static unsigned int hash_expr_1 PROTO ((rtx, enum machine_mode, int *));
545 static unsigned int hash_set PROTO ((int, int));
546 static int expr_equiv_p PROTO ((rtx, rtx));
547 static void record_last_reg_set_info PROTO ((rtx, int));
548 static void record_last_mem_set_info PROTO ((rtx));
549 static void record_last_set_info PROTO ((rtx, rtx));
550 static void compute_hash_table PROTO ((int));
551 static void alloc_set_hash_table PROTO ((int));
552 static void free_set_hash_table PROTO ((void));
553 static void compute_set_hash_table PROTO ((void));
554 static void alloc_expr_hash_table PROTO ((int));
555 static void free_expr_hash_table PROTO ((void));
556 static void compute_expr_hash_table PROTO ((void));
557 static void dump_hash_table PROTO ((FILE *, const char *, struct expr **,
559 static struct expr *lookup_expr PROTO ((rtx));
560 static struct expr *lookup_set PROTO ((int, rtx));
561 static struct expr *next_set PROTO ((int, struct expr *));
562 static void reset_opr_set_tables PROTO ((void));
563 static int oprs_not_set_p PROTO ((rtx, rtx));
564 static void mark_call PROTO ((rtx));
565 static void mark_set PROTO ((rtx, rtx));
566 static void mark_clobber PROTO ((rtx, rtx));
567 static void mark_oprs_set PROTO ((rtx));
569 static void alloc_cprop_mem PROTO ((int, int));
570 static void free_cprop_mem PROTO ((void));
571 static void compute_transp PROTO ((rtx, int, sbitmap *, int));
572 static void compute_transpout PROTO ((void));
573 static void compute_local_properties PROTO ((sbitmap *, sbitmap *,
575 static void compute_cprop_avinout PROTO ((void));
576 static void compute_cprop_data PROTO ((void));
577 static void find_used_regs PROTO ((rtx));
578 static int try_replace_reg PROTO ((rtx, rtx, rtx));
579 static struct expr *find_avail_set PROTO ((int, rtx));
580 static int cprop_jump PROTO((rtx, rtx, struct reg_use *, rtx));
582 static int cprop_cc0_jump PROTO((rtx, struct reg_use *, rtx));
584 static int cprop_insn PROTO ((rtx, int));
585 static int cprop PROTO ((int));
586 static int one_cprop_pass PROTO ((int, int));
588 static void alloc_pre_mem PROTO ((int, int));
589 static void free_pre_mem PROTO ((void));
590 static void compute_pre_data PROTO ((void));
591 static int pre_expr_reaches_here_p PROTO ((int, struct expr *,
593 static void insert_insn_end_bb PROTO ((struct expr *, int, int));
594 static void pre_insert PROTO ((struct expr **));
595 static void pre_insert_copy_insn PROTO ((struct expr *, rtx));
596 static void pre_insert_copies PROTO ((void));
597 static int pre_delete PROTO ((void));
598 static int pre_gcse PROTO ((void));
599 static int one_pre_gcse_pass PROTO ((int));
601 static void add_label_notes PROTO ((rtx, rtx));
603 static void alloc_code_hoist_mem PROTO ((int, int));
604 static void free_code_hoist_mem PROTO ((void));
605 static void compute_code_hoist_vbeinout PROTO ((void));
606 static void compute_code_hoist_data PROTO ((void));
607 static int hoist_expr_reaches_here_p PROTO ((int, int, int, char *));
608 static void hoist_code PROTO ((void));
609 static int one_code_hoisting_pass PROTO ((void));
611 static void alloc_rd_mem PROTO ((int, int));
612 static void free_rd_mem PROTO ((void));
613 static void handle_rd_kill_set PROTO ((rtx, int, int));
614 static void compute_kill_rd PROTO ((void));
615 static void compute_rd PROTO ((void));
616 static void alloc_avail_expr_mem PROTO ((int, int));
617 static void free_avail_expr_mem PROTO ((void));
618 static void compute_ae_gen PROTO ((void));
619 static int expr_killed_p PROTO ((rtx, int));
620 static void compute_ae_kill PROTO ((void));
621 static void compute_available PROTO ((void));
622 static int expr_reaches_here_p PROTO ((struct occr *, struct expr *,
624 static rtx computing_insn PROTO ((struct expr *, rtx));
625 static int def_reaches_here_p PROTO ((rtx, rtx));
626 static int can_disregard_other_sets PROTO ((struct reg_set **, rtx, int));
627 static int handle_avail_expr PROTO ((rtx, struct expr *));
628 static int classic_gcse PROTO ((void));
629 static int one_classic_gcse_pass PROTO ((int));
631 static void invalidate_nonnull_info PROTO ((rtx, rtx));
634 /* Entry point for global common subexpression elimination.
635 F is the first instruction in the function. */
643 /* Bytes used at start of pass. */
644 int initial_bytes_used;
645 /* Maximum number of bytes used by a pass. */
647 /* Point to release obstack data from for each pass. */
648 char *gcse_obstack_bottom;
650 /* We do not construct an accurate cfg in functions which call
651 setjmp, so just punt to be safe. */
652 if (current_function_calls_setjmp)
655 /* Assume that we do not need to run jump optimizations after gcse. */
656 run_jump_opt_after_gcse = 0;
658 /* For calling dump_foo fns from gdb. */
659 debug_stderr = stderr;
662 /* Identify the basic block information for this function, including
663 successors and predecessors. */
664 max_gcse_regno = max_reg_num ();
665 find_basic_blocks (f, max_gcse_regno, file, 1);
667 /* Return if there's nothing to do. */
668 if (n_basic_blocks <= 1)
670 /* Free storage allocated by find_basic_blocks. */
671 free_basic_block_vars (0);
675 /* Trying to perform global optimizations on flow graphs which have
676 a high connectivity will take a long time and is unlikely to be
679 In normal circumstances a cfg should have about twice as many edges
680 as blocks. But we do not want to punish small functions which have
681 a couple switch statements. So we require a relatively large number
682 of basic blocks and the ratio of edges to blocks to be high. */
683 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
685 /* Free storage allocated by find_basic_blocks. */
686 free_basic_block_vars (0);
690 /* See what modes support reg/reg copy operations. */
691 if (! can_copy_init_p)
697 gcc_obstack_init (&gcse_obstack);
699 /* Allocate and compute predecessors/successors. */
701 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
702 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
703 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
704 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
705 bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr);
706 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
709 dump_bb_data (file, s_preds, s_succs, 0);
711 /* Record where pseudo-registers are set.
712 This data is kept accurate during each pass.
713 ??? We could also record hard-reg information here
714 [since it's unchanging], however it is currently done during
715 hash table computation.
717 It may be tempting to compute MEM set information here too, but MEM
718 sets will be subject to code motion one day and thus we need to compute
719 information about memory sets when we build the hash tables. */
721 alloc_reg_set_mem (max_gcse_regno);
725 initial_bytes_used = bytes_used;
727 gcse_obstack_bottom = gcse_alloc (1);
729 while (changed && pass < MAX_PASSES)
733 fprintf (file, "GCSE pass %d\n\n", pass + 1);
735 /* Initialize bytes_used to the space for the pred/succ lists,
736 and the reg_set_table data. */
737 bytes_used = initial_bytes_used;
739 /* Each pass may create new registers, so recalculate each time. */
740 max_gcse_regno = max_reg_num ();
744 /* Don't allow constant propagation to modify jumps
746 changed = one_cprop_pass (pass + 1, 0);
749 changed |= one_classic_gcse_pass (pass + 1);
751 changed |= one_pre_gcse_pass (pass + 1);
753 if (max_pass_bytes < bytes_used)
754 max_pass_bytes = bytes_used;
756 /* Free up memory, then reallocate for code hoisting. We can
757 not re-use the existing allocated memory because the tables
758 will not have info for the insns or registers created by
759 partial redundancy elimination. */
762 /* It does not make sense to run code hoisting unless we are optimizing
763 for code size -- it rarely makes programs faster, and can make
764 them bigger if we did partial redundancy elimination (when optimizing
765 for space, we use a classic gcse algorithm instead of partial
766 redundancy algorithms). */
769 max_gcse_regno = max_reg_num ();
771 changed |= one_code_hoisting_pass ();
774 if (max_pass_bytes < bytes_used)
775 max_pass_bytes = bytes_used;
780 fprintf (file, "\n");
783 obstack_free (&gcse_obstack, gcse_obstack_bottom);
787 /* Do one last pass of copy propagation, including cprop into
788 conditional jumps. */
790 max_gcse_regno = max_reg_num ();
792 /* This time, go ahead and allow cprop to alter jumps. */
793 one_cprop_pass (pass + 1, 1);
798 fprintf (file, "GCSE of %s: %d basic blocks, ",
799 current_function_name, n_basic_blocks);
800 fprintf (file, "%d pass%s, %d bytes\n\n",
801 pass, pass > 1 ? "es" : "", max_pass_bytes);
804 /* Free our obstack. */
805 obstack_free (&gcse_obstack, NULL_PTR);
806 /* Free reg_set_table. */
808 /* Free storage used to record predecessor/successor data. */
810 /* Free storage allocated by find_basic_blocks. */
811 free_basic_block_vars (0);
812 return run_jump_opt_after_gcse;
815 /* Misc. utilities. */
817 /* Compute which modes support reg/reg copy operations. */
823 #ifndef AVOID_CCMODE_COPIES
826 char *free_point = (char *) oballoc (1);
828 bzero (can_copy_p, NUM_MACHINE_MODES);
831 for (i = 0; i < NUM_MACHINE_MODES; i++)
833 switch (GET_MODE_CLASS (i))
836 #ifdef AVOID_CCMODE_COPIES
839 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
840 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
841 if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
852 /* Free the objects we just allocated. */
856 /* Cover function to xmalloc to record bytes allocated. */
863 return xmalloc (size);
866 /* Cover function to xrealloc.
867 We don't record the additional size since we don't know it.
868 It won't affect memory usage stats much anyway. */
875 return xrealloc (ptr, size);
878 /* Cover function to obstack_alloc.
879 We don't need to record the bytes allocated here since
880 obstack_chunk_alloc is set to gmalloc. */
886 return (char *) obstack_alloc (&gcse_obstack, size);
889 /* Allocate memory for the cuid mapping array,
890 and reg/memory set tracking tables.
892 This is called at the start of each pass. */
901 /* Find the largest UID and create a mapping from UIDs to CUIDs.
902 CUIDs are like UIDs except they increase monotonically, have no gaps,
903 and only apply to real insns. */
905 max_uid = get_max_uid ();
906 n = (max_uid + 1) * sizeof (int);
907 uid_cuid = (int *) gmalloc (n);
908 bzero ((char *) uid_cuid, n);
909 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
911 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
912 INSN_CUID (insn) = i++;
914 INSN_CUID (insn) = i;
917 /* Create a table mapping cuids to insns. */
920 n = (max_cuid + 1) * sizeof (rtx);
921 cuid_insn = (rtx *) gmalloc (n);
922 bzero ((char *) cuid_insn, n);
923 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
925 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
927 CUID_INSN (i) = insn;
932 /* Allocate vars to track sets of regs. */
934 reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
936 /* Allocate vars to track sets of regs, memory per block. */
938 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
940 mem_set_in_block = (char *) gmalloc (n_basic_blocks);
943 /* Free memory allocated by alloc_gcse_mem. */
951 free (reg_set_bitmap);
953 free (reg_set_in_block);
954 free (mem_set_in_block);
958 /* Compute the local properties of each recorded expression.
959 Local properties are those that are defined by the block, irrespective
962 An expression is transparent in a block if its operands are not modified
965 An expression is computed (locally available) in a block if it is computed
966 at least once and expression would contain the same value if the
967 computation was moved to the end of the block.
969 An expression is locally anticipatable in a block if it is computed at
970 least once and expression would contain the same value if the computation
971 was moved to the beginning of the block.
973 We call this routine for cprop, pre and code hoisting. They all
974 compute basically the same information and thus can easily share
977 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
978 local properties. If NULL, then it is not necessary to compute
979 or record that particular property.
981 SETP controls which hash table to look at. If zero, this routine
982 looks at the expr hash table; if nonzero this routine looks at
983 the set hash table. Additionally, TRANSP is computed as ~TRANSP,
984 since this is really cprop's ABSALTERED. */
/* Compute, for every expression recorded in the selected hash table,
   the per-basic-block local dataflow properties: transparency (TRANSP),
   local availability (COMP) and local anticipatability (ANTLOC).  Any
   of the three sbitmap vectors may be NULL, in which case that
   property is not computed.  SETP nonzero selects the set hash table
   (cprop); zero selects the expr hash table (pre/hoisting).  */
987 compute_local_properties (transp, comp, antloc, setp)
993 int i, hash_table_size;
994 struct expr **hash_table;
996 /* Initialize any bitmaps that were passed in. */
/* For SETP, TRANSP is really cprop's ABSALTERED (computed as ~TRANSP,
   per the function's header comment), so it presumably starts all
   zero; otherwise it starts all ones (assume everything transparent).
   The if/else around the next two lines is elided -- confirm which
   branch is which.  */
1000 sbitmap_vector_zero (transp, n_basic_blocks);
1002 sbitmap_vector_ones (transp, n_basic_blocks);
1005 sbitmap_vector_zero (comp, n_basic_blocks);
1007 sbitmap_vector_zero (antloc, n_basic_blocks);
1009 /* We use the same code for cprop, pre and hoisting. For cprop
1010 we care about the set hash table, for pre and hoisting we
1011 care about the expr hash table. */
1012 hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1013 hash_table = setp ? set_hash_table : expr_hash_table;
/* Walk every bucket and every entry chained in it.  */
1015 for (i = 0; i < hash_table_size; i++)
1019 for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1022 int indx = expr->bitmap_index;
1024 /* The expression is transparent in this block if it is not killed.
1025 We start by assuming all are transparent [none are killed], and
1026 then reset the bits for those that are. */
1029 compute_transp (expr->expr, indx, transp, setp);
1031 /* The occurrences recorded in antic_occr are exactly those that
1032 we want to set to non-zero in ANTLOC. */
1036 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1038 int bb = BLOCK_NUM (occr->insn);
1039 SET_BIT (antloc[bb], indx);
1041 /* While we're scanning the table, this is a good place to
/* (rest of comment elided; this clears the per-occurrence deletion
   marker for the coming pass)  */
1043 occr->deleted_p = 0;
1047 /* The occurrences recorded in avail_occr are exactly those that
1048 we want to set to non-zero in COMP. */
1052 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1054 int bb = BLOCK_NUM (occr->insn);
1055 SET_BIT (comp[bb], indx);
1057 /* While we're scanning the table, this is a good place to
1063 /* While we're scanning the table, this is a good place to
1065 expr->reaching_reg = 0;
1071 /* Register set information.
1073 `reg_set_table' records where each register is set or otherwise
1076 static struct obstack reg_set_obstack;
1079 alloc_reg_set_mem (n_regs)
1084 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1085 n = reg_set_table_size * sizeof (struct reg_set *);
1086 reg_set_table = (struct reg_set **) gmalloc (n);
1087 bzero ((char *) reg_set_table, n);
1089 gcc_obstack_init (®_set_obstack);
1095 free (reg_set_table);
1096 obstack_free (®_set_obstack, NULL_PTR);
1099 /* Record REGNO in the reg_set table. */
1102 record_one_set (regno, insn)
1106 /* allocate a new reg_set element and link it onto the list */
1107 struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1109 /* If the table isn't big enough, enlarge it. */
1110 if (regno >= reg_set_table_size)
1112 int new_size = regno + REG_SET_TABLE_SLOP;
1113 reg_set_table = (struct reg_set **)
1114 grealloc ((char *) reg_set_table,
1115 new_size * sizeof (struct reg_set *));
1116 bzero ((char *) (reg_set_table + reg_set_table_size),
1117 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1118 reg_set_table_size = new_size;
1121 new_reg_info = (struct reg_set *) obstack_alloc (®_set_obstack,
1122 sizeof (struct reg_set));
1123 bytes_used += sizeof (struct reg_set);
1124 new_reg_info->insn = insn;
1125 new_reg_info->next = NULL;
1126 if (reg_set_table[regno] == NULL)
1127 reg_set_table[regno] = new_reg_info;
1130 reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1131 /* ??? One could keep a "last" pointer to speed this up. */
1132 while (reg_info_ptr1 != NULL)
1134 reg_info_ptr2 = reg_info_ptr1;
1135 reg_info_ptr1 = reg_info_ptr1->next;
1137 reg_info_ptr2->next = new_reg_info;
1141 /* For communication between next two functions (via note_stores). */
1142 static rtx record_set_insn;
1144 /* Called from compute_sets via note_stores to handle one
1145 SET or CLOBBER in an insn. */
1148 record_set_info (dest, setter)
1149 rtx dest, setter ATTRIBUTE_UNUSED;
1151 if (GET_CODE (dest) == SUBREG)
1152 dest = SUBREG_REG (dest);
1154 if (GET_CODE (dest) == REG)
1156 if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1157 record_one_set (REGNO (dest), record_set_insn);
1161 /* Scan the function and record each set of each pseudo-register.
1163 This is called once, at the start of the gcse pass.
1164 See the comments for `reg_set_table' for further docs. */
/* NOTE(review): the function header and the loop opening are elided;
   the visible body walks every insn of the function.  */
1174 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1176 record_set_insn = insn;
/* note_stores calls record_set_info once per SET/CLOBBER destination
   in this insn's pattern.  */
1177 note_stores (PATTERN (insn), record_set_info);
1179 insn = NEXT_INSN (insn);
1183 /* Hash table support. */
1185 #define NEVER_SET -1
1187 /* For each register, the cuid of the first/last insn in the block to set it,
1188 or -1 if not set. */
1189 static int *reg_first_set;
1190 static int *reg_last_set;
1192 /* While computing "first/last set" info, this is the CUID of first/last insn
1193 to set memory or -1 if not set. `mem_last_set' is also used when
1194 performing GCSE to record whether memory has been set since the beginning
1196 Note that handling of memory is very simple, we don't make any attempt
1197 to optimize things (later). */
1198 static int mem_first_set;
1199 static int mem_last_set;
1201 /* Perform a quick check whether X, the source of a set, is something
1202 we want to consider for GCSE. */
1208 enum rtx_code code = GET_CODE (x);
1226 /* Return non-zero if the operands of expression X are unchanged from the
1227 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1228 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1231 oprs_unchanged_p (x, insn, avail_p)
1239 /* repeat is used to turn tail-recursion into iteration. */
1245 code = GET_CODE (x);
/* REG case (case label and if/else elided): with AVAIL_P, the reg is
   unchanged to end of block iff its last set precedes INSN...  */
1250 return (reg_last_set[REGNO (x)] == NEVER_SET
1251 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
/* ... and without AVAIL_P, unchanged since block start iff its first
   set is at or after INSN.  */
1253 return (reg_first_set[REGNO (x)] == NEVER_SET
1254 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
/* MEM case (label and returns elided): any store to memory in the
   relevant direction kills the expression -- memory tracking here is
   all-or-nothing, no aliasing.  */
1259 if (mem_last_set != NEVER_SET
1260 && mem_last_set >= INSN_CUID (insn))
1265 if (mem_first_set != NEVER_SET
1266 && mem_first_set < INSN_CUID (insn))
/* Default: recurse over all operands; the last XEXP operand is
   handled by looping instead of recursing.  */
1293 i = GET_RTX_LENGTH (code) - 1;
1294 fmt = GET_RTX_FORMAT (code);
1299 rtx tem = XEXP (x, i);
1301 /* If we are about to do the last recursive call
1302 needed at this level, change it into iteration.
1303 This function is called enough to be worth it. */
1309 if (! oprs_unchanged_p (tem, insn, avail_p))
1312 else if (fmt[i] == 'E')
1315 for (j = 0; j < XVECLEN (x, i); j++)
1317 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1326 /* Return non-zero if the operands of expression X are unchanged from
1327 the start of INSN's basic block up to but not including INSN. */
1330 oprs_anticipatable_p (x, insn)
1333 return oprs_unchanged_p (x, insn, 0);
1336 /* Return non-zero if the operands of expression X are unchanged from
1337 INSN to the end of INSN's basic block. */
1340 oprs_available_p (x, insn)
1343 return oprs_unchanged_p (x, insn, 1);
1346 /* Hash expression X.
1347 MODE is only used if X is a CONST_INT.
1348 A boolean indicating if a volatile operand is found or if the expression
1349 contains something we don't want to insert in the table is stored in
1352 ??? One might want to merge this with canon_hash. Later. */
1355 hash_expr (x, mode, do_not_record_p, hash_table_size)
1357 enum machine_mode mode;
1358 int *do_not_record_p;
1359 int hash_table_size;
1363 *do_not_record_p = 0;
1365 hash = hash_expr_1 (x, mode, do_not_record_p);
1366 return hash % hash_table_size;
1369 /* Subroutine of hash_expr to do the actual work. */
/* Accumulate a hash of rtl expression X; MODE matters only for
   CONST_INT.  *DO_NOT_RECORD_P is set for volatile or otherwise
   untable-able rtl.  NOTE(review): several case labels, braces and
   `goto repeat' lines are elided in this excerpt.  */
1372 hash_expr_1 (x, mode, do_not_record_p)
1374 enum machine_mode mode;
1375 int *do_not_record_p;
1382 /* repeat is used to turn tail-recursion into iteration. */
1388 code = GET_CODE (x);
/* REG (case label elided): hash the register number.  */
1393 register int regno = REGNO (x);
1394 hash += ((unsigned) REG << 7) + regno;
/* CONST_INT (case label elided): the mode is supplied by the caller
   since CONST_INT itself is VOIDmode.  */
1400 unsigned HOST_WIDE_INT tem = INTVAL (x);
1401 hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1406 /* This is like the general case, except that it only counts
1407 the integers representing the constant. */
1408 hash += (unsigned) code + (unsigned) GET_MODE (x);
1409 if (GET_MODE (x) != VOIDmode)
1410 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1412 unsigned tem = XWINT (x, i);
1416 hash += ((unsigned) CONST_DOUBLE_LOW (x)
1417 + (unsigned) CONST_DOUBLE_HIGH (x));
1420 /* Assume there is only one rtx object for any given label. */
1422 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1423 differences and differences between each stage's debugging dumps. */
1424 hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1429 /* Don't hash on the symbol's address to avoid bootstrap differences.
1430 Different hash values may cause expressions to be recorded in
1431 different orders and thus different registers to be used in the
1432 final assembler. This also avoids differences in the dump files
1433 between various stages. */
1435 unsigned char *p = (unsigned char *) XSTR (x, 0);
1437 h += (h << 7) + *p++; /* ??? revisit */
1438 hash += ((unsigned) SYMBOL_REF << 7) + h;
/* MEM (case label elided): volatile memory is never recorded.  */
1443 if (MEM_VOLATILE_P (x))
1445 *do_not_record_p = 1;
1448 hash += (unsigned) MEM;
/* Fold in the alias set so MEMs in different alias sets never
   collapse into one entry (see expr_equiv_p).  */
1449 hash += MEM_ALIAS_SET (x);
/* Side-effecting / volatile codes are refused outright.  */
1460 case UNSPEC_VOLATILE:
1461 *do_not_record_p = 1;
/* ASM_OPERANDS or similar (label elided): refuse if volatile.  */
1465 if (MEM_VOLATILE_P (x))
1467 *do_not_record_p = 1;
/* Default: hash code, mode and all operands, iterating on the last
   XEXP operand instead of recursing.  */
1475 i = GET_RTX_LENGTH (code) - 1;
1476 hash += (unsigned) code + (unsigned) GET_MODE (x);
1477 fmt = GET_RTX_FORMAT (code);
1482 rtx tem = XEXP (x, i);
1484 /* If we are about to do the last recursive call
1485 needed at this level, change it into iteration.
1486 This function is called enough to be worth it. */
1492 hash += hash_expr_1 (tem, 0, do_not_record_p);
1493 if (*do_not_record_p)
1496 else if (fmt[i] == 'E')
1497 for (j = 0; j < XVECLEN (x, i); j++)
1499 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1500 if (*do_not_record_p)
1503 else if (fmt[i] == 's')
1505 register unsigned char *p = (unsigned char *) XSTR (x, i);
1510 else if (fmt[i] == 'i')
1512 register unsigned tem = XINT (x, i);
/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.  This keeps the PRE
   copy propagation code simple.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  /* The register number itself is the hash value; just fold it into
     the table size.  */
  unsigned int hash = (unsigned int) regno;

  return hash % hash_table_size;
}
1540 /* Return non-zero if exp1 is equivalent to exp2.
1541 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
/* NOTE(review): the function header, several case labels and the
   early-return lines are elided in this excerpt; the per-code
   comments below are inferred from the accessors used and should be
   confirmed against the full source.  */
1548 register enum rtx_code code;
1549 register const char *fmt;
/* Identical pointers are trivially equivalent; a single null is not
   (both-null is presumably handled on an elided line).  */
1553 if (x == 0 || y == 0)
1556 code = GET_CODE (x);
1557 if (code != GET_CODE (y))
1560 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1561 if (GET_MODE (x) != GET_MODE (y))
/* CONST_INT case (label elided).  */
1571 return INTVAL (x) == INTVAL (y);
/* LABEL_REF case (label elided): labels are unique objects.  */
1574 return XEXP (x, 0) == XEXP (y, 0);
/* SYMBOL_REF case (label elided): names are shared strings.  */
1577 return XSTR (x, 0) == XSTR (y, 0);
/* REG case (label elided).  */
1580 return REGNO (x) == REGNO (y);
/* MEM case (label elided).  */
1583 /* Can't merge two expressions in different alias sets, since we can
1584 decide that the expression is transparent in a block when it isn't,
1585 due to it being set with the different alias set. */
1586 if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1590 /* For commutative operations, check both orders. */
1598 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1599 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1600 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1601 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1607 /* Compare the elements. If any pair of corresponding elements
1608 fail to match, return 0 for the whole thing. */
1610 fmt = GET_RTX_FORMAT (code);
1611 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1616 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1621 if (XVECLEN (x, i) != XVECLEN (y, i))
1623 for (j = 0; j < XVECLEN (x, i); j++)
1624 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1629 if (strcmp (XSTR (x, i), XSTR (y, i)))
1634 if (XINT (x, i) != XINT (y, i))
1639 if (XWINT (x, i) != XWINT (y, i))
1654 /* Insert expression X in INSN in the hash table.
1655 If it is already present, record it as the last occurrence in INSN's
1658 MODE is the mode of the value X is being stored into.
1659 It is only used if X is a CONST_INT.
1661 ANTIC_P is non-zero if X is an anticipatable expression.
1662 AVAIL_P is non-zero if X is an available expression. */
1665 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1667 enum machine_mode mode;
1669 int antic_p, avail_p;
1671 int found, do_not_record_p;
1673 struct expr *cur_expr, *last_expr = NULL;
1674 struct occr *antic_occr, *avail_occr;
1675 struct occr *last_occr = NULL;
1677 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1679 /* Do not insert expression in table if it contains volatile operands,
1680 or if hash_expr determines the expression is something we don't want
1681 to or can't handle. */
1682 if (do_not_record_p)
/* Walk this bucket's chain looking for a structurally equivalent
   entry; remember the chain tail in case we must append.  */
1685 cur_expr = expr_hash_table[hash]
1688 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1690 /* If the expression isn't found, save a pointer to the end of
1692 last_expr = cur_expr;
1693 cur_expr = cur_expr->next_same_hash;
/* Not found in the bucket (guard elided): allocate a new entry.  */
1698 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1699 bytes_used += sizeof (struct expr);
1700 if (expr_hash_table[hash] == NULL)
1702 /* This is the first pattern that hashed to this index. */
1703 expr_hash_table[hash] = cur_expr;
1707 /* Add EXPR to end of this hash chain. */
1708 last_expr->next_same_hash = cur_expr;
1710 /* Set the fields of the expr element. */
/* NOTE(review): the store of the pattern itself (cur_expr->expr = x)
   appears to be elided just before the next line -- confirm.  */
1712 cur_expr->bitmap_index = n_exprs++;
1713 cur_expr->next_same_hash = NULL;
1714 cur_expr->antic_occr = NULL;
1715 cur_expr->avail_occr = NULL;
1718 /* Now record the occurrence(s). */
/* Anticipatable occurrence; the `if (antic_p)' guard is elided.  */
1722 antic_occr = cur_expr->antic_occr;
1724 /* Search for another occurrence in the same basic block. */
1725 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1727 /* If an occurrence isn't found, save a pointer to the end of
1729 last_occr = antic_occr;
1730 antic_occr = antic_occr->next;
1735 /* Found another instance of the expression in the same basic block.
1736 Prefer the currently recorded one. We want the first one in the
1737 block and the block is scanned from start to end. */
1738 ; /* nothing to do */
1742 /* First occurrence of this expression in this basic block. */
1743 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1744 bytes_used += sizeof (struct occr);
1745 /* First occurrence of this expression in any block? */
1746 if (cur_expr->antic_occr == NULL)
1747 cur_expr->antic_occr = antic_occr;
1749 last_occr->next = antic_occr;
1750 antic_occr->insn = insn;
1751 antic_occr->next = NULL;
/* Available occurrence; the `if (avail_p)' guard is elided.  */
1757 avail_occr = cur_expr->avail_occr;
1759 /* Search for another occurrence in the same basic block. */
1760 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1762 /* If an occurrence isn't found, save a pointer to the end of
1764 last_occr = avail_occr;
1765 avail_occr = avail_occr->next;
1770 /* Found another instance of the expression in the same basic block.
1771 Prefer this occurrence to the currently recorded one. We want
1772 the last one in the block and the block is scanned from start
1774 avail_occr->insn = insn;
1778 /* First occurrence of this expression in this basic block. */
1779 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1780 bytes_used += sizeof (struct occr);
1781 /* First occurrence of this expression in any block? */
1782 if (cur_expr->avail_occr == NULL)
1783 cur_expr->avail_occr = avail_occr;
1785 last_occr->next = avail_occr;
1786 avail_occr->insn = insn;
1787 avail_occr->next = NULL;
1792 /* Insert pattern X in INSN in the hash table.
1793 X is a SET of a reg to either another reg or a constant.
1794 If it is already present, record it as the last occurrence in INSN's
1798 insert_set_in_table (x, insn)
1804 struct expr *cur_expr, *last_expr = NULL;
1805 struct occr *cur_occr, *last_occr = NULL;
/* Only plain (set (reg) ...) patterns belong in the set table; the
   abort/assertion on violation is presumably on an elided line.  */
1807 if (GET_CODE (x) != SET
1808 || GET_CODE (SET_DEST (x)) != REG)
/* Sets are bucketed purely by destination register number.  */
1811 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1813 cur_expr = set_hash_table[hash];
1816 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1818 /* If the expression isn't found, save a pointer to the end of
1820 last_expr = cur_expr;
1821 cur_expr = cur_expr->next_same_hash;
/* Not found (guard elided): allocate and chain a new entry.  */
1826 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1827 bytes_used += sizeof (struct expr);
1828 if (set_hash_table[hash] == NULL)
1830 /* This is the first pattern that hashed to this index. */
1831 set_hash_table[hash] = cur_expr;
1835 /* Add EXPR to end of this hash chain. */
1836 last_expr->next_same_hash = cur_expr;
1838 /* Set the fields of the expr element.
1839 We must copy X because it can be modified when copy propagation is
1840 performed on its operands. */
1841 /* ??? Should this go in a different obstack? */
1842 cur_expr->expr = copy_rtx (x);
1843 cur_expr->bitmap_index = n_sets++;
1844 cur_expr->next_same_hash = NULL;
1845 cur_expr->antic_occr = NULL;
1846 cur_expr->avail_occr = NULL;
1849 /* Now record the occurrence. */
/* Only availability matters for copy/const propagation, so only the
   avail_occr list is maintained for sets.  */
1851 cur_occr = cur_expr->avail_occr;
1853 /* Search for another occurrence in the same basic block. */
1854 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1856 /* If an occurrence isn't found, save a pointer to the end of
1858 last_occr = cur_occr;
1859 cur_occr = cur_occr->next;
1864 /* Found another instance of the expression in the same basic block.
1865 Prefer this occurrence to the currently recorded one. We want
1866 the last one in the block and the block is scanned from start
1868 cur_occr->insn = insn;
1872 /* First occurrence of this expression in this basic block. */
1873 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1874 bytes_used += sizeof (struct occr);
1875 /* First occurrence of this expression in any block? */
1876 if (cur_expr->avail_occr == NULL)
1877 cur_expr->avail_occr = cur_occr;
1879 last_occr->next = cur_occr;
1880 cur_occr->insn = insn;
1881 cur_occr->next = NULL;
1885 /* Scan pattern PAT of INSN and add an entry to the hash table.
1886 If SET_P is non-zero, this is for the assignment hash table,
1887 otherwise it is for the expression hash table. */
1890 hash_scan_set (pat, insn, set_p)
1894 rtx src = SET_SRC (pat);
1895 rtx dest = SET_DEST (pat);
1897 if (GET_CODE (src) == CALL)
1898 hash_scan_call (src, insn);
1900 if (GET_CODE (dest) == REG)
1902 int regno = REGNO (dest);
1905 /* Only record sets of pseudo-regs in the hash table. */
/* Expression-table path; the `if (! set_p && ...' guard line is
   elided before the continuing conditions below.  */
1907 && regno >= FIRST_PSEUDO_REGISTER
1908 /* Don't GCSE something if we can't do a reg/reg copy. */
1909 && can_copy_p [GET_MODE (dest)]
1910 /* Is SET_SRC something we want to gcse? */
1911 && want_to_gcse_p (src))
1913 /* An expression is not anticipatable if its operands are
1914 modified before this insn. */
1915 int antic_p = oprs_anticipatable_p (src, insn);
1916 /* An expression is not available if its operands are
1917 subsequently modified, including this insn. */
1918 int avail_p = oprs_available_p (src, insn);
1919 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1921 /* Record sets for constant/copy propagation. */
/* Set-table path (guarded by SET_P on an elided line): the source
   must be another pseudo reg or a constant.  */
1923 && regno >= FIRST_PSEUDO_REGISTER
1924 && ((GET_CODE (src) == REG
1925 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1926 && can_copy_p [GET_MODE (dest)])
1927 || GET_CODE (src) == CONST_INT
1928 || GET_CODE (src) == SYMBOL_REF
1929 || GET_CODE (src) == CONST_DOUBLE)
1930 /* A copy is not available if its src or dest is subsequently
1931 modified. Here we want to search from INSN+1 on, but
1932 oprs_available_p searches from INSN on. */
1933 && (insn == BLOCK_END (BLOCK_NUM (insn))
1934 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1935 && oprs_available_p (pat, tmp))))
1936 insert_set_in_table (pat, insn);
/* Process a CLOBBER found by hash_scan_insn.  Placeholder: clobbers
   are not entered in either hash table at present.  */
1941 hash_scan_clobber (x, insn)
1942 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1944 /* Currently nothing to do. */
/* Process a CALL found by hash_scan_insn or hash_scan_set.
   Placeholder: calls are not entered in either hash table at
   present.  */
1948 hash_scan_call (x, insn)
1949 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1951 /* Currently nothing to do. */
1954 /* Process INSN and add hash table entries as appropriate.
1956 Only available expressions that set a single pseudo-reg are recorded.
1958 Single sets in a PARALLEL could be handled, but it's an extra complication
1959 that isn't dealt with right now. The trick is handling the CLOBBERs that
1960 are also in the PARALLEL. Later.
1962 If SET_P is non-zero, this is for the assignment hash table,
1963 otherwise it is for the expression hash table.
1964 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1965 not record any expressions. */
1968 hash_scan_insn (insn, set_p, in_libcall_block)
1971 int in_libcall_block;
1973 rtx pat = PATTERN (insn);
1975 /* Pick out the sets of INSN and for other forms of instructions record
1976 what's been modified. */
1978 if (GET_CODE (pat) == SET && ! in_libcall_block)
1980 /* Ignore obvious no-ops. */
1981 if (SET_SRC (pat) != SET_DEST (pat))
1982 hash_scan_set (pat, insn, set_p)
/* PARALLEL: scan each element; per the header comment above, SETs
   inside a PARALLEL are not entered in the tables -- only embedded
   CALLs and CLOBBERs are dispatched (the SET branch's body appears to
   be elided here).  */
1984 else if (GET_CODE (pat) == PARALLEL)
1988 for (i = 0; i < XVECLEN (pat, 0); i++)
1990 rtx x = XVECEXP (pat, 0, i);
1992 if (GET_CODE (x) == SET)
1994 if (GET_CODE (SET_SRC (x)) == CALL)
1995 hash_scan_call (SET_SRC (x), insn);
1997 else if (GET_CODE (x) == CLOBBER)
1998 hash_scan_clobber (x, insn);
1999 else if (GET_CODE (x) == CALL)
2000 hash_scan_call (x, insn);
2003 else if (GET_CODE (pat) == CLOBBER)
2004 hash_scan_clobber (pat, insn);
2005 else if (GET_CODE (pat) == CALL)
2006 hash_scan_call (pat, insn);
/* Dump hash table TABLE (TABLE_SIZE buckets, TOTAL_SIZE entries) to
   FILE, labelled NAME, in bitmap_index order rather than bucket
   order.  */
2010 dump_hash_table (file, name, table, table_size, total_size)
2013 struct expr **table;
2014 int table_size, total_size;
2017 /* Flattened out table, so it's printed in proper order. */
/* NOTE(review): two alloca'd arrays proportional to TOTAL_SIZE could
   use substantial stack for large functions -- consider gmalloc/free
   here.  */
2018 struct expr **flat_table = (struct expr **) alloca (total_size * sizeof (struct expr *));
2019 unsigned int *hash_val = (unsigned int *) alloca (total_size * sizeof (unsigned int));
2021 bzero ((char *) flat_table, total_size * sizeof (struct expr *));
/* Index each entry by its bitmap_index and remember which bucket it
   came from.  */
2022 for (i = 0; i < table_size; i++)
2026 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2028 flat_table[expr->bitmap_index] = expr;
2029 hash_val[expr->bitmap_index] = i;
2033 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2034 name, table_size, total_size);
2036 for (i = 0; i < total_size; i++)
2038 struct expr *expr = flat_table[i];
2040 fprintf (file, "Index %d (hash value %d)\n ",
2041 expr->bitmap_index, hash_val[i]);
2042 print_rtl (file, expr->expr);
2043 fprintf (file, "\n");
2046 fprintf (file, "\n");
2049 /* Record register first/last/block set information for REGNO in INSN.
2050 reg_first_set records the first place in the block where the register
2051 is set and is used to compute "anticipatability".
2052 reg_last_set records the last place in the block where the register
2053 is set and is used to compute "availability".
2054 reg_set_in_block records whether the register is set in the block
2055 and is used to compute "transparency". */
2058 record_last_reg_set_info (insn, regno)
2062 if (reg_first_set[regno] == NEVER_SET)
2063 reg_first_set[regno] = INSN_CUID (insn);
2064 reg_last_set[regno] = INSN_CUID (insn);
2065 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2068 /* Record memory first/last/block set information for INSN. */
2071 record_last_mem_set_info (insn)
2074 if (mem_first_set == NEVER_SET)
2075 mem_first_set = INSN_CUID (insn);
2076 mem_last_set = INSN_CUID (insn);
2077 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2080 /* Used for communicating between next two routines. */
2081 static rtx last_set_insn;
2083 /* Called from compute_hash_table via note_stores to handle one
2084 SET or CLOBBER in an insn. */
2087 record_last_set_info (dest, setter)
2088 rtx dest, setter ATTRIBUTE_UNUSED;
2090 if (GET_CODE (dest) == SUBREG)
2091 dest = SUBREG_REG (dest);
2093 if (GET_CODE (dest) == REG)
2094 record_last_reg_set_info (last_set_insn, REGNO (dest));
2095 else if (GET_CODE (dest) == MEM
2096 /* Ignore pushes, they clobber nothing. */
2097 && ! push_operand (dest, GET_MODE (dest)))
2098 record_last_mem_set_info (last_set_insn);
2101 /* Top level function to create an expression or assignment hash table.
2103 Expression entries are placed in the hash table if
2104 - they are of the form (set (pseudo-reg) src),
2105 - src is something we want to perform GCSE on,
2106 - none of the operands are subsequently modified in the block
2108 Assignment entries are placed in the hash table if
2109 - they are of the form (set (pseudo-reg) src),
2110 - src is something we want to perform const/copy propagation on,
2111 - none of the operands or target are subsequently modified in the block
2112 Currently src must be a pseudo-reg or a const_int.
2114 F is the first insn.
2115 SET_P is non-zero for computing the assignment hash table. */
2118 compute_hash_table (set_p)
2123 /* While we compute the hash table we also compute a bit array of which
2124 registers are set in which blocks.
2125 We also compute which blocks set memory, in the absence of aliasing
2126 support [which is TODO].
2127 ??? This isn't needed during const/copy propagation, but it's cheap to
2129 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2130 bzero ((char *) mem_set_in_block, n_basic_blocks);
2132 /* Some working arrays used to track first and last set in each block. */
2133 /* ??? One could use alloca here, but at some size a threshold is crossed
2134 beyond which one should use malloc. Are we at that threshold here? */
2135 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2136 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2138 for (bb = 0; bb < n_basic_blocks; bb++)
2142 int in_libcall_block;
2145 /* First pass over the instructions records information used to
2146 determine when registers and memory are first and last set.
2147 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2148 could be moved to compute_sets since they currently don't change. */
/* Reset the per-block first/last trackers before scanning BB.  */
2150 for (i = 0; i < max_gcse_regno; i++)
2151 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2152 mem_first_set = NEVER_SET;
2153 mem_last_set = NEVER_SET;
2155 for (insn = BLOCK_HEAD (bb);
2156 insn && insn != NEXT_INSN (BLOCK_END (bb));
2157 insn = NEXT_INSN (insn))
2159 #ifdef NON_SAVING_SETJMP
2160 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2161 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
/* A setjmp note conservatively counts as a set of every hard reg.  */
2163 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2164 record_last_reg_set_info (insn, regno);
2169 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
/* Calls clobber all call-used hard registers (with the usual
   fixed/frame/PIC exceptions below) and, unless const, memory.  */
2172 if (GET_CODE (insn) == CALL_INSN)
2174 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2175 if ((call_used_regs[regno]
2176 && regno != STACK_POINTER_REGNUM
2177 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2178 && regno != HARD_FRAME_POINTER_REGNUM
2180 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2181 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2183 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2184 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2187 && regno != FRAME_POINTER_REGNUM)
2188 || global_regs[regno])
2189 record_last_reg_set_info (insn, regno);
2190 if (! CONST_CALL_P (insn))
2191 record_last_mem_set_info (insn);
2194 last_set_insn = insn;
2195 note_stores (PATTERN (insn), record_last_set_info);
2198 /* The next pass builds the hash table. */
2200 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2201 insn && insn != NEXT_INSN (BLOCK_END (bb));
2202 insn = NEXT_INSN (insn))
2204 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
/* REG_LIBCALL .. REG_RETVAL notes bracket libcall sequences, whose
   insns must not be recorded (see hash_scan_insn).  */
2206 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2207 in_libcall_block = 1;
2208 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2209 in_libcall_block = 0;
2210 hash_scan_insn (insn, set_p, in_libcall_block);
2215 free (reg_first_set);
2216 free (reg_last_set);
2217 /* Catch bugs early. */
2218 reg_first_set = reg_last_set = 0;
2221 /* Allocate space for the set hash table.
2222 N_INSNS is the number of instructions in the function.
2223 It is used to determine the number of buckets to use. */
2226 alloc_set_hash_table (n_insns)
2231 set_hash_table_size = n_insns / 4;
2232 if (set_hash_table_size < 11)
2233 set_hash_table_size = 11;
2234 /* Attempt to maintain efficient use of hash table.
2235 Making it an odd number is simplest for now.
2236 ??? Later take some measurements. */
2237 set_hash_table_size |= 1;
2238 n = set_hash_table_size * sizeof (struct expr *);
2239 set_hash_table = (struct expr **) gmalloc (n);
2242 /* Free things allocated by alloc_set_hash_table. */
/* Only the bucket array is freed here; the entries themselves were
   allocated with gcse_alloc and are presumably released with that
   allocator's storage.  */
2245 free_set_hash_table ()
2247 free (set_hash_table);
2250 /* Compute the hash table for doing copy/const propagation. */
2253 compute_set_hash_table ()
2255 /* Initialize count of number of entries in hash table. */
/* NOTE(review): the reset of the entry counter (presumably
   `n_sets = 0;') is elided between these lines -- confirm.  */
2257 bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
/* Nonzero argument selects the set (assignment) table.  */
2259 compute_hash_table (1);
2262 /* Allocate space for the expression hash table.
2263 N_INSNS is the number of instructions in the function.
2264 It is used to determine the number of buckets to use. */
2267 alloc_expr_hash_table (n_insns)
2272 expr_hash_table_size = n_insns / 2;
2273 /* Make sure the amount is usable. */
2274 if (expr_hash_table_size < 11)
2275 expr_hash_table_size = 11;
2276 /* Attempt to maintain efficient use of hash table.
2277 Making it an odd number is simplest for now.
2278 ??? Later take some measurements. */
2279 expr_hash_table_size |= 1;
2280 n = expr_hash_table_size * sizeof (struct expr *);
2281 expr_hash_table = (struct expr **) gmalloc (n);
2284 /* Free things allocated by alloc_expr_hash_table. */
/* Only the bucket array is freed here; the entries themselves were
   allocated with gcse_alloc and are presumably released with that
   allocator's storage.  */
2287 free_expr_hash_table ()
2289 free (expr_hash_table);
2292 /* Compute the hash table for doing GCSE. */
2295 compute_expr_hash_table ()
2297 /* Initialize count of number of entries in hash table. */
/* NOTE(review): the reset of the entry counter (presumably
   `n_exprs = 0;') is elided between these lines -- confirm.  */
2299 bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
/* Zero argument selects the expression table.  */
2301 compute_hash_table (0);
2304 /* Expression tracking support. */
2306 /* Lookup pattern PAT in the expression table.
2307 The result is a pointer to the table entry, or NULL if not found. */
2309 static struct expr *
2313 int do_not_record_p;
2314 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2315 expr_hash_table_size);
/* Anything hash_expr refuses to record can never be in the table.  */
2318 if (do_not_record_p)
2321 expr = expr_hash_table[hash];
/* Walk the bucket's chain until a structurally equivalent entry is
   found (or the chain ends, yielding NULL).  */
2323 while (expr && ! expr_equiv_p (expr->expr, pat))
2324 expr = expr->next_same_hash;
2329 /* Lookup REGNO in the set table.
2330 If PAT is non-NULL look for the entry that matches it, otherwise return
2331 the first entry for REGNO.
2332 The result is a pointer to the table entry, or NULL if not found. */
2334 static struct expr *
2335 lookup_set (regno, pat)
2339 unsigned int hash = hash_set (regno, set_hash_table_size);
2342 expr = set_hash_table[hash];
/* PAT non-null (guard elided): exact structural match wanted.  */
2346 while (expr && ! expr_equiv_p (expr->expr, pat))
2347 expr = expr->next_same_hash;
/* PAT null (else branch elided): first entry whose SET destination is
   REGNO.  */
2351 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2352 expr = expr->next_same_hash;
2358 /* Return the next entry for REGNO in list EXPR. */
2360 static struct expr *
2361 next_set (regno, expr)
/* Advance along the hash chain until another SET of REGNO is found or
   the chain ends (the do/while keywords are elided in this excerpt).  */
2366 expr = expr->next_same_hash;
2367 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2371 /* Reset tables used to keep track of what's still available [since the
2372 start of the block]. */
2375 reset_opr_set_tables ()
2377 /* Maintain a bitmap of which regs have been set since beginning of
2379 sbitmap_zero (reg_set_bitmap);
2380 /* Also keep a record of the last instruction to modify memory.
2381 For now this is very trivial, we only record whether any memory
2382 location has been modified. */
/* NOTE(review): the reset of mem_last_set (presumably to 0, matching
   the `mem_last_set != 0' test in oprs_not_set_p) is elided here.  */
2386 /* Return non-zero if the operands of X are not set before INSN in
2387 INSN's basic block. */
2390 oprs_not_set_p (x, insn)
2397 /* repeat is used to turn tail-recursion into iteration. */
2403 code = GET_CODE (x);
/* NOTE(review): the switch's case labels are elided; from the
   operations used, the MEM case tests mem_last_set and the REG case
   tests reg_set_bitmap.  */
2418 if (mem_last_set != 0)
2424 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
/* Default: recurse over all operands, iterating on the last XEXP
   operand instead of recursing.  */
2430 fmt = GET_RTX_FORMAT (code);
2431 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2436 /* If we are about to do the last recursive call
2437 needed at this level, change it into iteration.
2438 This function is called enough to be worth it. */
2444 not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2448 else if (fmt[i] == 'E')
2451 for (j = 0; j < XVECLEN (x, i); j++)
2453 int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2463 /* Mark things set by a CALL. */
/* NOTE(review): the function header and any guard around this line
   are elided; only the memory-clobber recording for oprs_not_set_p is
   visible.  */
2469 mem_last_set = INSN_CUID (insn);
2472 /* Mark things set by a SET. */
/* Record, for oprs_not_set_p, the register or memory location that
   the SET pattern PAT of INSN modifies.  */
2475 mark_set (pat, insn)
2478 rtx dest = SET_DEST (pat);
/* Strip wrappers to reach the actual object being assigned.  */
2480 while (GET_CODE (dest) == SUBREG
2481 || GET_CODE (dest) == ZERO_EXTRACT
2482 || GET_CODE (dest) == SIGN_EXTRACT
2483 || GET_CODE (dest) == STRICT_LOW_PART)
2484 dest = XEXP (dest, 0);
2486 if (GET_CODE (dest) == REG)
2487 SET_BIT (reg_set_bitmap, REGNO (dest));
2488 else if (GET_CODE (dest) == MEM)
2489 mem_last_set = INSN_CUID (insn);
/* A call on the right-hand side has its own effects; the handler
   invocation for it is elided after this line.  */
2491 if (GET_CODE (SET_SRC (pat)) == CALL)
2495 /* Record things set by a CLOBBER. */
2498 mark_clobber (pat, insn)
2501 rtx clob = XEXP (pat, 0);
/* Strip wrappers to reach the clobbered register or memory.  */
2503 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2504 clob = XEXP (clob, 0);
2506 if (GET_CODE (clob) == REG)
2507 SET_BIT (reg_set_bitmap, REGNO (clob));
/* NOTE(review): a line appears missing here -- presumably the MEM case
   guarding this memory-clobber record; confirm against full source.  */
2509 mem_last_set = INSN_CUID (insn);
2512 /* Record things set by INSN.
2513 This data is used by oprs_not_set_p. */
2516 mark_oprs_set (insn)
2519 rtx pat = PATTERN (insn);
/* Dispatch on the pattern's shape: a plain SET, a PARALLEL of
   SETs/CLOBBERs/CALLs, a lone CLOBBER, or a CALL.  */
2521 if (GET_CODE (pat) == SET)
2522 mark_set (pat, insn);
2523 else if (GET_CODE (pat) == PARALLEL)
2527 for (i = 0; i < XVECLEN (pat, 0); i++)
2529 rtx x = XVECEXP (pat, 0, i);
2531 if (GET_CODE (x) == SET)
2533 else if (GET_CODE (x) == CLOBBER)
2534 mark_clobber (x, insn);
2535 else if (GET_CODE (x) == CALL)
2539 else if (GET_CODE (pat) == CLOBBER)
2540 mark_clobber (pat, insn);
2541 else if (GET_CODE (pat) == CALL)
2546 /* Classic GCSE reaching definition support. */
2548 /* Allocate reaching def variables. */
/* One bitmap per block, one bit per insn, for each of the four
   reaching-definition dataflow sets (kill, gen, in, out).
   NOTE(review): allocation uses N_BLOCKS but zeroing uses the global
   n_basic_blocks -- presumably always equal here; confirm at call site.  */
2551 alloc_rd_mem (n_blocks, n_insns)
2552 int n_blocks, n_insns;
2554 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2555 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2557 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2558 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2560 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2561 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2563 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2564 sbitmap_vector_zero (rd_out, n_basic_blocks);
2567 /* Free reaching def variables. */
/* NOTE(review): companion free() calls for rd_kill/rd_gen/rd_out are
   missing from this view.  */
2574 free (reaching_defs);
2578 /* Add INSN to the kills of BB.
2579 REGNO, set in BB, is killed by INSN. */
2582 handle_rd_kill_set (insn, regno, bb)
2586 struct reg_set *this_reg = reg_set_table[regno];
/* Every setting of REGNO outside INSN's block kills the definition
   made in BB, so set its bit in BB's kill set.  */
2590 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2591 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2592 this_reg = this_reg->next;
2596 /* Compute the set of kill's for reaching definitions. */
2604 For each set bit in `gen' of the block (i.e each insn which
2605 generates a definition in the block)
2606 Call the reg set by the insn corresponding to that bit regx
2607 Look at the linked list starting at reg_set_table[regx]
2608 For each setting of regx in the linked list, which is not in
2610 Set the bit in `kill' corresponding to that insn
2613 for (bb = 0; bb < n_basic_blocks; bb++)
2615 for (cuid = 0; cuid < max_cuid; cuid++)
2617 if (TEST_BIT (rd_gen[bb], cuid))
2619 rtx insn = CUID_INSN (cuid);
2620 rtx pat = PATTERN (insn);
/* A call kills definitions of all call-clobbered hard registers,
   with carve-outs for the stack/frame/arg/PIC pointers.  */
2622 if (GET_CODE (insn) == CALL_INSN)
2626 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2628 if ((call_used_regs[regno]
2629 && regno != STACK_POINTER_REGNUM
2630 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2631 && regno != HARD_FRAME_POINTER_REGNUM
2633 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2634 && ! (regno == ARG_POINTER_REGNUM
2635 && fixed_regs[regno])
2637 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2638 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2640 && regno != FRAME_POINTER_REGNUM)
2641 || global_regs[regno])
2642 handle_rd_kill_set (insn, regno, bb);
/* Handle each SET/CLOBBER of a REG inside a PARALLEL.  */
2646 if (GET_CODE (pat) == PARALLEL)
2650 /* We work backwards because ... */
2651 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2653 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2654 if ((code == SET || code == CLOBBER)
2655 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2656 handle_rd_kill_set (insn,
2657 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2661 else if (GET_CODE (pat) == SET)
2663 if (GET_CODE (SET_DEST (pat)) == REG)
2665 /* Each setting of this register outside of this block
2666 must be marked in the set of kills in this block. */
2667 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2670 /* FIXME: CLOBBER? */
2676 /* Compute the reaching definitions as in
2677 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2678 Chapter 10. It is the same algorithm as used for computing available
2679 expressions but applied to the gens and kills of reaching definitions. */
2684 int bb, changed, passes;
/* Initialize OUT[bb] = GEN[bb], then iterate
   IN[bb] = union of OUT over predecessors,
   OUT[bb] = GEN[bb] | (IN[bb] & ~KILL[bb])
   until no OUT set changes.  */
2686 for (bb = 0; bb < n_basic_blocks; bb++)
2687 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2694 for (bb = 0; bb < n_basic_blocks; bb++)
2696 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2697 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2698 reaching_defs[bb], rd_kill[bb]);
2704 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2707 /* Classic GCSE available expression support. */
2709 /* Allocate memory for available expression computation. */
2712 alloc_avail_expr_mem (n_blocks, n_exprs)
2713 int n_blocks, n_exprs;
2715 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2716 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2718 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2719 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2721 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2722 sbitmap_vector_zero (ae_in, n_basic_blocks);
2724 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2725 sbitmap_vector_zero (ae_out, n_basic_blocks);
/* u_bitmap is the universal set (all expressions), used as the
   initial OUT approximation in compute_available.  */
2727 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2728 sbitmap_ones (u_bitmap);
/* Free the sets allocated by alloc_avail_expr_mem.
   NOTE(review): the body's free() calls are missing from this view.  */
2732 free_avail_expr_mem ()
2741 /* Compute the set of available expressions generated in each basic block. */
2748 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2749 This is all we have to do because an expression is not recorded if it
2750 is not available, and the only expressions we want to work with are the
2751 ones that are recorded. */
/* Walk every hash bucket, every expression chain, every availability
   occurrence, marking the occurrence's block as generating the expr.  */
2753 for (i = 0; i < expr_hash_table_size; i++)
2755 struct expr *expr = expr_hash_table[i];
2756 while (expr != NULL)
2758 struct occr *occr = expr->avail_occr;
2759 while (occr != NULL)
2761 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2764 expr = expr->next_same_hash;
2769 /* Return non-zero if expression X is killed in BB. */
2772 expr_killed_p (x, bb)
2780 /* repeat is used to turn tail-recursion into iteration. */
2786 code = GET_CODE (x);
/* REG case: killed iff the register is set somewhere in BB.  */
2790 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
/* MEM case (presumably): killed if any memory store occurs in BB.  */
2793 if (mem_set_in_block[bb])
/* Otherwise recurse over X's operands; killed if any operand is.  */
2813 i = GET_RTX_LENGTH (code) - 1;
2814 fmt = GET_RTX_FORMAT (code);
2819 rtx tem = XEXP (x, i);
2821 /* If we are about to do the last recursive call
2822 needed at this level, change it into iteration.
2823 This function is called enough to be worth it. */
2829 if (expr_killed_p (tem, bb))
2832 else if (fmt[i] == 'E')
2835 for (j = 0; j < XVECLEN (x, i); j++)
2837 if (expr_killed_p (XVECEXP (x, i, j), bb))
2846 /* Compute the set of available expressions killed in each basic block. */
2853 for (bb = 0; bb < n_basic_blocks; bb++)
2855 for (i = 0; i < expr_hash_table_size; i++)
2857 struct expr *expr = expr_hash_table[i];
2859 for ( ; expr != NULL; expr = expr->next_same_hash)
2861 /* Skip EXPR if generated in this block. */
2862 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2865 if (expr_killed_p (expr->expr, bb))
2866 SET_BIT (ae_kill[bb], expr->bitmap_index);
2872 /* Compute available expressions.
2874 Implement the algorithm to find available expressions
2875 as given in the Aho Sethi Ullman book, pages 627-631. */
2878 compute_available ()
2880 int bb, changed, passes;
/* Entry block: nothing is available on entry; OUT[0] = GEN[0].
   All other blocks start from the optimistic estimate U - KILL.  */
2882 sbitmap_zero (ae_in[0]);
2884 sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/);
2886 for (bb = 1; bb < n_basic_blocks; bb++)
2887 sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]);
/* Iterate IN[bb] = intersection of OUT over preds,
   OUT[bb] = GEN[bb] | (IN[bb] & ~KILL[bb]) until a fixed point.  */
2894 for (bb = 1; bb < n_basic_blocks; bb++)
2896 sbitmap_intersection_of_preds (ae_in[bb], ae_out, bb);
2897 changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb],
2898 ae_in[bb], ae_kill[bb]);
2904 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
2907 /* Actually perform the Classic GCSE optimizations. */
2909 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2911 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2912 as a positive reach. We want to do this when there are two computations
2913 of the expression in the block.
2915 VISITED is a pointer to a working buffer for tracking which BB's have
2916 been visited. It is NULL for the top-level call.
2918 We treat reaching expressions that go through blocks containing the same
2919 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2920 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2921 2 as not reaching. The intent is to improve the probability of finding
2922 only one reaching expression and to reduce register lifetimes by picking
2923 the closest such expression. */
2926 expr_reaches_here_p (occr, expr, bb, check_self_loop, visited)
2930 int check_self_loop;
/* Top-level call: lazily allocate and clear the visited buffer.  */
2935 if (visited == NULL)
2937 visited = (char *) alloca (n_basic_blocks);
2938 bzero (visited, n_basic_blocks);
/* Depth-first search backwards through the predecessor edges.  */
2941 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2943 int pred_bb = pred->src->index;
2945 if (visited[pred_bb])
2947 /* This predecessor has already been visited.
2951 else if (pred_bb == bb)
2953 /* BB loops on itself. */
2955 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2956 && BLOCK_NUM (occr->insn) == pred_bb)
2958 visited[pred_bb] = 1;
2960 /* Ignore this predecessor if it kills the expression. */
2961 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2962 visited[pred_bb] = 1;
2963 /* Does this predecessor generate this expression? */
2964 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2966 /* Is this the occurrence we're looking for?
2967 Note that there's only one generating occurrence per block
2968 so we just need to check the block number. */
2969 if (BLOCK_NUM (occr->insn) == pred_bb)
2971 visited[pred_bb] = 1;
2973 /* Neither gen nor kill. */
/* Recurse through a transparent predecessor.  */
2976 visited[pred_bb] = 1;
2977 if (expr_reaches_here_p (occr, expr, pred_bb, check_self_loop, visited))
2982 /* All paths have been checked. */
2986 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2987 If there is more than one such instruction, return NULL.
2989 Called only by handle_avail_expr. */
2992 computing_insn (expr, insn)
2996 int bb = BLOCK_NUM (insn);
/* Single recorded occurrence: it either loops back to this block
   (do nothing) or is the unique computing insn.  */
2998 if (expr->avail_occr->next == NULL)
3000 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
3002 /* The available expression is actually itself
3003 (i.e. a loop in the flow graph) so do nothing. */
3006 /* (FIXME) Case that we found a pattern that was created by
3007 a substitution that took place. */
3008 return expr->avail_occr->insn;
3012 /* Pattern is computed more than once.
3013 Search backwards from this insn to see how many of these
3014 computations actually reach this insn. */
3016 rtx insn_computes_expr = NULL;
3019 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3021 if (BLOCK_NUM (occr->insn) == bb)
3023 /* The expression is generated in this block.
3024 The only time we care about this is when the expression
3025 is generated later in the block [and thus there's a loop].
3026 We let the normal cse pass handle the other cases. */
3027 if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3029 if (expr_reaches_here_p (occr, expr, bb, 1, NULL))
3034 insn_computes_expr = occr->insn;
3038 else /* Computation of the pattern outside this block. */
3040 if (expr_reaches_here_p (occr, expr, bb, 0, NULL))
3045 insn_computes_expr = occr->insn;
/* NULL result means either no computation reached, or (per the missing
   lines, presumably) more than one did -- caller must give up then.  */
3050 if (insn_computes_expr == NULL)
3052 return insn_computes_expr;
3056 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3057 Only called by can_disregard_other_sets. */
3060 def_reaches_here_p (insn, def_insn)
/* Cross-block reach: consult the reaching-defs bitmap for INSN's block.  */
3065 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
/* Same-block case: DEF_INSN must come first and the defined register
   must not be re-set between the two insns.  */
3068 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3070 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3072 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3074 if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3075 reg = XEXP (PATTERN (def_insn), 0);
3076 else if (GET_CODE (PATTERN (def_insn)) == SET)
3077 reg = SET_DEST (PATTERN (def_insn));
3080 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3089 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3090 The value returned is the number of definitions that reach INSN.
3091 Returning a value of zero means that [maybe] more than one definition
3092 reaches INSN and the caller can't perform whatever optimization it is
3093 trying. i.e. it is always safe to return zero. */
3096 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3097 struct reg_set **addr_this_reg;
3101 int number_of_reaching_defs = 0;
3102 struct reg_set *this_reg = *addr_this_reg;
/* Walk every recorded set of the register, counting those that reach
   INSN and bailing out (return 0) on anything ambiguous.  */
3106 if (def_reaches_here_p (insn, this_reg->insn))
3108 number_of_reaching_defs++;
3109 /* Ignore parallels for now. */
3110 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3113 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3114 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3115 SET_SRC (PATTERN (insn)))))
3117 /* A setting of the reg to a different value reaches INSN. */
3120 if (number_of_reaching_defs > 1)
3122 /* If in this setting the value the register is being
3123 set to is equal to the previous value the register
3124 was set to and this setting reaches the insn we are
3125 trying to do the substitution on then we are ok. */
3127 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3129 if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3130 SET_SRC (PATTERN (insn))))
/* Report back the last reaching set found so the caller can use it.  */
3133 *addr_this_reg = this_reg;
3136 /* prev_this_reg = this_reg; */
3137 this_reg = this_reg->next;
3140 return number_of_reaching_defs;
3143 /* Expression computed by insn is available and the substitution is legal,
3144 so try to perform the substitution.
3146 The result is non-zero if any changes were made. */
3149 handle_avail_expr (insn, expr)
3153 rtx pat, insn_computes_expr;
3155 struct reg_set *this_reg;
3156 int found_setting, use_src;
3159 /* We only handle the case where one computation of the expression
3160 reaches this instruction. */
3161 insn_computes_expr = computing_insn (expr, insn);
3162 if (insn_computes_expr == NULL)
3168 /* At this point we know only one computation of EXPR outside of this
3169 block reaches this insn. Now try to find a register that the
3170 expression is computed into. */
3172 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3174 /* This is the case when the available expression that reaches
3175 here has already been handled as an available expression. */
3176 int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3177 /* If the register was created by GCSE we can't use `reg_set_table',
3178 however we know it's set only once. */
3179 if (regnum_for_replacing >= max_gcse_regno
3180 /* If the register the expression is computed into is set only once,
3181 or only one set reaches this insn, we can use it. */
3182 || (((this_reg = reg_set_table[regnum_for_replacing]),
3183 this_reg->next == NULL)
3184 || can_disregard_other_sets (&this_reg, insn, 0)))
/* Otherwise try the destination of the computing insn.  */
3193 int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3194 /* This shouldn't happen. */
3195 if (regnum_for_replacing >= max_gcse_regno)
3197 this_reg = reg_set_table[regnum_for_replacing];
3198 /* If the register the expression is computed into is set only once,
3199 or only one set reaches this insn, use it. */
3200 if (this_reg->next == NULL
3201 || can_disregard_other_sets (&this_reg, insn, 0))
/* A suitable register was found: replace INSN's source with it.  */
3207 pat = PATTERN (insn);
3209 to = SET_SRC (PATTERN (insn_computes_expr));
3211 to = SET_DEST (PATTERN (insn_computes_expr));
3212 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3214 /* We should be able to ignore the return code from validate_change but
3215 to play it safe we check. */
3219 if (gcse_file != NULL)
3221 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3222 INSN_UID (insn), REGNO (to),
3223 use_src ? "from" : "set in",
3224 INSN_UID (insn_computes_expr));
3229 /* The register that the expr is computed into is set more than once. */
3230 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3232 /* Insert an insn after insnx that copies the reg set in insnx
3233 into a new pseudo register call this new register REGN.
3234 From insnb until end of basic block or until REGB is set
3235 replace all uses of REGB with REGN. */
3238 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3240 /* Generate the new insn. */
3241 /* ??? If the change fails, we return 0, even though we created
3242 an insn. I think this is ok. */
3244 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3245 SET_DEST (PATTERN (insn_computes_expr))),
3246 insn_computes_expr);
3247 /* Keep block number table up to date. */
3248 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3249 /* Keep register set table up to date. */
3250 record_one_set (REGNO (to), new_insn);
3252 gcse_create_count++;
3253 if (gcse_file != NULL)
3255 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3256 INSN_UID (NEXT_INSN (insn_computes_expr)),
3257 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3258 INSN_UID (insn_computes_expr));
3259 fprintf (gcse_file, " into newly allocated reg %d\n", REGNO (to));
3262 pat = PATTERN (insn);
3264 /* Do register replacement for INSN. */
3265 changed = validate_change (insn, &SET_SRC (pat),
3266 SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3269 /* We should be able to ignore the return code from validate_change but
3270 to play it safe we check. */
3274 if (gcse_file != NULL)
3276 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3278 REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3279 INSN_UID (insn_computes_expr));
3288 /* Perform classic GCSE.
3289 This is called by one_classic_gcse_pass after all the dataflow analysis
3292 The result is non-zero if a change was made. */
/* Block 0 is skipped: nothing is available on entry to the function,
   so no expression can reach into the entry block.  */
3300 /* Note we start at block 1. */
3303 for (bb = 1; bb < n_basic_blocks; bb++)
3305 /* Reset tables used to keep track of what's still valid [since the
3306 start of the block]. */
3307 reset_opr_set_tables ();
3309 for (insn = BLOCK_HEAD (bb);
3310 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3311 insn = NEXT_INSN (insn))
3313 /* Is insn of form (set (pseudo-reg) ...)? */
3315 if (GET_CODE (insn) == INSN
3316 && GET_CODE (PATTERN (insn)) == SET
3317 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3318 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3320 rtx pat = PATTERN (insn);
3321 rtx src = SET_SRC (pat);
3324 if (want_to_gcse_p (src)
3325 /* Is the expression recorded? */
3326 && ((expr = lookup_expr (src)) != NULL)
3327 /* Is the expression available [at the start of the
3329 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3330 /* Are the operands unchanged since the start of the
3332 && oprs_not_set_p (src, insn))
3333 changed |= handle_avail_expr (insn, expr);
3336 /* Keep track of everything modified by this insn. */
3337 /* ??? Need to be careful w.r.t. mods done to INSN. */
3338 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3339 mark_oprs_set (insn);
3346 /* Top level routine to perform one classic GCSE pass.
3348 Return non-zero if a change was made. */
3351 one_classic_gcse_pass (pass)
3356 gcse_subst_count = 0;
3357 gcse_create_count = 0;
/* Build the expression hash table and the reaching-definition and
   available-expression dataflow sets, run the optimization, then
   release everything in reverse order.  */
3359 alloc_expr_hash_table (max_cuid);
3360 alloc_rd_mem (n_basic_blocks, max_cuid);
3361 compute_expr_hash_table ();
3363 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3364 expr_hash_table_size, n_exprs);
3369 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3372 compute_available ();
3373 changed = classic_gcse ();
3374 free_avail_expr_mem ();
3377 free_expr_hash_table ();
3381 fprintf (gcse_file, "\n");
3382 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3383 current_function_name, pass,
3384 bytes_used, gcse_subst_count, gcse_create_count);
3390 /* Compute copy/constant propagation working variables. */
3392 /* Local properties of assignments. */
/* pavloc: assignment locally available; absaltered: operands altered
   in the block (i.e. NOT transparent).  */
3394 static sbitmap *cprop_pavloc;
3395 static sbitmap *cprop_absaltered;
3397 /* Global properties of assignments (computed from the local properties). */
3399 static sbitmap *cprop_avin;
3400 static sbitmap *cprop_avout;
3402 /* Allocate vars used for copy/const propagation.
3403 N_BLOCKS is the number of basic blocks.
3404 N_SETS is the number of sets. */
3407 alloc_cprop_mem (n_blocks, n_sets)
3408 int n_blocks, n_sets;
3410 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3411 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3413 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3414 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3417 /* Free vars used by copy/const propagation. */
/* NOTE(review): frees for cprop_avin/cprop_avout are missing from view.  */
3422 free (cprop_pavloc);
3423 free (cprop_absaltered);
3428 /* For each block, compute whether X is transparent.
3429 X is either an expression or an assignment [though we don't care which,
3430 for this context an assignment is treated as an expression].
3431 For each block where an element of X is modified, set (SET_P == 1) or reset
3432 (SET_P == 0) the INDX bit in BMAP. */
3435 compute_transp (x, indx, bmap, set_p)
3445 /* repeat is used to turn tail-recursion into iteration. */
3451 code = GET_CODE (x);
/* REG case: hard regs use the per-block reg_set_in_block bitmaps;
   pseudos use the reg_set_table chain of recorded sets.  */
3457 int regno = REGNO (x);
3461 if (regno < FIRST_PSEUDO_REGISTER)
3463 for (bb = 0; bb < n_basic_blocks; bb++)
3464 if (TEST_BIT (reg_set_in_block[bb], regno))
3465 SET_BIT (bmap[bb], indx);
3469 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3471 bb = BLOCK_NUM (r->insn);
3472 SET_BIT (bmap[bb], indx);
/* Same two sub-cases for the SET_P == 0 (reset) direction.  */
3478 if (regno < FIRST_PSEUDO_REGISTER)
3480 for (bb = 0; bb < n_basic_blocks; bb++)
3481 if (TEST_BIT (reg_set_in_block[bb], regno))
3482 RESET_BIT (bmap[bb], indx);
3486 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3488 bb = BLOCK_NUM (r->insn);
3489 RESET_BIT (bmap[bb], indx);
/* MEM case (presumably): any store in a block makes X non-transparent
   there.  */
3499 for (bb = 0; bb < n_basic_blocks; bb++)
3500 if (mem_set_in_block[bb])
3501 SET_BIT (bmap[bb], indx);
3505 for (bb = 0; bb < n_basic_blocks; bb++)
3506 if (mem_set_in_block[bb])
3507 RESET_BIT (bmap[bb], indx);
/* Otherwise recurse over X's operands.  */
3527 i = GET_RTX_LENGTH (code) - 1;
3528 fmt = GET_RTX_FORMAT (code);
3533 rtx tem = XEXP (x, i);
3535 /* If we are about to do the last recursive call
3536 needed at this level, change it into iteration.
3537 This function is called enough to be worth it. */
3543 compute_transp (tem, indx, bmap, set_p);
3545 else if (fmt[i] == 'E')
3548 for (j = 0; j < XVECLEN (x, i); j++)
3549 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3554 /* Compute the available expressions at the start and end of each
3555 basic block for cprop. This particular dataflow equation is
3556 used often enough that we might want to generalize it and make
3557 as a subroutine for other global optimizations that need available
3558 in/out information. */
3560 compute_cprop_avinout ()
3562 int bb, changed, passes;
/* AVIN[entry] = empty; all AVOUT start optimistically at "everything",
   then iterate to a fixed point:
   AVIN[bb] = intersection of AVOUT over preds,
   AVOUT[bb] = PAVLOC[bb] | (AVIN[bb] & ~ABSALTERED[bb]).  */
3564 sbitmap_zero (cprop_avin[0]);
3565 sbitmap_vector_ones (cprop_avout, n_basic_blocks);
3572 for (bb = 0; bb < n_basic_blocks; bb++)
3575 sbitmap_intersection_of_preds (cprop_avin[bb], cprop_avout, bb);
3576 changed |= sbitmap_union_of_diff (cprop_avout[bb],
3579 cprop_absaltered[bb]);
3585 fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes);
3588 /* Top level routine to do the dataflow analysis needed by copy/const
3592 compute_cprop_data ()
/* Local properties first (transparency and local availability of
   assignments), then the global in/out fixed point.  */
3594 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3595 compute_cprop_avinout ();
3598 /* Copy/constant propagation. */
3600 /* Maximum number of register uses in an insn that we handle. */
3603 /* Table of uses found in an insn.
3604 Allocated statically to avoid alloc/free complexity and overhead. */
3605 static struct reg_use reg_use_table[MAX_USES];
3607 /* Index into `reg_use_table' while building it. */
3608 static int reg_use_count;
3610 /* Set up a list of register numbers used in INSN.
3611 The found uses are stored in `reg_use_table'.
3612 `reg_use_count' is initialized to zero before entry, and
3613 contains the number of uses in the table upon exit.
3615 ??? If a register appears multiple times we will record it multiple
3616 times. This doesn't hurt anything but it will slow things down. */
3626 /* repeat is used to turn tail-recursion into iteration. */
3632 code = GET_CODE (x);
/* REG case: record the use, silently dropping anything past MAX_USES.  */
3636 if (reg_use_count == MAX_USES)
3638 reg_use_table[reg_use_count].reg_rtx = x;
3656 case ASM_INPUT: /*FIXME*/
/* SET case: the destination is a use only when it is a MEM (its
   address is read); otherwise only the source side is scanned.  */
3660 if (GET_CODE (SET_DEST (x)) == MEM)
3661 find_used_regs (SET_DEST (x));
3669 /* Recursively scan the operands of this expression. */
3671 fmt = GET_RTX_FORMAT (code);
3672 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3676 /* If we are about to do the last recursive call
3677 needed at this level, change it into iteration.
3678 This function is called enough to be worth it. */
3684 find_used_regs (XEXP (x, i));
3686 else if (fmt[i] == 'E')
3689 for (j = 0; j < XVECLEN (x, i); j++)
3690 find_used_regs (XVECEXP (x, i, j));
3695 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3696 Returns non-zero is successful. */
3699 try_replace_reg (from, to, insn)
3702 /* If this fails we could try to simplify the result of the
3703 replacement and attempt to recognize the simplified insn.
3705 But we need a general simplify_rtx that doesn't have pass
3706 specific state variables. I'm not aware of one at the moment. */
/* Thin wrapper: validate_replace_src both substitutes and re-recognizes
   the insn, undoing the change on failure.  */
3707 return validate_replace_src (from, to, insn);
3710 /* Find a set of REGNO that is available on entry to INSN's block.
3711 Returns NULL if not found. */
3713 static struct expr *
3714 find_avail_set (regno, insn)
3718 /* SET1 contains the last set found that can be returned to the caller for
3719 use in a substitution. */
3720 struct expr *set1 = 0;
3722 /* Loops are not possible here. To get a loop we would need two sets
3723 available at the start of the block containing INSN. ie we would
3724 need two sets like this available at the start of the block:
3726 (set (reg X) (reg Y))
3727 (set (reg Y) (reg X))
3729 This can not happen since the set of (reg Y) would have killed the
3730 set of (reg X) making it unavailable at the start of this block. */
3734 struct expr *set = lookup_set (regno, NULL_RTX);
3736 /* Find a set that is available at the start of the block
3737 which contains INSN. */
3740 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3742 set = next_set (regno, set);
3745 /* If no available set was found we've reached the end of the
3746 (possibly empty) copy chain. */
3750 if (GET_CODE (set->expr) != SET)
3753 src = SET_SRC (set->expr);
3755 /* We know the set is available.
3756 Now check that SRC is ANTLOC (i.e. none of the source operands
3757 have changed since the start of the block).
3759 If the source operand changed, we may still use it for the next
3760 iteration of this loop, but we may not use it for substitutions. */
3761 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3764 /* If the source of the set is anything except a register, then
3765 we have reached the end of the copy chain. */
3766 if (GET_CODE (src) != REG)
3769 /* Follow the copy chain, ie start another iteration of the loop
3770 and see if we have an available copy into SRC. */
3771 regno = REGNO (src);
3774 /* SET1 holds the last set that was available and anticipatable at
3779 /* Subroutine of cprop_insn that tries to propagate constants into
3780 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3781 that we can use for substitutions.
3782 REG_USED is the use we will try to replace, SRC is the constant we
3783 will try to substitute for it.
3784 Returns nonzero if a change was made. */
3786 cprop_jump (insn, copy, reg_used, src)
3788 struct reg_use *reg_used;
3791 rtx set = PATTERN (copy);
3794 /* Replace the register with the appropriate constant. */
3795 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
/* SET_SRC is expected to be an IF_THEN_ELSE here; try to fold it.  */
3797 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3798 GET_MODE (SET_SRC (set)),
3799 GET_MODE (XEXP (SET_SRC (set), 0)),
3800 XEXP (SET_SRC (set), 0),
3801 XEXP (SET_SRC (set), 1),
3802 XEXP (SET_SRC (set), 2));
3804 /* If no simplification can be made, then try the next
3809 SET_SRC (set) = temp;
3811 /* That may have changed the structure of TEMP, so
3812 force it to be rerecognized if it has not turned
3813 into a nop or unconditional jump. */
3815 INSN_CODE (copy) = -1;
3816 if ((SET_DEST (set) == pc_rtx
3817 && (SET_SRC (set) == pc_rtx
3818 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3819 || recog (PATTERN (copy), copy, NULL) >= 0)
3821 /* This has either become an unconditional jump
3822 or a nop-jump. We'd like to delete nop jumps
3823 here, but doing so confuses gcse. So we just
3824 make the replacement and let later passes
3826 PATTERN (insn) = set;
3827 INSN_CODE (insn) = -1;
3829 /* One less use of the label this insn used to jump to
3830 if we turned this into a NOP jump. */
3831 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3832 --LABEL_NUSES (JUMP_LABEL (insn));
3834 /* If this has turned into an unconditional jump,
3835 then put a barrier after it so that the unreachable
3836 code will be deleted. */
3837 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3838 emit_barrier_after (insn);
/* jump_optimize must rerun after gcse to clean up the result.  */
3840 run_jump_opt_after_gcse = 1;
3843 if (gcse_file != NULL)
3845 int regno = REGNO (reg_used->reg_rtx);
3846 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3847 regno, INSN_UID (insn));
3848 print_rtl (gcse_file, src);
3849 fprintf (gcse_file, "\n");
3857 /* Subroutine of cprop_insn that tries to propagate constants into
3858 JUMP_INSNS for machines that have CC0. INSN is a single set that
3859 stores into CC0; the insn following it is a conditional jump.
3860 REG_USED is the use we will try to replace, SRC is the constant we
3861 will try to substitute for it.
3862 Returns nonzero if a change was made. */
3864 cprop_cc0_jump (insn, reg_used, src)
3866 struct reg_use *reg_used;
3869 rtx jump = NEXT_INSN (insn);
3870 rtx copy = copy_rtx (jump);
3871 rtx set = PATTERN (copy);
3873 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3874 substitute into it. */
3875 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3876 if (! cprop_jump (jump, copy, reg_used, src))
3879 /* If we succeeded, delete the cc0 setter. */
/* Deletion is done by turning the insn into a NOTE_INSN_DELETED note.  */
3880 PUT_CODE (insn, NOTE);
3881 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3882 NOTE_SOURCE_FILE (insn) = 0;
3887 /* Perform constant and copy propagation on INSN.
3888 The result is non-zero if a change was made. */
3891 cprop_insn (insn, alter_jumps)
3895 struct reg_use *reg_used;
3898 /* Only propagate into SETs. Note that a conditional jump is a
3899 SET with pc_rtx as the destination. */
3900 if ((GET_CODE (insn) != INSN
3901 && GET_CODE (insn) != JUMP_INSN)
3902 || GET_CODE (PATTERN (insn)) != SET)
3906 find_used_regs (PATTERN (insn));
/* NOTE(review): "®_use_table" below is mis-encoded text -- it should
   read "&reg_use_table[0]" (the "&reg" was mangled into the (R) sign).
   Left untouched here; fix the encoding in the file.  */
3908 reg_used = ®_use_table[0];
3909 for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3913 int regno = REGNO (reg_used->reg_rtx);
3915 /* Ignore registers created by GCSE.
3916 We do this because ... */
3917 if (regno >= max_gcse_regno)
3920 /* If the register has already been set in this block, there's
3921 nothing we can do. */
3922 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3925 /* Find an assignment that sets reg_used and is available
3926 at the start of the block. */
3927 set = find_avail_set (regno, insn);
3932 /* ??? We might be able to handle PARALLELs. Later. */
3933 if (GET_CODE (pat) != SET)
3935 src = SET_SRC (pat);
3937 /* Constant propagation. */
3938 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3939 || GET_CODE (src) == SYMBOL_REF)
3941 /* Handle normal insns first. */
3942 if (GET_CODE (insn) == INSN
3943 && try_replace_reg (reg_used->reg_rtx, src, insn))
3947 if (gcse_file != NULL)
3949 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3950 regno, INSN_UID (insn));
3951 print_rtl (gcse_file, src);
3952 fprintf (gcse_file, "\n");
3955 /* The original insn setting reg_used may or may not now be
3956 deletable. We leave the deletion to flow. */
3959 /* Try to propagate a CONST_INT into a conditional jump.
3960 We're pretty specific about what we will handle in this
3961 code, we can extend this as necessary over time.
3963 Right now the insn in question must look like
3964 (set (pc) (if_then_else ...)) */
3965 else if (alter_jumps
3966 && GET_CODE (insn) == JUMP_INSN
3967 && condjump_p (insn)
3968 && ! simplejump_p (insn))
3969 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3971 /* Similar code for machines that use a pair of CC0 setter and
3972 conditional jump insn. */
3973 else if (alter_jumps
3974 && GET_CODE (PATTERN (insn)) == SET
3975 && SET_DEST (PATTERN (insn)) == cc0_rtx
3976 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3977 && condjump_p (NEXT_INSN (insn))
3978 && ! simplejump_p (NEXT_INSN (insn)))
3979 changed |= cprop_cc0_jump (insn, reg_used, src);
/* Copy propagation: SRC is a pseudo other than REGNO itself.  */
3982 else if (GET_CODE (src) == REG
3983 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3984 && REGNO (src) != regno)
3986 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3990 if (gcse_file != NULL)
3992 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
3993 regno, INSN_UID (insn), REGNO (src));
3996 /* The original insn setting reg_used may or may not now be
3997 deletable. We leave the deletion to flow. */
3998 /* FIXME: If it turns out that the insn isn't deletable,
3999 then we may have unnecessarily extended register lifetimes
4000 and made things worse. */
4008 /* Forward propagate copies.
4009 This includes copies and constants.
4010 Return non-zero if a change was made. */
4019 /* Note we start at block 1. */
/* NOTE(review): block 0 appears to be deliberately skipped here — presumably
   because no assignment can be "available at block start" for the entry
   block; confirm against the cprop_avin computation elsewhere in the file.  */
4022 for (bb = 1; bb < n_basic_blocks; bb++)
4024 /* Reset tables used to keep track of what's still valid [since the
4025 start of the block]. */
4026 reset_opr_set_tables ();
4028 for (insn = BLOCK_HEAD (bb);
4029 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4030 insn = NEXT_INSN (insn))
4032 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4034 changed |= cprop_insn (insn, alter_jumps);
4036 /* Keep track of everything modified by this insn. */
4037 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4038 call mark_oprs_set if we turned the insn into a NOTE. */
4039 if (GET_CODE (insn) != NOTE)
4040 mark_oprs_set (insn);
4045 if (gcse_file != NULL)
4046 fprintf (gcse_file, "\n");
4051 /* Perform one copy/constant propagation pass.
4052 F is the first insn in the function.
4053 PASS is the pass count. */
4056 one_cprop_pass (pass, alter_jumps)
/* Reset per-pass statistics used only for the dump below.  */
4062 const_prop_count = 0;
4063 copy_prop_count = 0;
4065 alloc_set_hash_table (max_cuid);
4066 compute_set_hash_table ();
4068 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4072 alloc_cprop_mem (n_basic_blocks, n_sets);
4073 compute_cprop_data ();
4074 changed = cprop (alter_jumps);
4077 free_set_hash_table ();
4081 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4082 current_function_name, pass,
4083 bytes_used, const_prop_count, copy_prop_count);
4084 fprintf (gcse_file, "\n");
4090 /* Compute PRE+LCM working variables. */
4092 /* Local properties of expressions. */
4093 /* Nonzero for expressions that are transparent in the block. */
4094 static sbitmap *transp;
4096 /* Nonzero for expressions that are transparent at the end of the block.
4097 This is only zero for expressions killed by an abnormal critical edge
4098 created by calls. */
4099 static sbitmap *transpout;
4101 /* Nonzero for expressions that are computed (available) in the block. */
4102 static sbitmap *comp;
4104 /* Nonzero for expressions that are locally anticipatable in the block. */
4105 static sbitmap *antloc;
4107 /* Nonzero for expressions where this block is an optimal computation
4109 static sbitmap *pre_optimal;
4111 /* Nonzero for expressions which are redundant in a particular block. */
4112 static sbitmap *pre_redundant;
/* Scratch per-block bitmap vector shared by pre_insert, pre_insert_copies
   and pre_delete.  */
4114 static sbitmap *temp_bitmap;
4116 /* Redundant insns. */
4117 static sbitmap pre_redundant_insns;
4119 /* Allocate vars used for PRE analysis. */
/* Allocates all seven per-block/per-expression bitmap vectors; the matching
   free_pre_mem must release every one of them.  */
4122 alloc_pre_mem (n_blocks, n_exprs)
4123 int n_blocks, n_exprs;
4125 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4126 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4127 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4129 temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4130 pre_optimal = sbitmap_vector_alloc (n_blocks, n_exprs);
4131 pre_redundant = sbitmap_vector_alloc (n_blocks, n_exprs);
4132 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4135 /* Free vars used for PRE analysis. */
/* NOTE(review): only the pre_redundant free is visible in this excerpt;
   the other vectors allocated in alloc_pre_mem are freed on elided lines.  */
4146 free (pre_redundant);
4150 /* Top level routine to do the dataflow analysis needed by PRE. */
/* Computes local properties, then transparency-out, then runs the lazy
   code motion solver to fill in pre_redundant and pre_optimal.  */
4155 compute_local_properties (transp, comp, antloc, 0);
4156 compute_transpout ();
4157 pre_lcm (n_basic_blocks, n_exprs, s_preds, s_succs, transp,
4158 antloc, pre_redundant, pre_optimal);
4164 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4167 VISITED is a pointer to a working buffer for tracking which BB's have
4168 been visited. It is NULL for the top-level call.
4170 CHECK_PRE_COMP controls whether or not we check for a computation of
4173 We treat reaching expressions that go through blocks containing the same
4174 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4175 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4176 2 as not reaching. The intent is to improve the probability of finding
4177 only one reaching expression and to reduce register lifetimes by picking
4178 the closest such expression. */
4181 pre_expr_reaches_here_p (occr_bb, expr, bb, check_pre_comp, visited)
4190 if (visited == NULL)
/* Top-level call: allocate and clear the visited buffer on the stack;
   recursive calls share this same buffer.  */
4192 visited = (char *) alloca (n_basic_blocks);
4193 bzero (visited, n_basic_blocks);
4196 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4198 int pred_bb = pred->src->index;
4200 if (pred->src == ENTRY_BLOCK_PTR
4201 /* Has this predecessor already been visited? */
4202 || visited[pred_bb])
4204 /* Nothing to do. */
4206 /* Does this predecessor generate this expression? */
4207 else if ((!check_pre_comp && occr_bb == pred_bb)
4208 || TEST_BIT (comp[pred_bb], expr->bitmap_index))
4210 /* Is this the occurrence we're looking for?
4211 Note that there's only one generating occurrence per block
4212 so we just need to check the block number. */
4213 if (occr_bb == pred_bb)
4215 visited[pred_bb] = 1;
4217 /* Ignore this predecessor if it kills the expression. */
4218 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4219 visited[pred_bb] = 1;
4220 /* Neither gen nor kill. */
4223 visited[pred_bb] = 1;
4224 if (pre_expr_reaches_here_p (occr_bb, expr, pred_bb,
4225 check_pre_comp, visited))
4230 /* All paths have been checked. */
4234 /* Add EXPR to the end of basic block BB.
4236 This is used by both the PRE and code hoisting.
4238 For PRE, we want to verify that the expr is either transparent
4239 or locally anticipatable in the target block. This check makes
4240 no sense for code hoisting. */
4243 insert_insn_end_bb (expr, bb, pre)
4248 rtx insn = BLOCK_END (bb);
4250 rtx reg = expr->reaching_reg;
4251 int regno = REGNO (reg);
4252 rtx pat, copied_expr;
/* Build the move as a (possibly multi-insn) sequence so it can be spliced
   in with a single emit below.  */
4256 copied_expr = copy_rtx (expr->expr);
4257 emit_move_insn (reg, copied_expr);
4258 first_new_insn = get_insns ();
4259 pat = gen_sequence ();
4262 /* If the last insn is a jump, insert EXPR in front [taking care to
4263 handle cc0, etc. properly]. */
4265 if (GET_CODE (insn) == JUMP_INSN)
4271 /* If this is a jump table, then we can't insert stuff here. Since
4272 we know the previous real insn must be the tablejump, we insert
4273 the new instruction just before the tablejump. */
4274 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4275 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4276 insn = prev_real_insn (insn);
4279 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4280 if cc0 isn't set. */
4281 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4283 insn = XEXP (note, 0);
4286 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4287 if (maybe_cc0_setter
4288 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4289 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4290 insn = maybe_cc0_setter;
4293 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4294 new_insn = emit_insn_before (pat, insn);
4295 if (BLOCK_HEAD (bb) == insn)
4296 BLOCK_HEAD (bb) = new_insn;
4298 /* Likewise if the last insn is a call, as will happen in the presence
4299 of exception handling. */
4300 else if (GET_CODE (insn) == CALL_INSN)
4302 HARD_REG_SET parm_regs;
4306 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4307 we search backward and place the instructions before the first
4308 parameter is loaded. Do this for everyone for consistency and a
4309 presumption that we'll get better code elsewhere as well. */
4311 /* It should always be the case that we can put these instructions
4312 anywhere in the basic block when performing PRE optimizations.
4315 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4316 && !TEST_BIT (transp[bb], expr->bitmap_index))
4319 /* Since different machines initialize their parameter registers
4320 in different orders, assume nothing. Collect the set of all
4321 parameter registers. */
4322 CLEAR_HARD_REG_SET (parm_regs);
4324 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4325 if (GET_CODE (XEXP (p, 0)) == USE
4326 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4328 int regno = REGNO (XEXP (XEXP (p, 0), 0));
/* NOTE(review): this >= FIRST_PSEUDO_REGISTER test looks inverted for
   collecting HARD parameter registers — presumably an elided line changes
   the control flow here; verify against the full source.  */
4329 if (regno >= FIRST_PSEUDO_REGISTER)
4331 SET_HARD_REG_BIT (parm_regs, regno);
4335 /* Search backward for the first set of a register in this set. */
4336 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4338 insn = PREV_INSN (insn);
4339 p = single_set (insn);
4340 if (p && GET_CODE (SET_DEST (p)) == REG
4341 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4342 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4344 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4349 /* If we found all the parameter loads, then we want to insert
4350 before the first parameter load.
4352 If we did not find all the parameter loads, then we might have
4353 stopped on the head of the block, which could be a CODE_LABEL.
4354 If we inserted before the CODE_LABEL, then we would be putting
4355 the insn in the wrong basic block. In that case, put the insn
4356 after the CODE_LABEL.
4358 ?!? Do we need to account for NOTE_INSN_BASIC_BLOCK here? */
4359 if (GET_CODE (insn) != CODE_LABEL)
4361 new_insn = emit_insn_before (pat, insn);
4362 if (BLOCK_HEAD (bb) == insn)
4363 BLOCK_HEAD (bb) = new_insn;
4367 new_insn = emit_insn_after (pat, insn);
/* Default case: append after the block's last insn and move BLOCK_END.  */
4372 new_insn = emit_insn_after (pat, insn);
4373 BLOCK_END (bb) = new_insn;
4376 /* Keep block number table up to date.
4377 Note, PAT could be a multiple insn sequence, we have to make
4378 sure that each insn in the sequence is handled. */
4379 if (GET_CODE (pat) == SEQUENCE)
4383 for (i = 0; i < XVECLEN (pat, 0); i++)
4385 rtx insn = XVECEXP (pat, 0, i);
4386 set_block_num (insn, bb);
4387 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4388 add_label_notes (PATTERN (insn), new_insn);
4389 record_set_insn = insn;
4390 note_stores (PATTERN (insn), record_set_info);
4395 add_label_notes (SET_SRC (pat), new_insn);
4396 set_block_num (new_insn, bb);
4397 /* Keep register set table up to date. */
4398 record_one_set (regno, new_insn);
4401 gcse_create_count++;
4405 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4406 bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4410 /* Insert partially redundant expressions at the ends of appropriate basic
4411 blocks making them fully redundant. */
4414 pre_insert (index_map)
4415 struct expr **index_map;
4417 int bb, i, set_size;
4420 /* Compute INSERT = PRE_OPTIMAL & ~PRE_REDUNDANT.
4421 Where INSERT is nonzero, we add the expression at the end of the basic
4422 block if it reaches any of the deleted expressions. */
4424 set_size = pre_optimal[0]->size;
/* INSERTED tracks (bb, expr) pairs already materialized so the same
   expression is emitted at most once per block.  */
4425 inserted = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
4426 sbitmap_vector_zero (inserted, n_basic_blocks);
4428 for (bb = 0; bb < n_basic_blocks; bb++)
4432 /* This computes the number of potential insertions we need. */
4433 sbitmap_not (temp_bitmap[bb], pre_redundant[bb]);
4434 sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_optimal[bb]);
4436 /* TEMP_BITMAP[bb] now contains a bitmap of the expressions that we need
4437 to insert at the end of this basic block. */
4438 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4440 SBITMAP_ELT_TYPE insert = temp_bitmap[bb]->elms[i];
/* Walk the set bits of this bitmap word; J is the expression index.  */
4443 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4445 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4447 struct expr *expr = index_map[j];
4450 /* Now look at each deleted occurrence of this expression. */
4451 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4453 if (! occr->deleted_p)
4456 /* Insert this expression at the end of BB if it would
4457 reach the deleted occurrence. */
4458 if (!TEST_BIT (inserted[bb], j)
4459 && pre_expr_reaches_here_p (bb, expr,
4460 BLOCK_NUM (occr->insn), 0,
4463 SET_BIT (inserted[bb], j);
4464 insert_insn_end_bb (index_map[j], bb, 1);
4472 sbitmap_vector_free (inserted);
4475 /* Copy the result of INSN to REG.
4476 INDX is the expression number. */
4479 pre_insert_copy_insn (expr, insn)
4483 rtx reg = expr->reaching_reg;
4484 int regno = REGNO (reg);
4485 int indx = expr->bitmap_index;
4486 rtx set = single_set (insn);
/* Emit "reaching_reg = SET_DEST(insn)" immediately after INSN so the
   computed value becomes available in the shared pseudo.  */
4491 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4493 /* Keep block number table up to date. */
4494 set_block_num (new_insn, BLOCK_NUM (insn));
4495 /* Keep register set table up to date. */
4496 record_one_set (regno, new_insn);
4498 gcse_create_count++;
4502 fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n",
4503 BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno);
4507 /* Copy available expressions that reach the redundant expression
4508 to `reaching_reg'. */
4511 pre_insert_copies ()
/* Precompute, per block, the expressions that are both optimal and
   redundant — the candidates needing a copy to reaching_reg.  */
4515 for (bb = 0; bb < n_basic_blocks; bb++)
4517 sbitmap_a_and_b (temp_bitmap[bb], pre_optimal[bb], pre_redundant[bb]);
4520 /* For each available expression in the table, copy the result to
4521 `reaching_reg' if the expression reaches a deleted one.
4523 ??? The current algorithm is rather brute force.
4524 Need to do some profiling. */
4526 for (i = 0; i < expr_hash_table_size; i++)
4530 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4534 /* If the basic block isn't reachable, PPOUT will be TRUE.
4535 However, we don't want to insert a copy here because the
4536 expression may not really be redundant. So only insert
4537 an insn if the expression was deleted.
4538 This test also avoids further processing if the expression
4539 wasn't deleted anywhere. */
4540 if (expr->reaching_reg == NULL)
4543 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4547 if (! occr->deleted_p)
4550 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4552 rtx insn = avail->insn;
4553 int bb = BLOCK_NUM (insn);
4555 if (!TEST_BIT (temp_bitmap[bb], expr->bitmap_index))
4558 /* No need to handle this one if handled already. */
4559 if (avail->copied_p)
4561 /* Don't handle this one if it's a redundant one. */
4562 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4564 /* Or if the expression doesn't reach the deleted one. */
4565 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4566 BLOCK_NUM (occr->insn),
4570 /* Copy the result of avail to reaching_reg. */
4571 pre_insert_copy_insn (expr, insn);
4572 avail->copied_p = 1;
4579 /* Delete redundant computations.
4580 Deletion is done by changing the insn to copy the `reaching_reg' of
4581 the expression into the result of the SET. It is left to later passes
4582 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4584 Returns non-zero if a change is made. */
4591 /* Compute the expressions which are redundant and need to be replaced by
4592 copies from the reaching reg to the target reg. */
4593 for (bb = 0; bb < n_basic_blocks; bb++)
4595 sbitmap_not (temp_bitmap[bb], pre_optimal[bb]);
4596 sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_redundant[bb]);
4600 for (i = 0; i < expr_hash_table_size; i++)
4604 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4607 int indx = expr->bitmap_index;
4609 /* We only need to search antic_occr since we require
4612 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4614 rtx insn = occr->insn;
4616 int bb = BLOCK_NUM (insn);
4618 if (TEST_BIT (temp_bitmap[bb], indx))
4620 set = single_set (insn);
4624 /* Create a pseudo-reg to store the result of reaching
4625 expressions into. Get the mode for the new pseudo
4626 from the mode of the original destination pseudo. */
/* The pseudo is created lazily, on the first deleted occurrence of
   this expression; later occurrences reuse it.  */
4627 if (expr->reaching_reg == NULL)
4629 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4631 /* In theory this should never fail since we're creating
4634 However, on the x86 some of the movXX patterns actually
4635 contain clobbers of scratch regs. This may cause the
4636 insn created by validate_change to not match any pattern
4637 and thus cause validate_change to fail. */
4638 if (validate_change (insn, &SET_SRC (set),
4639 expr->reaching_reg, 0))
4641 occr->deleted_p = 1;
4642 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4650 "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4651 INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4661 /* Perform GCSE optimizations using PRE.
4662 This is called by one_pre_gcse_pass after all the dataflow analysis
4665 This is based on the original Morel-Renvoise paper, Fred Chow's thesis,
4666 and lazy code motion from Knoop, Ruthing and Steffen as described in
4667 Advanced Compiler Design and Implementation.
4669 ??? A new pseudo reg is created to hold the reaching expression.
4670 The nice thing about the classical approach is that it would try to
4671 use an existing reg. If the register can't be adequately optimized
4672 [i.e. we introduce reload problems], one could add a pass here to
4673 propagate the new register through the block.
4675 ??? We don't handle single sets in PARALLELs because we're [currently]
4676 not able to copy the rest of the parallel when we insert copies to create
4677 full redundancies from partial redundancies. However, there's no reason
4678 why we can't handle PARALLELs in the cases where there are no partial
4686 struct expr **index_map;
4688 /* Compute a mapping from expression number (`bitmap_index') to
4689 hash table entry. */
4691 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
4692 bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
4693 for (i = 0; i < expr_hash_table_size; i++)
4697 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4698 index_map[expr->bitmap_index] = expr;
4701 /* Reset bitmap used to track which insns are redundant. */
4702 pre_redundant_insns = sbitmap_alloc (max_cuid);
4703 sbitmap_zero (pre_redundant_insns);
4705 /* Delete the redundant insns first so that
4706 - we know what register to use for the new insns and for the other
4707 ones with reaching expressions
4708 - we know which insns are redundant when we go to create copies */
4709 changed = pre_delete ();
4711 /* Insert insns in places that make partially redundant expressions
4713 pre_insert (index_map);
4715 /* In other places with reaching expressions, copy the expression to the
4716 specially allocated pseudo-reg that reaches the redundant expression. */
4717 pre_insert_copies ();
4719 free (pre_redundant_insns);
4724 /* Top level routine to perform one PRE GCSE pass.
4726 Return non-zero if a change was made. */
4729 one_pre_gcse_pass (pass)
/* Reset per-pass statistics used only for the dump below.  */
4734 gcse_subst_count = 0;
4735 gcse_create_count = 0;
4737 alloc_expr_hash_table (max_cuid);
4738 compute_expr_hash_table ();
4740 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4741 expr_hash_table_size, n_exprs);
4744 alloc_pre_mem (n_basic_blocks, n_exprs);
4745 compute_pre_data ();
4746 changed |= pre_gcse ();
4749 free_expr_hash_table ();
4753 fprintf (gcse_file, "\n");
4754 fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4755 current_function_name, pass,
4756 bytes_used, gcse_subst_count, gcse_create_count);
4762 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4763 We have to add REG_LABEL notes, because the following loop optimization
4764 pass requires them. */
4766 /* ??? This is very similar to the loop.c add_label_notes function. We
4767 could probably share code here. */
4769 /* ??? If there was a jump optimization pass after gcse and before loop,
4770 then we would not need to do this here, because jump would add the
4771 necessary REG_LABEL notes. */
4774 add_label_notes (x, insn)
4778 enum rtx_code code = GET_CODE (x);
4782 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4784 /* This code used to ignore labels that referred to dispatch tables to
4785 avoid flow generating (slightly) worse code.
4787 We no longer ignore such label references (see LABEL_REF handling in
4788 mark_jump_label for additional information). */
4789 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
/* Recurse over all sub-rtxes, using the format string to distinguish
   single expressions ('e') from expression vectors ('E').  */
4794 fmt = GET_RTX_FORMAT (code);
4795 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4798 add_label_notes (XEXP (x, i), insn);
4799 else if (fmt[i] == 'E')
4800 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4801 add_label_notes (XVECEXP (x, i, j), insn);
4805 /* Compute transparent outgoing information for each block.
4807 An expression is transparent to an edge unless it is killed by
4808 the edge itself. This can only happen with abnormal control flow,
4809 when the edge is traversed through a call. This happens with
4810 non-local labels and exceptions.
4812 This would not be necessary if we split the edge. While this is
4813 normally impossible for abnormal critical edges, with some effort
4814 it should be possible with exception handling, since we still have
4815 control over which handler should be invoked. But due to increased
4816 EH table sizes, this may not be worthwhile. */
4819 compute_transpout ()
/* Start from "everything transparent" and knock out kills below.  */
4823 sbitmap_vector_ones (transpout, n_basic_blocks);
4825 for (bb = 0; bb < n_basic_blocks; ++bb)
4829 /* Note that flow inserted a nop at the end of basic blocks that
4830 end in call instructions for reasons other than abnormal
4832 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4835 for (i = 0; i < expr_hash_table_size; i++)
4838 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4839 if (GET_CODE (expr->expr) == MEM)
4841 rtx addr = XEXP (expr->expr, 0);
/* Constant-pool loads are read-only and cannot be killed by a call.  */
4843 if (GET_CODE (addr) == SYMBOL_REF
4844 && CONSTANT_POOL_ADDRESS_P (addr))
4847 /* ??? Optimally, we would use interprocedural alias
4848 analysis to determine if this mem is actually killed
4850 RESET_BIT (transpout[bb], expr->bitmap_index);
4856 /* Removal of useless null pointer checks */
4858 /* These need to be file static for communication between
4859 invalidate_nonnull_info and delete_null_pointer_checks. */
4860 static int current_block;
4861 static sbitmap *nonnull_local;
4862 static sbitmap *nonnull_killed;
4864 /* Called via note_stores. X is set by SETTER. If X is a register we must
4865 invalidate nonnull_local and set nonnull_killed.
4867 We ignore hard registers. */
4869 invalidate_nonnull_info (x, setter)
4871 rtx setter ATTRIBUTE_UNUSED;
/* Strip SUBREGs to find the underlying register being stored.  */
4876 while (GET_CODE (x) == SUBREG)
4879 /* Ignore anything that is not a register or is a hard register. */
4880 if (GET_CODE (x) != REG
4881 || REGNO (x) < FIRST_PSEUDO_REGISTER)
/* A store to the pseudo invalidates any nonnull fact derived earlier in
   this block and records the kill for the global dataflow below.  */
4886 RESET_BIT (nonnull_local[current_block], regno);
4887 SET_BIT (nonnull_killed[current_block], regno);
4891 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
4894 This is conceptually similar to global constant/copy propagation and
4895 classic global CSE (it even uses the same dataflow equations as cprop).
4897 If a register is used as memory address with the form (mem (reg)), then we
4898 know that REG can not be zero at that point in the program. Any instruction
4899 which sets REG "kills" this property.
4901 So, if every path leading to a conditional branch has an available memory
4902 reference of that form, then we know the register can not have the value
4903 zero at the conditional branch.
4905 So we merely need to compute the local properties and propagate that data
4906 around the cfg, then optimize where possible.
4908 We run this pass two times. Once before CSE, then again after CSE. This
4909 has proven to be the most profitable approach. It is rare for new
4910 optimization opportunities of this nature to appear after the first CSE
4913 This could probably be integrated with global cprop with a little work. */
4916 delete_null_pointer_checks (f)
4919 int_list_ptr *s_preds, *s_succs;
4920 int *num_preds, *num_succs;
4922 sbitmap *nonnull_avin, *nonnull_avout;
4924 /* First break the program into basic blocks. */
4925 find_basic_blocks (f, max_reg_num (), NULL, 1);
4927 /* If we have only a single block, then there's nothing to do. */
4928 if (n_basic_blocks <= 1)
4930 /* Free storage allocated by find_basic_blocks. */
4931 free_basic_block_vars (0);
4935 /* Trying to perform global optimizations on flow graphs which have
4936 a high connectivity will take a long time and is unlikely to be
4937 particularly useful.
4939 In normal circumstances a cfg should have about twice as many edges
4940 as blocks. But we do not want to punish small functions which have
4941 a couple switch statements. So we require a relatively large number
4942 of basic blocks and the ratio of edges to blocks to be high. */
4943 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
4945 /* Free storage allocated by find_basic_blocks. */
4946 free_basic_block_vars (0);
4950 /* We need predecessor/successor lists as well as pred/succ counts for
4951 each basic block. */
4952 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
4953 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
4954 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
4955 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
4956 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
4958 /* Allocate bitmaps to hold local and global properties. */
4959 nonnull_local = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4960 nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4961 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4962 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4964 /* Compute local properties, nonnull and killed. A register will have
4965 the nonnull property if at the end of the current block its value is
4966 known to be nonnull. The killed property indicates that somewhere in
4967 the block any information we had about the register is killed.
4969 Note that a register can have both properties in a single block. That
4970 indicates that it's killed, then later in the block a new value is
4972 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4973 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4974 for (current_block = 0; current_block < n_basic_blocks; current_block++)
4976 rtx insn, stop_insn;
4978 /* Scan each insn in the basic block looking for memory references and
4980 stop_insn = NEXT_INSN (BLOCK_END (current_block));
4981 for (insn = BLOCK_HEAD (current_block);
4983 insn = NEXT_INSN (insn))
4987 /* Ignore anything that is not a normal insn. */
4988 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4991 /* Basically ignore anything that is not a simple SET. We do have
4992 to make sure to invalidate nonnull_local and set nonnull_killed
4993 for such insns though. */
4994 set = single_set (insn);
4997 note_stores (PATTERN (insn), invalidate_nonnull_info);
5001 /* See if we've got a usable memory load. We handle it first
5002 in case it uses its address register as a dest (which kills
5003 the nonnull property). */
5004 if (GET_CODE (SET_SRC (set)) == MEM
5005 && GET_CODE (XEXP (SET_SRC (set), 0)) == REG
5006 && REGNO (XEXP (SET_SRC (set), 0)) >= FIRST_PSEUDO_REGISTER)
5007 SET_BIT (nonnull_local[current_block],
5008 REGNO (XEXP (SET_SRC (set), 0)));
5010 /* Now invalidate stuff clobbered by this insn. */
5011 note_stores (PATTERN (insn), invalidate_nonnull_info);
5013 /* And handle stores, we do these last since any sets in INSN can
5014 not kill the nonnull property if it is derived from a MEM
5015 appearing in a SET_DEST. */
5016 if (GET_CODE (SET_DEST (set)) == MEM
5017 && GET_CODE (XEXP (SET_DEST (set), 0)) == REG)
5018 SET_BIT (nonnull_local[current_block],
5019 REGNO (XEXP (SET_DEST (set), 0)));
5023 /* Now compute global properties based on the local properties. This
5024 is a classic global availability algorithm. */
5025 sbitmap_zero (nonnull_avin[0]);
5026 sbitmap_vector_ones (nonnull_avout, n_basic_blocks);
/* Standard availability iteration: AVIN = intersection of predecessors'
   AVOUT; AVOUT = AVIN union LOCAL minus KILLED; repeat until stable.  */
5032 for (bb = 0; bb < n_basic_blocks; bb++)
5035 sbitmap_intersect_of_predecessors (nonnull_avin[bb],
5036 nonnull_avout, bb, s_preds);
5038 changed |= sbitmap_union_of_diff (nonnull_avout[bb],
5041 nonnull_killed[bb]);
5045 /* Now look at each bb and see if it ends with a compare of a value
5047 for (bb = 0; bb < n_basic_blocks; bb++)
5049 rtx last_insn = BLOCK_END (bb);
5050 rtx condition, earliest, reg;
5051 int compare_and_branch;
5053 /* We only want conditional branches. */
5054 if (GET_CODE (last_insn) != JUMP_INSN
5055 || !condjump_p (last_insn)
5056 || simplejump_p (last_insn))
5059 /* LAST_INSN is a conditional jump. Get its condition. */
5060 condition = get_condition (last_insn, &earliest);
5062 /* If we were unable to get the condition, or it is not a equality
5063 comparison against zero then there's nothing we can do. */
5065 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5066 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5067 || XEXP (condition, 1) != CONST0_RTX (GET_MODE (XEXP (condition, 0))))
5070 /* We must be checking a register against zero. */
5071 reg = XEXP (condition, 0);
5072 if (GET_CODE (reg) != REG
5075 /* Is the register known to have a nonzero value? */
5076 if (!TEST_BIT (nonnull_avout[bb], REGNO (reg)))
5079 /* Try to compute whether the compare/branch at the loop end is one or
5080 two instructions. */
5081 if (earliest == last_insn)
5082 compare_and_branch = 1;
5083 else if (earliest == prev_nonnote_insn (last_insn))
5084 compare_and_branch = 2;
5088 /* We know the register in this comparison is nonnull at exit from
5089 this block. We can optimize this comparison. */
/* reg != 0 is always true: replace the conditional jump with an
   unconditional one to the same label (and a barrier after it).  */
5090 if (GET_CODE (condition) == NE)
5094 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5096 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5097 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5098 emit_barrier_after (new_jump);
5100 delete_insn (last_insn);
5101 if (compare_and_branch == 2)
5102 delete_insn (earliest);
5105 /* Free storage allocated by find_basic_blocks. */
5106 free_basic_block_vars (0);
5109 free (nonnull_local);
5110 free (nonnull_killed);
5111 free (nonnull_avin);
5112 free (nonnull_avout);
5115 /* Code Hoisting variables and subroutines. */
5117 /* Very busy expressions. */
5118 static sbitmap *hoist_vbein;
5119 static sbitmap *hoist_vbeout;
5121 /* Hoistable expressions. */
5122 static sbitmap *hoist_exprs;
5124 /* Dominator bitmaps. */
5125 static sbitmap *dominators;
5126 static sbitmap *post_dominators;
5128 /* ??? We could compute post dominators and run this algorithm in
5129 reverse to perform tail merging, doing so would probably be
5130 more effective than the tail merging code in jump.c.
5132 It's unclear if tail merging could be run in parallel with
5133 code hoisting. It would be nice. */
5135 /* Allocate vars used for code hoisting analysis. */
/* Note the dominator bitmaps are n_blocks x n_blocks, unlike the
   per-expression vectors which are n_blocks x n_exprs.  */
5138 alloc_code_hoist_mem (n_blocks, n_exprs)
5139 int n_blocks, n_exprs;
5141 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5142 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5143 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5145 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5146 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5147 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5148 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5150 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5151 post_dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5154 /* Free vars used for code hoisting analysis. */
5157 free_code_hoist_mem ()
5164 free (hoist_vbeout);
5169 free (post_dominators);
/* Compute the very busy expressions at entry/exit from each block.

   An expression is very busy if all paths from a given point
   compute the expression.

   NOTE(review): this copy of the function appears truncated -- the
   return type, braces, and the convergence (while-changed/passes)
   loop around the block scan are missing.  Compare against the
   master gcse.c before relying on it.  */

compute_code_hoist_vbeinout ()
  int bb, changed, passes;

  sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
  sbitmap_vector_zero (hoist_vbein, n_basic_blocks);

      /* We scan the blocks in the reverse order to speed up
	 convergence: VBEIN of a block feeds the VBEOUT of its
	 predecessors, which are scanned later in this order.  */
      for (bb = n_basic_blocks - 1; bb >= 0; bb--)
	  /* VBEIN[bb] -- presumably ANTLOC[bb] | (VBEOUT[bb] &
	     TRANSP[bb]), per the helper's name; CHANGED records
	     whether the bitmap actually changed.  */
	  changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
					   hoist_vbeout[bb], transp[bb]);
	  /* VBEOUT[bb] is the intersection of VBEIN over bb's
	     successors; the last block is skipped here (its VBEOUT
	     stays empty).  NOTE(review): the call's trailing
	     arguments are missing in this copy.  */
	  if (bb != n_basic_blocks - 1)
	    sbitmap_intersect_of_successors (hoist_vbeout[bb], hoist_vbein,

    /* Dump how many iterations the fixed point took.  */
    fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5207 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5210 compute_code_hoist_data ()
5212 compute_local_properties (transp, comp, antloc, 0);
5213 compute_transpout ();
5214 compute_code_hoist_vbeinout ();
5215 compute_flow_dominators (dominators, post_dominators);
5217 fprintf (gcse_file, "\n");
/* Determine if the expression identified by EXPR_INDEX would
   reach BB unimpaired if it was placed at the end of EXPR_BB.

   It's unclear exactly what Muchnick meant by "unimpaired".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*.

   VISITED is a per-block "already examined" array; NULL on the
   outermost call, in which case it is allocated and cleared here.
   Recursive calls share the same array so each block is walked at
   most once.

   NOTE(review): this copy is missing the function header, braces, and
   the break/continue statements inside the loop, so the exact outcome
   of each test below cannot be confirmed from this text alone;
   compare against the master gcse.c.  */

hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
  if (visited == NULL)
      /* Outermost call: allocate the visited array on the stack
	 (released automatically when this call returns) and clear it.  */
      visited = (char *) alloca (n_basic_blocks);
      bzero (visited, n_basic_blocks);

  visited[expr_bb] = 1;
  /* Walk BB's predecessor edges; the final return value reports
     whether the walk ran to completion.  */
  for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
      int pred_bb = pred->src->index;

      /* Reached the function entry along this path.  */
      if (pred->src == ENTRY_BLOCK_PTR)
      /* This block was already examined on another path.  */
      else if (visited[pred_bb])
      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (comp[pred_bb], expr_index))
      /* The expression is killed (not transparent) in this
	 predecessor.  */
      else if (! TEST_BIT (transp[pred_bb], expr_index))
	  /* Transparent here: mark and recurse through this
	     predecessor.  NOTE(review): the call's remaining
	     arguments are missing in this copy.  */
	  visited[pred_bb] = 1;
	  if (! hoist_expr_reaches_here_p (expr_bb, expr_index,

  /* The expression reaches BB only if the loop examined every
     predecessor without an early exit (PRED == NULL at loop end).  */
  return (pred == NULL);
/* Actually perform code hoisting.

   Two phases per block BB: first mark in HOIST_EXPRS[bb] the very-busy
   expressions computed in more than one block dominated by BB, then
   for each marked expression rewrite the dominated computation to use
   a shared pseudo and insert one copy of the computation at the end
   of BB.

   NOTE(review): this copy of the function is missing lines (the
   function header, several braces, continue/break statements, and the
   first operand of some conditions); compare against the master
   gcse.c before relying on it.  */

  int bb, dominated, i;
  struct expr **index_map;

  sbitmap_vector_zero (hoist_exprs, n_basic_blocks);

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */
  index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
  bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
  for (i = 0; i < expr_hash_table_size; i++)
      for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
	index_map[expr->bitmap_index] = expr;

  /* Walk over each basic block looking for potentially hoistable
     expressions, nothing gets hoisted from the entry block.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
      int insn_inserted_p;

      /* Examine each expression that is very busy at the exit of this
	 block.  These are the potentially hoistable expressions.  */
      for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
	  /* Candidate only if very busy at BB's exit and transparent
	     through the exit.  */
	  if (TEST_BIT (hoist_vbeout[bb], i)
	      && TEST_BIT (transpout[bb], i))
	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
	      for (dominated = 0; dominated < n_basic_blocks; dominated++)
		  /* Ignore self dominance.  NOTE(review): the first
		     operand of this condition is missing in this copy.  */
		  || ! TEST_BIT (dominators[dominated], bb))

		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
		  if (!TEST_BIT (antloc[dominated], i))

		  /* Note if the expression would reach the dominated block
		     unimpaired if it was placed at the end of BB.

		     Keep track of how many times this expression is hoistable
		     from a dominated block into BB.  */
		  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))

	      /* If we found more than one hoistable occurrence of this
		 expression, then note it in the bitmap of expressions to
		 hoist.  It makes no sense to hoist things which are computed
		 in only one BB, and doing so tends to pessimize register
		 allocation.  One could increase this value to try harder
		 to avoid any possible code expansion due to register
		 allocation issues; however experiments have shown that
		 the vast majority of hoistable expressions are only movable
		 from two successors, so raising this threshold is likely
		 to nullify any benefit we get from code hoisting.  */
		  SET_BIT (hoist_exprs[bb], i);

      /* If we found nothing to hoist, then quit now.  */

      /* Loop over all the hoistable expressions.  */
      for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
	  /* We want to insert the expression into BB only once, so
	     note when we've inserted it.  */
	  insn_inserted_p = 0;

	  /* These tests should be the same as the tests above.  */
	  if (TEST_BIT (hoist_vbeout[bb], i))
	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
	      for (dominated = 0; dominated < n_basic_blocks; dominated++)
		  /* Ignore self dominance.  NOTE(review): the first
		     operand of this condition is missing in this copy.  */
		  || ! TEST_BIT (dominators[dominated], bb))

		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
		  if (!TEST_BIT (antloc[dominated], i))

		  /* The expression is computed in the dominated block and
		     it would be safe to compute it at the start of the
		     dominated block.  Now we have to determine if the
		     expression would reach the dominated block if it was
		     placed at the end of BB.  */
		  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
		      struct expr *expr = index_map[i];
		      struct occr *occr = expr->antic_occr;

		      /* Find the right occurrence of this expression.
			 NOTE(review): BUG -- OCCR is dereferenced before
			 it is tested for NULL; if no occurrence in
			 DOMINATED exists, BLOCK_NUM (occr->insn) reads
			 through a null pointer.  The operands should be
			 swapped:
			   while (occr && BLOCK_NUM (occr->insn) != dominated)  */
		      while (BLOCK_NUM (occr->insn) != dominated && occr)

		      /* Should never happen.  */

		      set = single_set (insn);

		      /* Create a pseudo-reg to store the result of reaching
			 expressions into.  Get the mode for the new pseudo
			 from the mode of the original destination pseudo.  */
		      if (expr->reaching_reg == NULL)
			= gen_reg_rtx (GET_MODE (SET_DEST (set)));

		      /* In theory this should never fail since we're creating
			 a reg->reg copy.

			 However, on the x86 some of the movXX patterns actually
			 contain clobbers of scratch regs.  This may cause the
			 insn created by validate_change to not match any
			 pattern and thus cause validate_change to fail.  */
		      if (validate_change (insn, &SET_SRC (set),
					   expr->reaching_reg, 0))
			  /* The dominated computation now reads the shared
			     pseudo; mark its occurrence deleted and insert
			     the real computation at the end of BB (once).  */
			  occr->deleted_p = 1;
			  if (!insn_inserted_p)
			      insert_insn_end_bb (index_map[i], bb, 0);
			      insn_inserted_p = 1;
5443 /* Top level routine to perform one code hoisting (aka unification) pass
5445 Return non-zero if a change was made. */
5448 one_code_hoisting_pass ()
5452 alloc_expr_hash_table (max_cuid);
5453 compute_expr_hash_table ();
5455 dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5456 expr_hash_table_size, n_exprs);
5459 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5460 compute_code_hoist_data ();
5462 free_code_hoist_mem ();
5464 free_expr_hash_table ();