1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
27 - dead store elimination
28 - a store to the same address as a load does not kill the load if the
29 source of the store is also the destination of the load. Handling this
30 allows more load motion, particularly out of loops.
31 - ability to realloc sbitmap vectors would allow one initial computation
32 of reg_set_in_block with only subsequent additions, rather than
33 recomputing it for each pass
37 /* References searched while implementing this.
39 Compilers Principles, Techniques and Tools
43 Global Optimization by Suppression of Partial Redundancies
45 Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
47 A Portable Machine-Independent Global Optimizer - Design and Measurements
49 Stanford Ph.D. thesis, Dec. 1983
51 A Fast Algorithm for Code Movement Optimization
53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
55 A Solution to a Problem with Morel and Renvoise's
56 Global Optimization by Suppression of Partial Redundancies
57 K-H Drechsler, M.P. Stadel
58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
60 Practical Adaptation of the Global Optimization
61 Algorithm of Morel and Renvoise
63 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
65 Efficiently Computing Static Single Assignment Form and the Control
67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
71 J. Knoop, O. Ruthing, B. Steffen
72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
75 Time for Reducible Flow Control
77 ACM Letters on Programming Languages and Systems,
78 Vol. 2, Num. 1-4, Mar-Dec 1993
80 An Efficient Representation for Sparse Sets
81 Preston Briggs, Linda Torczon
82 ACM Letters on Programming Languages and Systems,
83 Vol. 2, Num. 1-4, Mar-Dec 1993
85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
86 K-H Drechsler, M.P. Stadel
87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
89 Partial Dead Code Elimination
90 J. Knoop, O. Ruthing, B. Steffen
91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
93 Effective Partial Redundancy Elimination
94 P. Briggs, K.D. Cooper
95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
97 The Program Structure Tree: Computing Control Regions in Linear Time
98 R. Johnson, D. Pearson, K. Pingali
99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
101 Optimal Code Motion: Theory and Practice
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
105 The power of assignment motion
106 J. Knoop, O. Ruthing, B. Steffen
107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
109 Global code motion / global value numbering
111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
113 Value Driven Redundancy Elimination
115 Rice University Ph.D. thesis, Apr. 1996
119 Massively Scalar Compiler Project, Rice University, Sep. 1996
121 High Performance Compilers for Parallel Computing
125 Advanced Compiler Design and Implementation
127 Morgan Kaufmann, 1997
129 People wishing to speed up the code here should read:
130 Elimination Algorithms for Data Flow Analysis
131 B.G. Ryder, M.C. Paull
132 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
134 How to Analyze Large Programs Efficiently and Informatively
135 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
136 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
138 People wishing to do something different can find various possibilities
139 in the above papers and elsewhere.
149 #include "hard-reg-set.h"
152 #include "insn-config.h"
154 #include "basic-block.h"
156 #include "function.h"
160 #define obstack_chunk_alloc gmalloc
161 #define obstack_chunk_free free
163 /* Maximum number of passes to perform. */
166 /* Propagate flow information through back edges and thus enable PRE's
167 moving loop invariant calculations out of loops.
169 Originally this tended to create worse overall code, but several
170 improvements during the development of PRE seem to have made following
171 back edges generally a win.
173 Note much of the loop invariant code motion done here would normally
174 be done by loop.c, which has more heuristics for when to move invariants
175 out of loops. At some point we might need to move some of those
176 heuristics into gcse.c. */
177 #define FOLLOW_BACK_EDGES 1
179 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
180 are a superset of those done by GCSE.
182 We perform the following steps:
184 1) Compute basic block information.
186 2) Compute table of places where registers are set.
188 3) Perform copy/constant propagation.
190 4) Perform global cse.
192 5) Perform another pass of copy/constant propagation.
194 Two passes of copy/constant propagation are done because the first one
195 enables more GCSE and the second one helps to clean up the copies that
196 GCSE creates. This is needed more for PRE than for Classic because Classic
197 GCSE will try to use an existing register containing the common
198 subexpression rather than create a new one. This is harder to do for PRE
199 because of the code motion (which Classic GCSE doesn't do).
201 Expressions we are interested in GCSE-ing are of the form
202 (set (pseudo-reg) (expression)).
203 Function want_to_gcse_p says what these are.
205 PRE handles moving invariant expressions out of loops (by treating them as
206 partially redundant).
208 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
209 assignment) based GVN (global value numbering). L. T. Simpson's paper
210 (Rice University) on value numbering is a useful reference for this.
212 **********************
214 We used to support multiple passes but there are diminishing returns in
215 doing so. The first pass usually makes 90% of the changes that are doable.
216 A second pass can make a few more changes made possible by the first pass.
217 Experiments show any further passes don't make enough changes to justify
220 A study of spec92 using an unlimited number of passes:
221 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
222 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
223 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
225 It was found doing copy propagation between each pass enables further
228 PRE is quite expensive in complicated functions because the DFA can take
229 a while to converge. Hence we only perform one pass. Macro MAX_PASSES can
230 be modified if one wants to experiment.
232 **********************
234 The steps for PRE are:
236 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
238 2) Perform the data flow analysis for PRE.
240 3) Delete the redundant instructions
242 4) Insert the required copies [if any] that make the partially
243 redundant instructions fully redundant.
245 5) For other reaching expressions, insert an instruction to copy the value
246 to a newly created pseudo that will reach the redundant instruction.
248 The deletion is done first so that when we do insertions we
249 know which pseudo reg to use.
251 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
252 argue it is not. The number of iterations for the algorithm to converge
253 is typically 2-4 so I don't view it as that expensive (relatively speaking).
255 PRE GCSE depends heavily on the second CSE pass to clean up the copies
256 we create. To make an expression reach the place where it's redundant,
257 the result of the expression is copied to a new register, and the redundant
258 expression is deleted by replacing it with this new register. Classic GCSE
259 doesn't have this problem as much as it computes the reaching defs of
260 each register in each block and thus can try to use an existing register.
262 **********************
264 A fair bit of simplicity is created by creating small functions for simple
265 tasks, even when the function is only called in one place. This may
266 measurably slow things down [or may not] by creating more function call
267 overhead than is necessary. The source is laid out so that it's trivial
268 to make the affected functions inline so that one can measure what speed
269 up, if any, can be achieved, and maybe later when things settle things can
272 Help stamp out big monolithic functions! */
274 /* GCSE global vars. */
277 static FILE *gcse_file;
279 /* Note whether or not we should run jump optimization after gcse. We
280 want to do this for two cases.
282 * If we changed any jumps via cprop.
284 * If we added any labels via edge splitting. */
286 static int run_jump_opt_after_gcse;
288 /* Element I is a list of I's predecessors/successors. */
289 static int_list_ptr *s_preds;
290 static int_list_ptr *s_succs;
292 /* Element I is the number of predecessors/successors of basic block I. */
293 static int *num_preds;
294 static int *num_succs;
296 /* Bitmaps are normally not included in debugging dumps.
297 However it's useful to be able to print them from GDB.
298 We could create special functions for this, but it's simpler to
299 just allow passing stderr to the dump_foo fns. Since stderr can
300 be a macro, we store a copy here. */
301 static FILE *debug_stderr;
303 /* An obstack for our working variables. */
304 static struct obstack gcse_obstack;
306 /* Non-zero for each mode that supports (set (reg) (reg)).
307 This is trivially true for integer and floating point values.
308 It may or may not be true for condition codes. */
309 static char can_copy_p[(int) NUM_MACHINE_MODES];
311 /* Non-zero if can_copy_p has been initialized. */
312 static int can_copy_init_p;
318 /* Hash table of expressions. */
322 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
324 /* Index in the available expression bitmaps. */
326 /* Next entry with the same hash. */
327 struct expr *next_same_hash;
328 /* List of anticipatable occurrences in basic blocks in the function.
329 An "anticipatable occurrence" is one that is the first occurrence in the
330 basic block, the operands are not modified in the basic block prior
331 to the occurrence and the output is not used between the start of
332 the block and the occurrence. */
333 struct occr *antic_occr;
334 /* List of available occurrences in basic blocks in the function.
335 An "available occurrence" is one that is the last occurrence in the
336 basic block and the operands are not modified by following statements in
337 the basic block [including this insn]. */
338 struct occr *avail_occr;
339 /* Non-null if the computation is PRE redundant.
340 The value is the newly created pseudo-reg to record a copy of the
341 expression in all the places that reach the redundant copy. */
345 /* Occurrence of an expression.
346 There is one per basic block. If a pattern appears more than once the
347 last appearance is used [or first for anticipatable expressions]. */
351 /* Next occurrence of this expression. */
353 /* The insn that computes the expression. */
355 /* Non-zero if this [anticipatable] occurrence has been deleted. */
357 /* Non-zero if this [available] occurrence has been copied to
359 /* ??? This is mutually exclusive with deleted_p, so they could share
364 /* Expression and copy propagation hash tables.
365 Each hash table is an array of buckets.
366 ??? It is known that if it were an array of entries, structure elements
367 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
368 not clear whether in the final analysis a sufficient amount of memory would
369 be saved as the size of the available expression bitmaps would be larger
370 [one could build a mapping table without holes afterwards though].
371 Someday I'll perform the computation and figure it out.
374 /* Total size of the expression hash table, in elements. */
375 static int expr_hash_table_size;
377 This is an array of `expr_hash_table_size' elements. */
378 static struct expr **expr_hash_table;
380 /* Total size of the copy propagation hash table, in elements. */
381 static int set_hash_table_size;
383 This is an array of `set_hash_table_size' elements. */
384 static struct expr **set_hash_table;
386 /* Mapping of uids to cuids.
387 Only real insns get cuids. */
388 static int *uid_cuid;
390 /* Highest UID in UID_CUID. */
393 /* Get the cuid of an insn. */
394 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
396 /* Number of cuids. */
399 /* Mapping of cuids to insns. */
400 static rtx *cuid_insn;
402 /* Get insn from cuid. */
403 #define CUID_INSN(CUID) (cuid_insn[CUID])
405 /* Maximum register number in function prior to doing gcse + 1.
406 Registers created during this pass have regno >= max_gcse_regno.
407 This is named with "gcse" to not collide with global of same name. */
408 static int max_gcse_regno;
410 /* Maximum number of cse-able expressions found. */
412 /* Maximum number of assignments for copy propagation found. */
415 /* Table of registers that are modified.
416 For each register, each element is a list of places where the pseudo-reg
419 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
420 requires knowledge of which blocks kill which regs [and thus could use
421 a bitmap instead of the lists `reg_set_table' uses].
423 `reg_set_table' and could be turned into an array of bitmaps
425 [however perhaps it may be useful to keep the data as is].
426 One advantage of recording things this way is that `reg_set_table' is
427 fairly sparse with respect to pseudo regs but for hard regs could be
428 fairly dense [relatively speaking].
429 And recording sets of pseudo-regs in lists speeds
430 up functions like compute_transp since in the case of pseudo-regs we only
431 need to iterate over the number of times a pseudo-reg is set, not over the
432 number of basic blocks [clearly there is a bit of a slow down in the cases
433 where a pseudo is set more than once in a block, however it is believed
434 that the net effect is to speed things up]. This isn't done for hard-regs
435 because recording call-clobbered hard-regs in `reg_set_table' at each
436 function call can consume a fair bit of memory, and iterating over hard-regs
437 stored this way in compute_transp will be more expensive. */
439 typedef struct reg_set {
440 /* The next setting of this register. */
441 struct reg_set *next;
442 /* The insn where it was set. */
445 static reg_set **reg_set_table;
446 /* Size of `reg_set_table'.
447 The table starts out at max_gcse_regno + slop, and is enlarged as
449 static int reg_set_table_size;
450 /* Amount to grow `reg_set_table' by when it's full. */
451 #define REG_SET_TABLE_SLOP 100
453 /* Bitmap containing one bit for each register in the program.
454 Used when performing GCSE to track which registers have been set since
455 the start of the basic block. */
456 static sbitmap reg_set_bitmap;
458 /* For each block, a bitmap of registers set in the block.
459 This is used by expr_killed_p and compute_transp.
460 It is computed during hash table computation and not by compute_sets
461 as it includes registers added since the last pass (or between cprop and
462 gcse) and it's currently not easy to realloc sbitmap vectors. */
463 static sbitmap *reg_set_in_block;
465 /* For each block, non-zero if memory is set in that block.
466 This is computed during hash table computation and is used by
467 expr_killed_p and compute_transp.
468 ??? Handling of memory is very simple, we don't make any attempt
469 to optimize things (later).
470 ??? This can be computed by compute_sets since the information
472 static char *mem_set_in_block;
474 /* Various variables for statistics gathering. */
476 /* Memory used in a pass.
477 This isn't intended to be absolutely precise. Its intent is only
478 to keep an eye on memory usage. */
479 static int bytes_used;
480 /* GCSE substitutions made. */
481 static int gcse_subst_count;
482 /* Number of copy instructions created. */
483 static int gcse_create_count;
484 /* Number of constants propagated. */
485 static int const_prop_count;
486 /* Number of copies propagated. */
487 static int copy_prop_count;
489 /* These variables are used by classic GCSE.
490 Normally they'd be defined a bit later, but `rd_gen' needs to
491 be declared sooner. */
493 /* A bitmap of all ones for implementing the algorithm for available
494 expressions and reaching definitions. */
495 /* ??? Available expression bitmaps have a different size than reaching
496 definition bitmaps. This should be the larger of the two, however, it
497 is not currently used for reaching definitions. */
498 static sbitmap u_bitmap;
500 /* Each block has a bitmap of each type.
501 The length of each block's bitmap is:
503 max_cuid - for reaching definitions
504 n_exprs - for available expressions
506 Thus we view the bitmaps as 2 dimensional arrays. i.e.
507 rd_kill[block_num][cuid_num]
508 ae_kill[block_num][expr_num]
511 /* For reaching defs */
512 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
514 /* for available exprs */
515 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
518 static void compute_can_copy PROTO ((void));
520 static char *gmalloc PROTO ((unsigned int));
521 static char *grealloc PROTO ((char *, unsigned int));
522 static char *gcse_alloc PROTO ((unsigned long));
523 static void alloc_gcse_mem PROTO ((rtx));
524 static void free_gcse_mem PROTO ((void));
525 static void alloc_reg_set_mem PROTO ((int));
526 static void free_reg_set_mem PROTO ((void));
527 static void record_one_set PROTO ((int, rtx));
528 static void record_set_info PROTO ((rtx, rtx));
529 static void compute_sets PROTO ((rtx));
531 static void hash_scan_insn PROTO ((rtx, int, int));
532 static void hash_scan_set PROTO ((rtx, rtx, int));
533 static void hash_scan_clobber PROTO ((rtx, rtx));
534 static void hash_scan_call PROTO ((rtx, rtx));
535 static int want_to_gcse_p PROTO ((rtx));
536 static int oprs_unchanged_p PROTO ((rtx, rtx, int));
537 static int oprs_anticipatable_p PROTO ((rtx, rtx));
538 static int oprs_available_p PROTO ((rtx, rtx));
539 static void insert_expr_in_table PROTO ((rtx, enum machine_mode,
541 static void insert_set_in_table PROTO ((rtx, rtx));
542 static unsigned int hash_expr PROTO ((rtx, enum machine_mode,
544 static unsigned int hash_expr_1 PROTO ((rtx, enum machine_mode, int *));
545 static unsigned int hash_set PROTO ((int, int));
546 static int expr_equiv_p PROTO ((rtx, rtx));
547 static void record_last_reg_set_info PROTO ((rtx, int));
548 static void record_last_mem_set_info PROTO ((rtx));
549 static void record_last_set_info PROTO ((rtx, rtx));
550 static void compute_hash_table PROTO ((int));
551 static void alloc_set_hash_table PROTO ((int));
552 static void free_set_hash_table PROTO ((void));
553 static void compute_set_hash_table PROTO ((void));
554 static void alloc_expr_hash_table PROTO ((int));
555 static void free_expr_hash_table PROTO ((void));
556 static void compute_expr_hash_table PROTO ((void));
557 static void dump_hash_table PROTO ((FILE *, const char *, struct expr **,
559 static struct expr *lookup_expr PROTO ((rtx));
560 static struct expr *lookup_set PROTO ((int, rtx));
561 static struct expr *next_set PROTO ((int, struct expr *));
562 static void reset_opr_set_tables PROTO ((void));
563 static int oprs_not_set_p PROTO ((rtx, rtx));
564 static void mark_call PROTO ((rtx));
565 static void mark_set PROTO ((rtx, rtx));
566 static void mark_clobber PROTO ((rtx, rtx));
567 static void mark_oprs_set PROTO ((rtx));
569 static void alloc_cprop_mem PROTO ((int, int));
570 static void free_cprop_mem PROTO ((void));
571 static void compute_transp PROTO ((rtx, int, sbitmap *, int));
572 static void compute_transpout PROTO ((void));
573 static void compute_local_properties PROTO ((sbitmap *, sbitmap *,
575 static void compute_cprop_avinout PROTO ((void));
576 static void compute_cprop_data PROTO ((void));
577 static void find_used_regs PROTO ((rtx));
578 static int try_replace_reg PROTO ((rtx, rtx, rtx));
579 static struct expr *find_avail_set PROTO ((int, rtx));
580 static int cprop_jump PROTO((rtx, rtx, struct reg_use *, rtx));
582 static int cprop_cc0_jump PROTO((rtx, struct reg_use *, rtx));
584 static int cprop_insn PROTO ((rtx, int));
585 static int cprop PROTO ((int));
586 static int one_cprop_pass PROTO ((int, int));
588 static void alloc_pre_mem PROTO ((int, int));
589 static void free_pre_mem PROTO ((void));
590 static void compute_pre_data PROTO ((void));
591 static int pre_expr_reaches_here_p PROTO ((int, struct expr *,
593 static void insert_insn_end_bb PROTO ((struct expr *, int, int));
594 static void pre_insert PROTO ((struct expr **));
595 static void pre_insert_copy_insn PROTO ((struct expr *, rtx));
596 static void pre_insert_copies PROTO ((void));
597 static int pre_delete PROTO ((void));
598 static int pre_gcse PROTO ((void));
599 static int one_pre_gcse_pass PROTO ((int));
601 static void add_label_notes PROTO ((rtx, rtx));
603 static void alloc_code_hoist_mem PROTO ((int, int));
604 static void free_code_hoist_mem PROTO ((void));
605 static void compute_code_hoist_vbeinout PROTO ((void));
606 static void compute_code_hoist_data PROTO ((void));
607 static int hoist_expr_reaches_here_p PROTO ((int, int, int, char *));
608 static void hoist_code PROTO ((void));
609 static int one_code_hoisting_pass PROTO ((void));
611 static void alloc_rd_mem PROTO ((int, int));
612 static void free_rd_mem PROTO ((void));
613 static void handle_rd_kill_set PROTO ((rtx, int, int));
614 static void compute_kill_rd PROTO ((void));
615 static void compute_rd PROTO ((void));
616 static void alloc_avail_expr_mem PROTO ((int, int));
617 static void free_avail_expr_mem PROTO ((void));
618 static void compute_ae_gen PROTO ((void));
619 static int expr_killed_p PROTO ((rtx, int));
620 static void compute_ae_kill PROTO ((void));
621 static void compute_available PROTO ((void));
622 static int expr_reaches_here_p PROTO ((struct occr *, struct expr *,
624 static rtx computing_insn PROTO ((struct expr *, rtx));
625 static int def_reaches_here_p PROTO ((rtx, rtx));
626 static int can_disregard_other_sets PROTO ((struct reg_set **, rtx, int));
627 static int handle_avail_expr PROTO ((rtx, struct expr *));
628 static int classic_gcse PROTO ((void));
629 static int one_classic_gcse_pass PROTO ((int));
631 static void invalidate_nonnull_info PROTO ((rtx, rtx));
634 /* Entry point for global common subexpression elimination.
635 F is the first instruction in the function. */
643 /* Bytes used at start of pass. */
644 int initial_bytes_used;
645 /* Maximum number of bytes used by a pass. */
647 /* Point to release obstack data from for each pass. */
648 char *gcse_obstack_bottom;
650 /* We do not construct an accurate cfg in functions which call
651 setjmp, so just punt to be safe. */
652 if (current_function_calls_setjmp)
655 /* Assume that we do not need to run jump optimizations after gcse. */
656 run_jump_opt_after_gcse = 0;
658 /* For calling dump_foo fns from gdb. */
659 debug_stderr = stderr;
662 /* Identify the basic block information for this function, including
663 successors and predecessors. */
664 max_gcse_regno = max_reg_num ();
665 find_basic_blocks (f, max_gcse_regno, file, 1);
667 /* Return if there's nothing to do. */
668 if (n_basic_blocks <= 1)
670 /* Free storage allocated by find_basic_blocks. */
671 free_basic_block_vars (0);
675 /* Trying to perform global optimizations on flow graphs which have
676 a high connectivity will take a long time and is unlikely to be
679 In normal circumstances a cfg should have about twice as many edges
680 as blocks. But we do not want to punish small functions which have
681 a couple switch statements. So we require a relatively large number
682 of basic blocks and the ratio of edges to blocks to be high. */
683 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
685 /* Free storage allocated by find_basic_blocks. */
686 free_basic_block_vars (0);
690 /* See what modes support reg/reg copy operations. */
691 if (! can_copy_init_p)
697 gcc_obstack_init (&gcse_obstack);
699 /* Allocate and compute predecessors/successors. */
701 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
702 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
703 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
704 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
705 bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr);
706 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
709 dump_bb_data (file, s_preds, s_succs, 0);
711 /* Record where pseudo-registers are set.
712 This data is kept accurate during each pass.
713 ??? We could also record hard-reg information here
714 [since it's unchanging], however it is currently done during
715 hash table computation.
717 It may be tempting to compute MEM set information here too, but MEM
718 sets will be subject to code motion one day and thus we need to compute
719 information about memory sets when we build the hash tables. */
721 alloc_reg_set_mem (max_gcse_regno);
725 initial_bytes_used = bytes_used;
727 gcse_obstack_bottom = gcse_alloc (1);
729 while (changed && pass < MAX_PASSES)
733 fprintf (file, "GCSE pass %d\n\n", pass + 1);
735 /* Initialize bytes_used to the space for the pred/succ lists,
736 and the reg_set_table data. */
737 bytes_used = initial_bytes_used;
739 /* Each pass may create new registers, so recalculate each time. */
740 max_gcse_regno = max_reg_num ();
744 /* Don't allow constant propagation to modify jumps
746 changed = one_cprop_pass (pass + 1, 0);
749 changed |= one_classic_gcse_pass (pass + 1);
751 changed |= one_pre_gcse_pass (pass + 1);
753 if (max_pass_bytes < bytes_used)
754 max_pass_bytes = bytes_used;
756 /* Free up memory, then reallocate for code hoisting. We can
757 not re-use the existing allocated memory because the tables
758 will not have info for the insns or registers created by
759 partial redundancy elimination. */
762 /* It does not make sense to run code hoisting unless we are optimizing
763 for code size -- it rarely makes programs faster, and can make
764 them bigger if we did partial redundancy elimination (when optimizing
765 for space, we use a classic gcse algorithm instead of partial
766 redundancy algorithms). */
769 max_gcse_regno = max_reg_num ();
771 changed |= one_code_hoisting_pass ();
774 if (max_pass_bytes < bytes_used)
775 max_pass_bytes = bytes_used;
780 fprintf (file, "\n");
783 obstack_free (&gcse_obstack, gcse_obstack_bottom);
787 /* Do one last pass of copy propagation, including cprop into
788 conditional jumps. */
790 max_gcse_regno = max_reg_num ();
792 /* This time, go ahead and allow cprop to alter jumps. */
793 one_cprop_pass (pass + 1, 1);
798 fprintf (file, "GCSE of %s: %d basic blocks, ",
799 current_function_name, n_basic_blocks);
800 fprintf (file, "%d pass%s, %d bytes\n\n",
801 pass, pass > 1 ? "es" : "", max_pass_bytes);
804 /* Free our obstack. */
805 obstack_free (&gcse_obstack, NULL_PTR);
806 /* Free reg_set_table. */
808 /* Free storage used to record predecessor/successor data. */
810 /* Free storage allocated by find_basic_blocks. */
811 free_basic_block_vars (0);
812 return run_jump_opt_after_gcse;
815 /* Misc. utilities. */
817 /* Compute which modes support reg/reg copy operations. */
823 #ifndef AVOID_CCMODE_COPIES
826 char *free_point = (char *) oballoc (1);
828 bzero (can_copy_p, NUM_MACHINE_MODES);
831 for (i = 0; i < NUM_MACHINE_MODES; i++)
833 switch (GET_MODE_CLASS (i))
836 #ifdef AVOID_CCMODE_COPIES
839 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
840 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
841 if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
852 /* Free the objects we just allocated. */
856 /* Cover function to xmalloc to record bytes allocated. */
863 return xmalloc (size);
866 /* Cover function to xrealloc.
867 We don't record the additional size since we don't know it.
868 It won't affect memory usage stats much anyway. */
875 return xrealloc (ptr, size);
878 /* Cover function to obstack_alloc.
879 We don't need to record the bytes allocated here since
880 obstack_chunk_alloc is set to gmalloc. */
886 return (char *) obstack_alloc (&gcse_obstack, size);
889 /* Allocate memory for the cuid mapping array,
890 and reg/memory set tracking tables.
892 This is called at the start of each pass. */
901 /* Find the largest UID and create a mapping from UIDs to CUIDs.
902 CUIDs are like UIDs except they increase monotonically, have no gaps,
903 and only apply to real insns. */
905 max_uid = get_max_uid ();
906 n = (max_uid + 1) * sizeof (int);
907 uid_cuid = (int *) gmalloc (n);
908 bzero ((char *) uid_cuid, n);
909 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
911 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
912 INSN_CUID (insn) = i++;
914 INSN_CUID (insn) = i;
917 /* Create a table mapping cuids to insns. */
920 n = (max_cuid + 1) * sizeof (rtx);
921 cuid_insn = (rtx *) gmalloc (n);
922 bzero ((char *) cuid_insn, n);
923 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
925 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
927 CUID_INSN (i) = insn;
932 /* Allocate vars to track sets of regs. */
934 reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
936 /* Allocate vars to track sets of regs, memory per block. */
938 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
940 mem_set_in_block = (char *) gmalloc (n_basic_blocks);
943 /* Free memory allocated by alloc_gcse_mem. */
951 free (reg_set_bitmap);
953 free (reg_set_in_block);
954 free (mem_set_in_block);
958 /* Compute the local properties of each recorded expression.
959 Local properties are those that are defined by the block, irrespective
962 An expression is transparent in a block if its operands are not modified
965 An expression is computed (locally available) in a block if it is computed
966 at least once and expression would contain the same value if the
967 computation was moved to the end of the block.
969 An expression is locally anticipatable in a block if it is computed at
970 least once and expression would contain the same value if the computation
971 was moved to the beginning of the block.
973 We call this routine for cprop, pre and code hoisting. They all
974 compute basically the same information and thus can easily share
977 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
978 local properties. If NULL, then it is not necessary to compute
979 or record that particular property.
981 SETP controls which hash table to look at. If zero, this routine
982 looks at the expr hash table; if nonzero this routine looks at
983 the set hash table. Additionally, TRANSP is computed as ~TRANSP,
984 since this is really cprop's ABSALTERED. */
987 compute_local_properties (transp, comp, antloc, setp)
993 int i, hash_table_size;
994 struct expr **hash_table;
996 /* Initialize any bitmaps that were passed in. */
/* NOTE(review): the guards selecting between the zero (setp/ABSALTERED)
   and all-ones (plain transparency) initializations of TRANSP are on
   lines elided from this extraction -- confirm against the full file.  */
1000 sbitmap_vector_zero (transp, n_basic_blocks);
1002 sbitmap_vector_ones (transp, n_basic_blocks);
1005 sbitmap_vector_zero (comp, n_basic_blocks);
1007 sbitmap_vector_zero (antloc, n_basic_blocks);
1009 /* We use the same code for cprop, pre and hoisting. For cprop
1010 we care about the set hash table, for pre and hoisting we
1011 care about the expr hash table. */
1012 hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1013 hash_table = setp ? set_hash_table : expr_hash_table;
/* Walk every bucket and every chained entry; each expr's bitmap_index
   names the bit to set in the per-block property sbitmaps.  */
1015 for (i = 0; i < hash_table_size; i++)
1019 for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1022 int indx = expr->bitmap_index;
1024 /* The expression is transparent in this block if it is not killed.
1025 We start by assuming all are transparent [none are killed], and
1026 then reset the bits for those that are. */
1029 compute_transp (expr->expr, indx, transp, setp);
1031 /* The occurrences recorded in antic_occr are exactly those that
1032 we want to set to non-zero in ANTLOC. */
1036 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1038 int bb = BLOCK_NUM (occr->insn);
1039 SET_BIT (antloc[bb], indx);
1041 /* While we're scanning the table, this is a good place to
1043 occr->deleted_p = 0;
1047 /* The occurrences recorded in avail_occr are exactly those that
1048 we want to set to non-zero in COMP. */
1052 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1054 int bb = BLOCK_NUM (occr->insn);
1055 SET_BIT (comp[bb], indx);
1057 /* While we're scanning the table, this is a good place to
1063 /* While we're scanning the table, this is a good place to
1065 expr->reaching_reg = 0;
1071 /* Register set information.
1073 `reg_set_table' records where each register is set or otherwise
1076 static struct obstack reg_set_obstack;
/* Allocate reg_set_table with slop for registers created later in the
   pass, and initialize the obstack that individual reg_set nodes live on.  */
1079 alloc_reg_set_mem (n_regs)
1084 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1085 n = reg_set_table_size * sizeof (struct reg_set *);
1086 reg_set_table = (struct reg_set **) gmalloc (n);
1087 bzero ((char *) reg_set_table, n);
/* NOTE(review): "®" below is an encoding artifact -- the original text is
   "&reg_set_obstack" (the HTML entity "&reg" was decoded during extraction).
   Same artifact appears in the obstack_free and obstack_alloc calls later.  */
1089 gcc_obstack_init (®_set_obstack);
1095 free (reg_set_table);
1096 obstack_free (®_set_obstack, NULL_PTR);
1099 /* Record REGNO in the reg_set table. */
1102 record_one_set (regno, insn)
1106 /* allocate a new reg_set element and link it onto the list */
1107 struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
1109 /* If the table isn't big enough, enlarge it. */
1110 if (regno >= reg_set_table_size)
1112 int new_size = regno + REG_SET_TABLE_SLOP;
1113 reg_set_table = (struct reg_set **)
1114 grealloc ((char *) reg_set_table,
1115 new_size * sizeof (struct reg_set *));
/* Only the newly added tail of the grown table is zeroed; the old
   entries are preserved by grealloc.  */
1116 bzero ((char *) (reg_set_table + reg_set_table_size),
1117 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1118 reg_set_table_size = new_size;
1121 new_reg_info = (struct reg_set *) obstack_alloc (®_set_obstack,
1122 sizeof (struct reg_set));
1123 bytes_used += sizeof (struct reg_set);
1124 new_reg_info->insn = insn;
1125 new_reg_info->next = NULL;
1126 if (reg_set_table[regno] == NULL)
1127 reg_set_table[regno] = new_reg_info;
/* Otherwise append at the tail, walking the whole chain (O(n) per insert).  */
1130 reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1131 /* ??? One could keep a "last" pointer to speed this up. */
1132 while (reg_info_ptr1 != NULL)
1134 reg_info_ptr2 = reg_info_ptr1;
1135 reg_info_ptr1 = reg_info_ptr1->next;
1137 reg_info_ptr2->next = new_reg_info;
1141 /* For communication between next two functions (via note_stores). */
1142 static rtx record_set_insn;
1144 /* Called from compute_sets via note_stores to handle one
1145 SET or CLOBBER in an insn. */
1148 record_set_info (dest, setter)
1149 rtx dest, setter ATTRIBUTE_UNUSED;
/* Strip a SUBREG wrapper so the underlying register is examined.  */
1151 if (GET_CODE (dest) == SUBREG)
1152 dest = SUBREG_REG (dest);
/* Only pseudo-register destinations are recorded in reg_set_table.  */
1154 if (GET_CODE (dest) == REG)
1156 if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1157 record_one_set (REGNO (dest), record_set_insn);
1161 /* Scan the function and record each set of each pseudo-register.
1163 This is called once, at the start of the gcse pass.
1164 See the comments for `reg_set_table' for further docs. */
/* NOTE(review): the compute_sets signature and loop header are on lines
   elided here; record_set_insn carries the current insn into the
   note_stores callback above.  */
1174 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1176 record_set_insn = insn;
1177 note_stores (PATTERN (insn), record_set_info);
1179 insn = NEXT_INSN (insn);
1183 /* Hash table support. */
1185 #define NEVER_SET -1
1187 /* For each register, the cuid of the first/last insn in the block to set it,
1188 or -1 if not set. */
1189 static int *reg_first_set;
1190 static int *reg_last_set;
1192 /* While computing "first/last set" info, this is the CUID of first/last insn
1193 to set memory or -1 if not set. `mem_last_set' is also used when
1194 performing GCSE to record whether memory has been set since the beginning
1196 Note that handling of memory is very simple, we don't make any attempt
1197 to optimize things (later). */
1198 static int mem_first_set;
1199 static int mem_last_set;
1201 /* Perform a quick check whether X, the source of a set, is something
1202 we want to consider for GCSE. */
/* NOTE(review): want_to_gcse_p's body (the switch over `code') is elided
   from this extraction; only its first statement survives.  */
1208 enum rtx_code code = GET_CODE (x);
1226 /* Return non-zero if the operands of expression X are unchanged from the
1227 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1228 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1231 oprs_unchanged_p (x, insn, avail_p)
1239 /* repeat is used to turn tail-recursion into iteration. */
1245 code = GET_CODE (x);
/* REG case: for availability (avail_p) the register must not be set at or
   after INSN (reg_last_set < INSN's cuid); for anticipatability it must not
   be set before INSN (reg_first_set >= INSN's cuid).  */
1250 return (reg_last_set[REGNO (x)] == NEVER_SET
1251 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1253 return (reg_first_set[REGNO (x)] == NEVER_SET
1254 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
/* MEM case: mirrors the REG logic but using the single coarse
   mem_first_set/mem_last_set cuids (no alias analysis).  */
1259 if (mem_last_set != NEVER_SET
1260 && mem_last_set >= INSN_CUID (insn))
1265 if (mem_first_set != NEVER_SET
1266 && mem_first_set < INSN_CUID (insn))
/* Generic case: recurse over the operands per the rtx format string,
   with the usual last-operand tail-call-to-loop conversion.  */
1293 i = GET_RTX_LENGTH (code) - 1;
1294 fmt = GET_RTX_FORMAT (code);
1299 rtx tem = XEXP (x, i);
1301 /* If we are about to do the last recursive call
1302 needed at this level, change it into iteration.
1303 This function is called enough to be worth it. */
1309 if (! oprs_unchanged_p (tem, insn, avail_p))
1312 else if (fmt[i] == 'E')
1315 for (j = 0; j < XVECLEN (x, i); j++)
1317 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1326 /* Return non-zero if the operands of expression X are unchanged from
1327 the start of INSN's basic block up to but not including INSN. */
1330 oprs_anticipatable_p (x, insn)
1333 return oprs_unchanged_p (x, insn, 0);
1336 /* Return non-zero if the operands of expression X are unchanged from
1337 INSN to the end of INSN's basic block. */
1340 oprs_available_p (x, insn)
1343 return oprs_unchanged_p (x, insn, 1);
1346 /* Hash expression X.
1347 MODE is only used if X is a CONST_INT.
1348 A boolean indicating if a volatile operand is found or if the expression
1349 contains something we don't want to insert in the table is stored in
1352 ??? One might want to merge this with canon_hash. Later. */
/* Returns a bucket index: hash_expr_1's raw hash reduced mod the table size.  */
1355 hash_expr (x, mode, do_not_record_p, hash_table_size)
1357 enum machine_mode mode;
1358 int *do_not_record_p;
1359 int hash_table_size;
1363 *do_not_record_p = 0;
1365 hash = hash_expr_1 (x, mode, do_not_record_p);
1366 return hash % hash_table_size;
1369 /* Subroutine of hash_expr to do the actual work. */
1372 hash_expr_1 (x, mode, do_not_record_p)
1374 enum machine_mode mode;
1375 int *do_not_record_p;
1382 /* repeat is used to turn tail-recursion into iteration. */
1388 code = GET_CODE (x);
/* Case-by-case hashing; each leaf folds the rtx code and its payload
   into the accumulator.  */
1393 register int regno = REGNO (x);
1394 hash += ((unsigned) REG << 7) + regno;
1400 unsigned HOST_WIDE_INT tem = INTVAL (x);
1401 hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1406 /* This is like the general case, except that it only counts
1407 the integers representing the constant. */
1408 hash += (unsigned) code + (unsigned) GET_MODE (x);
1409 if (GET_MODE (x) != VOIDmode)
1410 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1412 unsigned tem = XWINT (x, i);
1416 hash += ((unsigned) CONST_DOUBLE_LOW (x)
1417 + (unsigned) CONST_DOUBLE_HIGH (x));
1420 /* Assume there is only one rtx object for any given label. */
1422 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1423 differences and differences between each stage's debugging dumps. */
1424 hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1429 /* Don't hash on the symbol's address to avoid bootstrap differences.
1430 Different hash values may cause expressions to be recorded in
1431 different orders and thus different registers to be used in the
1432 final assembler. This also avoids differences in the dump files
1433 between various stages. */
1435 unsigned char *p = (unsigned char *) XSTR (x, 0);
1437 h += (h << 7) + *p++; /* ??? revisit */
1438 hash += ((unsigned) SYMBOL_REF << 7) + h;
/* Volatile memory references and volatile asms/unspecs poison the whole
   expression: set *do_not_record_p so the caller discards it.  */
1443 if (MEM_VOLATILE_P (x))
1445 *do_not_record_p = 1;
1448 hash += (unsigned) MEM;
1459 case UNSPEC_VOLATILE:
1460 *do_not_record_p = 1;
1464 if (MEM_VOLATILE_P (x))
1466 *do_not_record_p = 1;
/* Generic case: fold in code and mode, then recurse over operands per the
   format string ('e' = subexpression, 'E' = vector, 's' = string, 'i' = int),
   bailing out as soon as *do_not_record_p is raised.  */
1474 i = GET_RTX_LENGTH (code) - 1;
1475 hash += (unsigned) code + (unsigned) GET_MODE (x);
1476 fmt = GET_RTX_FORMAT (code);
1481 rtx tem = XEXP (x, i);
1483 /* If we are about to do the last recursive call
1484 needed at this level, change it into iteration.
1485 This function is called enough to be worth it. */
1491 hash += hash_expr_1 (tem, 0, do_not_record_p);
1492 if (*do_not_record_p)
1495 else if (fmt[i] == 'E')
1496 for (j = 0; j < XVECLEN (x, i); j++)
1498 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1499 if (*do_not_record_p)
1502 else if (fmt[i] == 's')
1504 register unsigned char *p = (unsigned char *) XSTR (x, i);
1509 else if (fmt[i] == 'i')
1511 register unsigned tem = XINT (x, i);
1521 /* Hash a set of register REGNO.
1523 Sets are hashed on the register that is set.
1524 This simplifies the PRE copy propagation code.
1526 ??? May need to make things more elaborate. Later, as necessary. */
1529 hash_set (regno, hash_table_size)
1531 int hash_table_size;
1536 return hash % hash_table_size;
1539 /* Return non-zero if exp1 is equivalent to exp2.
1540 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1547 register enum rtx_code code;
1548 register const char *fmt;
/* Fast paths: identical-or-null pointers, differing codes, differing modes.  */
1552 if (x == 0 || y == 0)
1555 code = GET_CODE (x);
1556 if (code != GET_CODE (y))
1559 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1560 if (GET_MODE (x) != GET_MODE (y))
/* Leaf comparisons.  LABEL_REF and SYMBOL_REF compare by pointer identity,
   which relies on those rtx objects being shared/interned.  */
1570 return INTVAL (x) == INTVAL (y);
1573 return XEXP (x, 0) == XEXP (y, 0);
1576 return XSTR (x, 0) == XSTR (y, 0);
1579 return REGNO (x) == REGNO (y);
1581 /* For commutative operations, check both orders. */
1589 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1590 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1591 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1592 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1598 /* Compare the elements. If any pair of corresponding elements
1599 fail to match, return 0 for the whole thing. */
1601 fmt = GET_RTX_FORMAT (code);
1602 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1607 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1612 if (XVECLEN (x, i) != XVECLEN (y, i))
1614 for (j = 0; j < XVECLEN (x, i); j++)
1615 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1620 if (strcmp (XSTR (x, i), XSTR (y, i)))
1625 if (XINT (x, i) != XINT (y, i))
1630 if (XWINT (x, i) != XWINT (y, i))
1645 /* Insert expression X in INSN in the hash table.
1646 If it is already present, record it as the last occurrence in INSN's
1649 MODE is the mode of the value X is being stored into.
1650 It is only used if X is a CONST_INT.
1652 ANTIC_P is non-zero if X is an anticipatable expression.
1653 AVAIL_P is non-zero if X is an available expression. */
1656 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1658 enum machine_mode mode;
1660 int antic_p, avail_p;
1662 int found, do_not_record_p;
1664 struct expr *cur_expr, *last_expr = NULL;
1665 struct occr *antic_occr, *avail_occr;
1666 struct occr *last_occr = NULL;
1668 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1670 /* Do not insert expression in table if it contains volatile operands,
1671 or if hash_expr determines the expression is something we don't want
1672 to or can't handle. */
1673 if (do_not_record_p)
/* Scan the bucket chain for an equivalent expression already recorded.  */
1676 cur_expr = expr_hash_table[hash];
1679 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1681 /* If the expression isn't found, save a pointer to the end of
1683 last_expr = cur_expr;
1684 cur_expr = cur_expr->next_same_hash;
/* Not found: allocate a fresh expr node, chain it into the bucket, and
   give it the next bitmap index (used by the dataflow sbitmaps).  */
1689 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1690 bytes_used += sizeof (struct expr);
1691 if (expr_hash_table[hash] == NULL)
1693 /* This is the first pattern that hashed to this index. */
1694 expr_hash_table[hash] = cur_expr;
1698 /* Add EXPR to end of this hash chain. */
1699 last_expr->next_same_hash = cur_expr;
1701 /* Set the fields of the expr element. */
1703 cur_expr->bitmap_index = n_exprs++;
1704 cur_expr->next_same_hash = NULL;
1705 cur_expr->antic_occr = NULL;
1706 cur_expr->avail_occr = NULL;
1709 /* Now record the occurrence(s). */
/* Anticipatable occurrence list: keep at most one occurrence per basic
   block, preferring the FIRST occurrence in the block.  */
1713 antic_occr = cur_expr->antic_occr;
1715 /* Search for another occurrence in the same basic block. */
1716 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1718 /* If an occurrence isn't found, save a pointer to the end of
1720 last_occr = antic_occr;
1721 antic_occr = antic_occr->next;
1726 /* Found another instance of the expression in the same basic block.
1727 Prefer the currently recorded one. We want the first one in the
1728 block and the block is scanned from start to end. */
1729 ; /* nothing to do */
1733 /* First occurrence of this expression in this basic block. */
1734 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1735 bytes_used += sizeof (struct occr);
1736 /* First occurrence of this expression in any block? */
1737 if (cur_expr->antic_occr == NULL)
1738 cur_expr->antic_occr = antic_occr;
1740 last_occr->next = antic_occr;
1741 antic_occr->insn = insn;
1742 antic_occr->next = NULL;
/* Available occurrence list: symmetric, but prefers the LAST occurrence
   in the block, so an existing same-block entry is overwritten in place.  */
1748 avail_occr = cur_expr->avail_occr;
1750 /* Search for another occurrence in the same basic block. */
1751 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1753 /* If an occurrence isn't found, save a pointer to the end of
1755 last_occr = avail_occr;
1756 avail_occr = avail_occr->next;
1761 /* Found another instance of the expression in the same basic block.
1762 Prefer this occurrence to the currently recorded one. We want
1763 the last one in the block and the block is scanned from start
1765 avail_occr->insn = insn;
1769 /* First occurrence of this expression in this basic block. */
1770 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1771 bytes_used += sizeof (struct occr);
1772 /* First occurrence of this expression in any block? */
1773 if (cur_expr->avail_occr == NULL)
1774 cur_expr->avail_occr = avail_occr;
1776 last_occr->next = avail_occr;
1777 avail_occr->insn = insn;
1778 avail_occr->next = NULL;
1783 /* Insert pattern X in INSN in the hash table.
1784 X is a SET of a reg to either another reg or a constant.
1785 If it is already present, record it as the last occurrence in INSN's
1789 insert_set_in_table (x, insn)
1795 struct expr *cur_expr, *last_expr = NULL;
1796 struct occr *cur_occr, *last_occr = NULL;
/* Precondition: X must be (set (reg) ...); anything else is a caller bug.  */
1798 if (GET_CODE (x) != SET
1799 || GET_CODE (SET_DEST (x)) != REG)
/* Sets are bucketed by destination register number, not by expression.  */
1802 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1804 cur_expr = set_hash_table[hash];
1807 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1809 /* If the expression isn't found, save a pointer to the end of
1811 last_expr = cur_expr;
1812 cur_expr = cur_expr->next_same_hash;
1817 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1818 bytes_used += sizeof (struct expr);
1819 if (set_hash_table[hash] == NULL)
1821 /* This is the first pattern that hashed to this index. */
1822 set_hash_table[hash] = cur_expr;
1826 /* Add EXPR to end of this hash chain. */
1827 last_expr->next_same_hash = cur_expr;
1829 /* Set the fields of the expr element.
1830 We must copy X because it can be modified when copy propagation is
1831 performed on its operands. */
1832 /* ??? Should this go in a different obstack? */
1833 cur_expr->expr = copy_rtx (x);
1834 cur_expr->bitmap_index = n_sets++;
1835 cur_expr->next_same_hash = NULL;
1836 cur_expr->antic_occr = NULL;
1837 cur_expr->avail_occr = NULL;
1840 /* Now record the occurrence. */
/* Only availability is tracked for sets; keep one occurrence per block,
   preferring the last one seen (block is scanned start to end).  */
1842 cur_occr = cur_expr->avail_occr;
1844 /* Search for another occurrence in the same basic block. */
1845 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1847 /* If an occurrence isn't found, save a pointer to the end of
1849 last_occr = cur_occr;
1850 cur_occr = cur_occr->next;
1855 /* Found another instance of the expression in the same basic block.
1856 Prefer this occurrence to the currently recorded one. We want
1857 the last one in the block and the block is scanned from start
1859 cur_occr->insn = insn;
1863 /* First occurrence of this expression in this basic block. */
1864 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1865 bytes_used += sizeof (struct occr);
1866 /* First occurrence of this expression in any block? */
1867 if (cur_expr->avail_occr == NULL)
1868 cur_expr->avail_occr = cur_occr;
1870 last_occr->next = cur_occr;
1871 cur_occr->insn = insn;
1872 cur_occr->next = NULL;
1876 /* Scan pattern PAT of INSN and add an entry to the hash table.
1877 If SET_P is non-zero, this is for the assignment hash table,
1878 otherwise it is for the expression hash table. */
1881 hash_scan_set (pat, insn, set_p)
1885 rtx src = SET_SRC (pat);
1886 rtx dest = SET_DEST (pat);
1888 if (GET_CODE (src) == CALL)
1889 hash_scan_call (src, insn);
1891 if (GET_CODE (dest) == REG)
1893 int regno = REGNO (dest);
1896 /* Only record sets of pseudo-regs in the hash table. */
/* Expression-table path (set_p == 0, guard elided in this extraction).  */
1898 && regno >= FIRST_PSEUDO_REGISTER
1899 /* Don't GCSE something if we can't do a reg/reg copy. */
1900 && can_copy_p [GET_MODE (dest)]
1901 /* Is SET_SRC something we want to gcse? */
1902 && want_to_gcse_p (src))
1904 /* An expression is not anticipatable if its operands are
1905 modified before this insn. */
1906 int antic_p = ! optimize_size && oprs_anticipatable_p (src, insn);
1907 /* An expression is not available if its operands are
1908 subsequently modified, including this insn. */
1909 int avail_p = oprs_available_p (src, insn);
1910 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1912 /* Record sets for constant/copy propagation. */
/* Set-table path (set_p != 0): src must be a pseudo reg or a constant.  */
1914 && regno >= FIRST_PSEUDO_REGISTER
1915 && ((GET_CODE (src) == REG
1916 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1917 && can_copy_p [GET_MODE (dest)])
1918 || GET_CODE (src) == CONST_INT
1919 || GET_CODE (src) == SYMBOL_REF
1920 || GET_CODE (src) == CONST_DOUBLE)
1921 /* A copy is not available if its src or dest is subsequently
1922 modified. Here we want to search from INSN+1 on, but
1923 oprs_available_p searches from INSN on. */
1924 && (insn == BLOCK_END (BLOCK_NUM (insn))
1925 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1926 && oprs_available_p (pat, tmp))))
1927 insert_set_in_table (pat, insn);
/* The next two hooks are placeholders kept so hash_scan_insn has a single
   dispatch shape; they intentionally do nothing today.  */
1932 hash_scan_clobber (x, insn)
1933 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1935 /* Currently nothing to do. */
1939 hash_scan_call (x, insn)
1940 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1942 /* Currently nothing to do. */
1945 /* Process INSN and add hash table entries as appropriate.
1947 Only available expressions that set a single pseudo-reg are recorded.
1949 Single sets in a PARALLEL could be handled, but it's an extra complication
1950 that isn't dealt with right now. The trick is handling the CLOBBERs that
1951 are also in the PARALLEL. Later.
1953 If SET_P is non-zero, this is for the assignment hash table,
1954 otherwise it is for the expression hash table.
1955 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1956 not record any expressions. */
1959 hash_scan_insn (insn, set_p, in_libcall_block)
1962 int in_libcall_block;
1964 rtx pat = PATTERN (insn);
1966 /* Pick out the sets of INSN and for other forms of instructions record
1967 what's been modified. */
1969 if (GET_CODE (pat) == SET && ! in_libcall_block)
1971 /* Ignore obvious no-ops. */
1972 if (SET_SRC (pat) != SET_DEST (pat))
1973 hash_scan_set (pat, insn, set_p);
1975 else if (GET_CODE (pat) == PARALLEL)
1979 for (i = 0; i < XVECLEN (pat, 0); i++)
1981 rtx x = XVECEXP (pat, 0, i);
1983 if (GET_CODE (x) == SET)
1985 if (GET_CODE (SET_SRC (x)) == CALL)
1986 hash_scan_call (SET_SRC (x), insn);
1988 else if (GET_CODE (x) == CLOBBER)
1989 hash_scan_clobber (x, insn);
1990 else if (GET_CODE (x) == CALL)
1991 hash_scan_call (x, insn);
1994 else if (GET_CODE (pat) == CLOBBER)
1995 hash_scan_clobber (pat, insn);
1996 else if (GET_CODE (pat) == CALL)
1997 hash_scan_call (pat, insn);
/* Debug dump of a hash table (expr or set) to FILE, printed in
   bitmap_index order rather than bucket order.  */
2001 dump_hash_table (file, name, table, table_size, total_size)
2004 struct expr **table;
2005 int table_size, total_size;
2008 /* Flattened out table, so it's printed in proper order. */
/* NOTE(review): alloca'd scratch sized by total_size -- a very large table
   could overflow the stack; heap allocation would be safer (debug-only
   path, so left as-is here).  */
2009 struct expr **flat_table = (struct expr **) alloca (total_size * sizeof (struct expr *));
2010 unsigned int *hash_val = (unsigned int *) alloca (total_size * sizeof (unsigned int));
2012 bzero ((char *) flat_table, total_size * sizeof (struct expr *));
2013 for (i = 0; i < table_size; i++)
2017 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2019 flat_table[expr->bitmap_index] = expr;
2020 hash_val[expr->bitmap_index] = i;
2024 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2025 name, table_size, total_size);
2027 for (i = 0; i < total_size; i++)
2029 struct expr *expr = flat_table[i];
2031 fprintf (file, "Index %d (hash value %d)\n ",
2032 expr->bitmap_index, hash_val[i]);
2033 print_rtl (file, expr->expr);
2034 fprintf (file, "\n");
2037 fprintf (file, "\n");
2040 /* Record register first/last/block set information for REGNO in INSN.
2041 reg_first_set records the first place in the block where the register
2042 is set and is used to compute "anticipatability".
2043 reg_last_set records the last place in the block where the register
2044 is set and is used to compute "availability".
2045 reg_set_in_block records whether the register is set in the block
2046 and is used to compute "transparency". */
2049 record_last_reg_set_info (insn, regno)
/* First set wins for reg_first_set; every set updates reg_last_set.  */
2053 if (reg_first_set[regno] == NEVER_SET)
2054 reg_first_set[regno] = INSN_CUID (insn);
2055 reg_last_set[regno] = INSN_CUID (insn);
2056 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2059 /* Record memory first/last/block set information for INSN. */
2062 record_last_mem_set_info (insn)
2065 if (mem_first_set == NEVER_SET)
2066 mem_first_set = INSN_CUID (insn);
2067 mem_last_set = INSN_CUID (insn);
2068 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2071 /* Used for communicating between next two routines. */
2072 static rtx last_set_insn;
2074 /* Called from compute_hash_table via note_stores to handle one
2075 SET or CLOBBER in an insn. */
2078 record_last_set_info (dest, setter)
2079 rtx dest, setter ATTRIBUTE_UNUSED;
2081 if (GET_CODE (dest) == SUBREG)
2082 dest = SUBREG_REG (dest);
2084 if (GET_CODE (dest) == REG)
2085 record_last_reg_set_info (last_set_insn, REGNO (dest));
2086 else if (GET_CODE (dest) == MEM
2087 /* Ignore pushes, they clobber nothing. */
2088 && ! push_operand (dest, GET_MODE (dest)))
2089 record_last_mem_set_info (last_set_insn);
2092 /* Top level function to create an expression or assignment hash table.
2094 Expression entries are placed in the hash table if
2095 - they are of the form (set (pseudo-reg) src),
2096 - src is something we want to perform GCSE on,
2097 - none of the operands are subsequently modified in the block
2099 Assignment entries are placed in the hash table if
2100 - they are of the form (set (pseudo-reg) src),
2101 - src is something we want to perform const/copy propagation on,
2102 - none of the operands or target are subsequently modified in the block
2103 Currently src must be a pseudo-reg or a const_int.
2105 F is the first insn.
2106 SET_P is non-zero for computing the assignment hash table. */
2109 compute_hash_table (set_p)
2114 /* While we compute the hash table we also compute a bit array of which
2115 registers are set in which blocks.
2116 We also compute which blocks set memory, in the absence of aliasing
2117 support [which is TODO].
2118 ??? This isn't needed during const/copy propagation, but it's cheap to
2120 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2121 bzero ((char *) mem_set_in_block, n_basic_blocks);
2123 /* Some working arrays used to track first and last set in each block. */
2124 /* ??? One could use alloca here, but at some size a threshold is crossed
2125 beyond which one should use malloc. Are we at that threshold here? */
2126 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2127 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
/* Two passes per basic block: pass 1 records first/last set cuids for
   registers and memory; pass 2 uses that info to build the hash table.  */
2129 for (bb = 0; bb < n_basic_blocks; bb++)
2133 int in_libcall_block;
2136 /* First pass over the instructions records information used to
2137 determine when registers and memory are first and last set.
2138 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2139 could be moved to compute_sets since they currently don't change. */
2141 for (i = 0; i < max_gcse_regno; i++)
2142 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2143 mem_first_set = NEVER_SET;
2144 mem_last_set = NEVER_SET;
2146 for (insn = BLOCK_HEAD (bb);
2147 insn && insn != NEXT_INSN (BLOCK_END (bb));
2148 insn = NEXT_INSN (insn))
/* A setjmp note conservatively counts as a set of every hard register.  */
2150 #ifdef NON_SAVING_SETJMP
2151 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2152 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2154 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2155 record_last_reg_set_info (insn, regno);
2160 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
/* Calls clobber all call-used (minus the fixed frame/stack/arg/PIC
   registers excluded below) and all global registers, and -- unless the
   call is const -- memory as well.  */
2163 if (GET_CODE (insn) == CALL_INSN)
2165 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2166 if ((call_used_regs[regno]
2167 && regno != STACK_POINTER_REGNUM
2168 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2169 && regno != HARD_FRAME_POINTER_REGNUM
2171 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2172 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2174 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2175 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2178 && regno != FRAME_POINTER_REGNUM)
2179 || global_regs[regno])
2180 record_last_reg_set_info (insn, regno);
2181 if (! CONST_CALL_P (insn))
2182 record_last_mem_set_info (insn);
2185 last_set_insn = insn;
2186 note_stores (PATTERN (insn), record_last_set_info);
2189 /* The next pass builds the hash table. */
/* in_libcall_block brackets REG_LIBCALL..REG_RETVAL sequences so their
   insns are not recorded as expressions.  */
2191 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2192 insn && insn != NEXT_INSN (BLOCK_END (bb));
2193 insn = NEXT_INSN (insn))
2195 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2197 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2198 in_libcall_block = 1;
2199 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2200 in_libcall_block = 0;
2201 hash_scan_insn (insn, set_p, in_libcall_block);
2206 free (reg_first_set);
2207 free (reg_last_set);
2208 /* Catch bugs early. */
2209 reg_first_set = reg_last_set = 0;
2212 /* Allocate space for the set hash table.
2213 N_INSNS is the number of instructions in the function.
2214 It is used to determine the number of buckets to use. */
2217 alloc_set_hash_table (n_insns)
/* Bucket count scales with function size, floored at 11 and forced odd.  */
2222 set_hash_table_size = n_insns / 4;
2223 if (set_hash_table_size < 11)
2224 set_hash_table_size = 11;
2225 /* Attempt to maintain efficient use of hash table.
2226 Making it an odd number is simplest for now.
2227 ??? Later take some measurements. */
2228 set_hash_table_size |= 1;
2229 n = set_hash_table_size * sizeof (struct expr *);
2230 set_hash_table = (struct expr **) gmalloc (n);
2233 /* Free things allocated by alloc_set_hash_table. */
2236 free_set_hash_table ()
2238 free (set_hash_table);
2241 /* Compute the hash table for doing copy/const propagation. */
2244 compute_set_hash_table ()
2246 /* Initialize count of number of entries in hash table. */
2248 bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2250 compute_hash_table (1);
2253 /* Allocate space for the expression hash table.
2254 N_INSNS is the number of instructions in the function.
2255 It is used to determine the number of buckets to use. */
2258 alloc_expr_hash_table (n_insns)
/* Same sizing policy as the set table, but denser (n_insns / 2).  */
2263 expr_hash_table_size = n_insns / 2;
2264 /* Make sure the amount is usable. */
2265 if (expr_hash_table_size < 11)
2266 expr_hash_table_size = 11;
2267 /* Attempt to maintain efficient use of hash table.
2268 Making it an odd number is simplest for now.
2269 ??? Later take some measurements. */
2270 expr_hash_table_size |= 1;
2271 n = expr_hash_table_size * sizeof (struct expr *);
2272 expr_hash_table = (struct expr **) gmalloc (n);
2275 /* Free things allocated by alloc_expr_hash_table. */
2278 free_expr_hash_table ()
2280 free (expr_hash_table);
2283 /* Compute the hash table for doing GCSE. */
2286 compute_expr_hash_table ()
2288 /* Initialize count of number of entries in hash table. */
2290 bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2292 compute_hash_table (0);
2295 /* Expression tracking support. */
2297 /* Lookup pattern PAT in the expression table.
2298 The result is a pointer to the table entry, or NULL if not found. */
2300 static struct expr *
2304 int do_not_record_p;
2305 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2306 expr_hash_table_size);
/* Unhashable patterns (e.g. volatile operands) were never inserted.  */
2309 if (do_not_record_p)
2312 expr = expr_hash_table[hash];
2314 while (expr && ! expr_equiv_p (expr->expr, pat))
2315 expr = expr->next_same_hash;
2320 /* Lookup REGNO in the set table.
2321 If PAT is non-NULL look for the entry that matches it, otherwise return
2322 the first entry for REGNO.
2323 The result is a pointer to the table entry, or NULL if not found. */
2325 static struct expr *
2326 lookup_set (regno, pat)
2330 unsigned int hash = hash_set (regno, set_hash_table_size);
2333 expr = set_hash_table[hash];
/* With PAT: exact match; without: first entry whose SET_DEST is REGNO
   (needed since different regnos can share a bucket).  */
2337 while (expr && ! expr_equiv_p (expr->expr, pat))
2338 expr = expr->next_same_hash;
2342 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2343 expr = expr->next_same_hash;
2349 /* Return the next entry for REGNO in list EXPR. */
2351 static struct expr *
2352 next_set (regno, expr)
2357 expr = expr->next_same_hash;
2358 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2362 /* Reset tables used to keep track of what's still available [since the
2363 start of the block]. */
2366 reset_opr_set_tables ()
2368 /* Maintain a bitmap of which regs have been set since beginning of
2370 sbitmap_zero (reg_set_bitmap);
2371 /* Also keep a record of the last instruction to modify memory.
2372 For now this is very trivial, we only record whether any memory
2373 location has been modified. */
2377 /* Return non-zero if the operands of X are not set before INSN in
2378 INSN's basic block. */
2381 oprs_not_set_p (x, insn)
2388 /* repeat is used to turn tail-recursion into iteration. */
2394 code = GET_CODE (x);
2409 if (mem_last_set != 0)
2415 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2421 fmt = GET_RTX_FORMAT (code);
2422 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2427 /* If we are about to do the last recursive call
2428 needed at this level, change it into iteration.
2429 This function is called enough to be worth it. */
2435 not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2439 else if (fmt[i] == 'E')
2442 for (j = 0; j < XVECLEN (x, i); j++)
2444 int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2454 /* Mark things set by a CALL. */
2460 mem_last_set = INSN_CUID (insn);
2463 /* Mark things set by a SET. */
2466 mark_set (pat, insn)
2469 rtx dest = SET_DEST (pat);
/* Strip wrappers so we see the register or memory actually written. */
2471 while (GET_CODE (dest) == SUBREG
2472 || GET_CODE (dest) == ZERO_EXTRACT
2473 || GET_CODE (dest) == SIGN_EXTRACT
2474 || GET_CODE (dest) == STRICT_LOW_PART)
2475 dest = XEXP (dest, 0);
2477 if (GET_CODE (dest) == REG)
2478 SET_BIT (reg_set_bitmap, REGNO (dest));
2479 else if (GET_CODE (dest) == MEM)
2480 mem_last_set = INSN_CUID (insn);
/* A SET whose source is a CALL also modifies memory; the statement that
   handles it (presumably a call to mark_call) is elided.  */
2482 if (GET_CODE (SET_SRC (pat)) == CALL)
2486 /* Record things set by a CLOBBER. */
2489 mark_clobber (pat, insn)
2492 rtx clob = XEXP (pat, 0);
2494 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2495 clob = XEXP (clob, 0);
2497 if (GET_CODE (clob) == REG)
2498 SET_BIT (reg_set_bitmap, REGNO (clob));
/* NOTE(review): an `else' presumably precedes this in the elided text:
   a non-register clobber is treated as a memory modification.  */
2500 mem_last_set = INSN_CUID (insn);
2503 /* Record things set by INSN.
2504 This data is used by oprs_not_set_p. */
2507 mark_oprs_set (insn)
2510 rtx pat = PATTERN (insn);
2512 if (GET_CODE (pat) == SET)
2513 mark_set (pat, insn);
2514 else if (GET_CODE (pat) == PARALLEL)
/* Dispatch each element of the PARALLEL on its own. */
2518 for (i = 0; i < XVECLEN (pat, 0); i++)
2520 rtx x = XVECEXP (pat, 0, i);
2522 if (GET_CODE (x) == SET)
/* NOTE(review): the mark_set call for this arm is elided.  */
2524 else if (GET_CODE (x) == CLOBBER)
2525 mark_clobber (x, insn);
2526 else if (GET_CODE (x) == CALL)
2530 else if (GET_CODE (pat) == CLOBBER)
2531 mark_clobber (pat, insn);
2532 else if (GET_CODE (pat) == CALL)
2537 /* Classic GCSE reaching definition support. */
2539 /* Allocate reaching def variables. */
2542 alloc_rd_mem (n_blocks, n_insns)
2543 int n_blocks, n_insns;
/* Each vector holds one sbitmap per basic block with one bit per insn
   (indexed by cuid); all four start zeroed.  */
2545 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2546 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2548 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2549 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2551 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2552 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2554 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2555 sbitmap_vector_zero (rd_out, n_basic_blocks);
2558 /* Free reaching def variables. */
/* NOTE(review): the function header and the frees of rd_kill, rd_gen and
   rd_out are elided; only the free of reaching_defs is visible.  */
2565 free (reaching_defs);
2569 /* Add INSN to the kills of BB.
2570 REGNO, set in BB, is killed by INSN. */
2573 handle_rd_kill_set (insn, regno, bb)
2577 struct reg_set *this_reg = reg_set_table[regno];
/* Walk every recorded setting of REGNO; each one in a different block
   from INSN kills the definition reaching BB.  (Loop construct around
   these statements is elided in this listing.)  */
2581 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2582 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2583 this_reg = this_reg->next;
2587 /* Compute the set of kill's for reaching definitions. */
2595 For each set bit in `gen' of the block (i.e each insn which
2596 generates a definition in the block)
2597 Call the reg set by the insn corresponding to that bit regx
2598 Look at the linked list starting at reg_set_table[regx]
2599 For each setting of regx in the linked list, which is not in
2601 Set the bit in `kill' corresponding to that insn
2604 for (bb = 0; bb < n_basic_blocks; bb++)
2606 for (cuid = 0; cuid < max_cuid; cuid++)
2608 if (TEST_BIT (rd_gen[bb], cuid))
2610 rtx insn = CUID_INSN (cuid);
2611 rtx pat = PATTERN (insn);
/* A call clobbers every call-used hard register (with the usual
   stack/frame/arg/PIC pointer exceptions) and every global register.  */
2613 if (GET_CODE (insn) == CALL_INSN)
2617 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2619 if ((call_used_regs[regno]
2620 && regno != STACK_POINTER_REGNUM
2621 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2622 && regno != HARD_FRAME_POINTER_REGNUM
2624 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2625 && ! (regno == ARG_POINTER_REGNUM
2626 && fixed_regs[regno])
2628 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2629 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2631 && regno != FRAME_POINTER_REGNUM)
2632 || global_regs[regno])
2633 handle_rd_kill_set (insn, regno, bb);
/* Register-setting SETs and CLOBBERs inside a PARALLEL each add kills. */
2637 if (GET_CODE (pat) == PARALLEL)
2641 /* We work backwards because ... */
2642 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2644 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2645 if ((code == SET || code == CLOBBER)
2646 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2647 handle_rd_kill_set (insn,
2648 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2652 else if (GET_CODE (pat) == SET)
2654 if (GET_CODE (SET_DEST (pat)) == REG)
2656 /* Each setting of this register outside of this block
2657 must be marked in the set of kills in this block. */
2658 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2661 /* FIXME: CLOBBER? */
2667 /* Compute the reaching definitions as in
2668 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2669 Chapter 10. It is the same algorithm as used for computing available
2670 expressions but applied to the gens and kills of reaching definitions. */
2675 int bb, changed, passes;
2677 for (bb = 0; bb < n_basic_blocks; bb++)
2678 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
/* Iterate to a fixed point:
     reaching_defs[bb] = union of rd_out over predecessors of bb;
     rd_out[bb]        = rd_gen[bb] | (reaching_defs[bb] & ~rd_kill[bb]).
   (The enclosing `while (changed)' loop is elided in this listing.)  */
2685 for (bb = 0; bb < n_basic_blocks; bb++)
2687 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2688 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2689 reaching_defs[bb], rd_kill[bb]);
2695 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2698 /* Classic GCSE available expression support. */
2700 /* Allocate memory for available expression computation. */
2703 alloc_avail_expr_mem (n_blocks, n_exprs)
2704 int n_blocks, n_exprs;
/* One sbitmap per basic block, one bit per hashed expression.  */
2706 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2707 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2709 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2710 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2712 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2713 sbitmap_vector_zero (ae_in, n_basic_blocks);
2715 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2716 sbitmap_vector_zero (ae_out, n_basic_blocks);
/* u_bitmap is the universal set: used as the "all expressions" operand
   when initializing the dataflow solution.  */
2718 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2719 sbitmap_ones (u_bitmap);
/* Free the bitmaps allocated by alloc_avail_expr_mem.
   NOTE(review): the body is elided from this listing.  */
2723 free_avail_expr_mem ()
2732 /* Compute the set of available expressions generated in each basic block. */
2739 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2740 This is all we have to do because an expression is not recorded if it
2741 is not available, and the only expressions we want to work with are the
2742 ones that are recorded. */
2744 for (i = 0; i < expr_hash_table_size; i++)
2746 struct expr *expr = expr_hash_table[i];
2747 while (expr != NULL)
2749 struct occr *occr = expr->avail_occr;
2750 while (occr != NULL)
2752 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
/* NOTE(review): `occr = occr->next;' is elided from this listing.  */
2755 expr = expr->next_same_hash;
2760 /* Return non-zero if expression X is killed in BB. */
2763 expr_killed_p (x, bb)
2771 /* repeat is used to turn tail-recursion into iteration. */
2777 code = GET_CODE (x);
/* A REG is killed if the block sets it; a MEM is conservatively killed
   whenever any memory store happens in the block.  (Switch framing is
   elided from this listing.)  */
2781 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2784 if (mem_set_in_block[bb])
/* Recurse over the operands of X. */
2804 i = GET_RTX_LENGTH (code) - 1;
2805 fmt = GET_RTX_FORMAT (code);
2810 rtx tem = XEXP (x, i);
2812 /* If we are about to do the last recursive call
2813 needed at this level, change it into iteration.
2814 This function is called enough to be worth it. */
2820 if (expr_killed_p (tem, bb))
2823 else if (fmt[i] == 'E')
2826 for (j = 0; j < XVECLEN (x, i); j++)
2828 if (expr_killed_p (XVECEXP (x, i, j), bb))
2837 /* Compute the set of available expressions killed in each basic block. */
2844 for (bb = 0; bb < n_basic_blocks; bb++)
2846 for (i = 0; i < expr_hash_table_size; i++)
2848 struct expr *expr = expr_hash_table[i];
2850 for ( ; expr != NULL; expr = expr->next_same_hash)
2852 /* Skip EXPR if generated in this block. */
2853 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2856 if (expr_killed_p (expr->expr, bb))
2857 SET_BIT (ae_kill[bb], expr->bitmap_index);
2863 /* Compute available expressions.
2865 Implement the algorithm to find available expressions
2866 as given in the Aho Sethi Ullman book, pages 627-631. */
2869 compute_available ()
2871 int bb, changed, passes;
/* Entry block: nothing available on entry; out = gen.  All other
   blocks are optimistically initialized to "everything but kills".  */
2873 sbitmap_zero (ae_in[0]);
2875 sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/);
2877 for (bb = 1; bb < n_basic_blocks; bb++)
2878 sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]);
/* Iterate to a fixed point; the enclosing `while (changed)' loop is
   elided from this listing.  */
2885 for (bb = 1; bb < n_basic_blocks; bb++)
2887 sbitmap_intersection_of_preds (ae_in[bb], ae_out, bb);
2888 changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb],
2889 ae_in[bb], ae_kill[bb]);
2895 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
2898 /* Actually perform the Classic GCSE optimizations. */
2900 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2902 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2903 as a positive reach. We want to do this when there are two computations
2904 of the expression in the block.
2906 VISITED is a pointer to a working buffer for tracking which BB's have
2907 been visited. It is NULL for the top-level call.
2909 We treat reaching expressions that go through blocks containing the same
2910 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2911 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2912 2 as not reaching. The intent is to improve the probability of finding
2913 only one reaching expression and to reduce register lifetimes by picking
2914 the closest such expression. */
2917 expr_reaches_here_p (occr, expr, bb, check_self_loop, visited)
2921 int check_self_loop;
/* Lazily allocate the visited buffer on the top-level call only. */
2926 if (visited == NULL)
2928 visited = (char *) alloca (n_basic_blocks);
2929 bzero (visited, n_basic_blocks);
/* Depth-first walk backwards through the predecessors of BB. */
2932 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2934 int pred_bb = pred->src->index;
2936 if (visited[pred_bb])
2938 /* This predecessor has already been visited.
2942 else if (pred_bb == bb)
2944 /* BB loops on itself. */
2946 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2947 && BLOCK_NUM (occr->insn) == pred_bb)
2949 visited[pred_bb] = 1;
2951 /* Ignore this predecessor if it kills the expression. */
2952 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2953 visited[pred_bb] = 1;
2954 /* Does this predecessor generate this expression? */
2955 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2957 /* Is this the occurrence we're looking for?
2958 Note that there's only one generating occurrence per block
2959 so we just need to check the block number. */
2960 if (BLOCK_NUM (occr->insn) == pred_bb)
2962 visited[pred_bb] = 1;
2964 /* Neither gen nor kill. */
2967 visited[pred_bb] = 1;
2968 if (expr_reaches_here_p (occr, expr, pred_bb, check_self_loop, visited))
2973 /* All paths have been checked. */
2977 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2978 If there is more than one such instruction, return NULL.
2980 Called only by handle_avail_expr. */
2983 computing_insn (expr, insn)
2987 int bb = BLOCK_NUM (insn);
/* Single occurrence: it is the answer, unless it is in INSN's own block
   (a flow-graph loop), in which case there is nothing to do.  */
2989 if (expr->avail_occr->next == NULL)
2991 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2993 /* The available expression is actually itself
2994 (i.e. a loop in the flow graph) so do nothing. */
2997 /* (FIXME) Case that we found a pattern that was created by
2998 a substitution that took place. */
2999 return expr->avail_occr->insn;
3003 /* Pattern is computed more than once.
3004 Search backwards from this insn to see how many of these
3005 computations actually reach this insn. */
3007 rtx insn_computes_expr = NULL;
3010 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3012 if (BLOCK_NUM (occr->insn) == bb)
3014 /* The expression is generated in this block.
3015 The only time we care about this is when the expression
3016 is generated later in the block [and thus there's a loop].
3017 We let the normal cse pass handle the other cases. */
3018 if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3020 if (expr_reaches_here_p (occr, expr, bb, 1, NULL))
/* NOTE(review): the elided lines presumably bail out (return NULL) when
   a second reaching computation has already been found.  */
3025 insn_computes_expr = occr->insn;
3029 else /* Computation of the pattern outside this block. */
3031 if (expr_reaches_here_p (occr, expr, bb, 0, NULL))
3036 insn_computes_expr = occr->insn;
3041 if (insn_computes_expr == NULL)
3043 return insn_computes_expr;
3047 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3048 Only called by can_disregard_other_sets. */
3051 def_reaches_here_p (insn, def_insn)
/* Fast path: the global reaching-defs solution already answers it. */
3056 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
/* Same-block case: DEF_INSN reaches if it precedes INSN and the defined
   register is not set again in between.  */
3059 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3061 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3063 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3065 if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3066 reg = XEXP (PATTERN (def_insn), 0);
3067 else if (GET_CODE (PATTERN (def_insn)) == SET)
3068 reg = SET_DEST (PATTERN (def_insn));
3071 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3080 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3081 The value returned is the number of definitions that reach INSN.
3082 Returning a value of zero means that [maybe] more than one definition
3083 reaches INSN and the caller can't perform whatever optimization it is
3084 trying. i.e. it is always safe to return zero. */
3087 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3088 struct reg_set **addr_this_reg;
3092 int number_of_reaching_defs = 0;
3093 struct reg_set *this_reg = *addr_this_reg;
/* Walk every recorded setting of the register; count the ones that
   actually reach INSN (the enclosing loop construct is elided).  */
3097 if (def_reaches_here_p (insn, this_reg->insn))
3099 number_of_reaching_defs++;
3100 /* Ignore parallels for now. */
3101 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
/* NOTE(review): the elided condition presumably tests !for_combine.  */
3104 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3105 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3106 SET_SRC (PATTERN (insn)))))
3108 /* A setting of the reg to a different value reaches INSN. */
3111 if (number_of_reaching_defs > 1)
3113 /* If in this setting the value the register is being
3114 set to is equal to the previous value the register
3115 was set to and this setting reaches the insn we are
3116 trying to do the substitution on then we are ok. */
3118 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3120 if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3121 SET_SRC (PATTERN (insn))))
/* Hand the last acceptable setting back to the caller. */
3124 *addr_this_reg = this_reg;
3127 /* prev_this_reg = this_reg; */
3128 this_reg = this_reg->next;
3131 return number_of_reaching_defs;
3134 /* Expression computed by insn is available and the substitution is legal,
3135 so try to perform the substitution.
3137 The result is non-zero if any changes were made. */
3140 handle_avail_expr (insn, expr)
3144 rtx pat, insn_computes_expr;
3146 struct reg_set *this_reg;
3147 int found_setting, use_src;
3150 /* We only handle the case where one computation of the expression
3151 reaches this instruction. */
3152 insn_computes_expr = computing_insn (expr, insn);
3153 if (insn_computes_expr == NULL)
3159 /* At this point we know only one computation of EXPR outside of this
3160 block reaches this insn. Now try to find a register that the
3161 expression is computed into. */
3163 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3165 /* This is the case when the available expression that reaches
3166 here has already been handled as an available expression. */
3167 int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3168 /* If the register was created by GCSE we can't use `reg_set_table',
3169 however we know it's set only once. */
3170 if (regnum_for_replacing >= max_gcse_regno
3171 /* If the register the expression is computed into is set only once,
3172 or only one set reaches this insn, we can use it. */
3173 || (((this_reg = reg_set_table[regnum_for_replacing]),
3174 this_reg->next == NULL)
3175 || can_disregard_other_sets (&this_reg, insn, 0)))
/* NOTE(review): the elided lines presumably set found_setting and
   use_src before the `else' branch below.  */
3184 int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3185 /* This shouldn't happen. */
3186 if (regnum_for_replacing >= max_gcse_regno)
3188 this_reg = reg_set_table[regnum_for_replacing];
3189 /* If the register the expression is computed into is set only once,
3190 or only one set reaches this insn, use it. */
3191 if (this_reg->next == NULL
3192 || can_disregard_other_sets (&this_reg, insn, 0))
/* Case 1: a usable register was found -- substitute it for INSN's
   source.  */
3198 pat = PATTERN (insn);
3200 to = SET_SRC (PATTERN (insn_computes_expr));
3202 to = SET_DEST (PATTERN (insn_computes_expr));
3203 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3205 /* We should be able to ignore the return code from validate_change but
3206 to play it safe we check. */
3210 if (gcse_file != NULL)
3212 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3213 INSN_UID (insn), REGNO (to),
3214 use_src ? "from" : "set in",
3215 INSN_UID (insn_computes_expr));
3220 /* The register that the expr is computed into is set more than once. */
3221 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3223 /* Insert an insn after insnx that copies the reg set in insnx
3224 into a new pseudo register call this new register REGN.
3225 From insnb until end of basic block or until REGB is set
3226 replace all uses of REGB with REGN. */
3229 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3231 /* Generate the new insn. */
3232 /* ??? If the change fails, we return 0, even though we created
3233 an insn. I think this is ok. */
3235 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3236 SET_DEST (PATTERN (insn_computes_expr))),
3237 insn_computes_expr);
3238 /* Keep block number table up to date. */
3239 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3240 /* Keep register set table up to date. */
3241 record_one_set (REGNO (to), new_insn);
3243 gcse_create_count++;
3244 if (gcse_file != NULL)
3246 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3247 INSN_UID (NEXT_INSN (insn_computes_expr)),
3248 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3249 INSN_UID (insn_computes_expr));
3250 fprintf (gcse_file, " into newly allocated reg %d\n", REGNO (to));
3253 pat = PATTERN (insn);
3255 /* Do register replacement for INSN. */
3256 changed = validate_change (insn, &SET_SRC (pat),
3257 SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3260 /* We should be able to ignore the return code from validate_change but
3261 to play it safe we check. */
3265 if (gcse_file != NULL)
3267 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3269 REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3270 INSN_UID (insn_computes_expr));
3279 /* Perform classic GCSE.
3280 This is called by one_classic_gcse_pass after all the dataflow analysis
3283 The result is non-zero if a change was made. */
/* Scan every insn in every block except the entry block, replacing
   redundant computations whose expression is available on block entry.  */
3291 /* Note we start at block 1. */
3294 for (bb = 1; bb < n_basic_blocks; bb++)
3296 /* Reset tables used to keep track of what's still valid [since the
3297 start of the block]. */
3298 reset_opr_set_tables ();
3300 for (insn = BLOCK_HEAD (bb);
3301 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3302 insn = NEXT_INSN (insn))
3304 /* Is insn of form (set (pseudo-reg) ...)? */
3306 if (GET_CODE (insn) == INSN
3307 && GET_CODE (PATTERN (insn)) == SET
3308 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3309 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3311 rtx pat = PATTERN (insn);
3312 rtx src = SET_SRC (pat);
3315 if (want_to_gcse_p (src)
3316 /* Is the expression recorded? */
3317 && ((expr = lookup_expr (src)) != NULL)
3318 /* Is the expression available [at the start of the
3320 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3321 /* Are the operands unchanged since the start of the
3323 && oprs_not_set_p (src, insn))
3324 changed |= handle_avail_expr (insn, expr);
3327 /* Keep track of everything modified by this insn. */
3328 /* ??? Need to be careful w.r.t. mods done to INSN. */
3329 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3330 mark_oprs_set (insn);
3337 /* Top level routine to perform one classic GCSE pass.
3339 Return non-zero if a change was made. */
3342 one_classic_gcse_pass (pass)
3347 gcse_subst_count = 0;
3348 gcse_create_count = 0;
/* Build the expression hash table, the reaching-definition and
   available-expression dataflow solutions, then run classic_gcse and
   release everything in reverse order of allocation.  */
3350 alloc_expr_hash_table (max_cuid);
3351 alloc_rd_mem (n_basic_blocks, max_cuid);
3352 compute_expr_hash_table ();
3354 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3355 expr_hash_table_size, n_exprs);
3360 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3363 compute_available ();
3364 changed = classic_gcse ();
3365 free_avail_expr_mem ();
3368 free_expr_hash_table ();
3372 fprintf (gcse_file, "\n");
3373 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3374 current_function_name, pass,
3375 bytes_used, gcse_subst_count, gcse_create_count);
3381 /* Compute copy/constant propagation working variables. */
3383 /* Local properties of assignments. */
/* pavloc/absaltered: per-block local dataflow properties (presumably
   "potentially available locally" / "absolutely altered" -- confirm
   against compute_local_properties).  */
3385 static sbitmap *cprop_pavloc;
3386 static sbitmap *cprop_absaltered;
3388 /* Global properties of assignments (computed from the local properties). */
3390 static sbitmap *cprop_avin;
3391 static sbitmap *cprop_avout;
3393 /* Allocate vars used for copy/const propagation.
3394 N_BLOCKS is the number of basic blocks.
3395 N_SETS is the number of sets. */
3398 alloc_cprop_mem (n_blocks, n_sets)
3399 int n_blocks, n_sets;
/* One sbitmap per basic block, one bit per assignment.  */
3401 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3402 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3404 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3405 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3408 /* Free vars used by copy/const propagation. */
3413 free (cprop_pavloc);
3414 free (cprop_absaltered);
/* NOTE(review): the frees of cprop_avin and cprop_avout are elided from
   this listing.  */
3419 /* For each block, compute whether X is transparent.
3420 X is either an expression or an assignment [though we don't care which,
3421 for this context an assignment is treated as an expression].
3422 For each block where an element of X is modified, set (SET_P == 1) or reset
3423 (SET_P == 0) the INDX bit in BMAP. */
3426 compute_transp (x, indx, bmap, set_p)
3436 /* repeat is used to turn tail-recursion into iteration. */
3442 code = GET_CODE (x);
/* REG case: hard registers consult the per-block reg_set_in_block
   bitmaps; pseudos walk the reg_set_table list of setting insns.  */
3448 int regno = REGNO (x);
3452 if (regno < FIRST_PSEUDO_REGISTER)
3454 for (bb = 0; bb < n_basic_blocks; bb++)
3455 if (TEST_BIT (reg_set_in_block[bb], regno))
3456 SET_BIT (bmap[bb], indx);
3460 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3462 bb = BLOCK_NUM (r->insn);
3463 SET_BIT (bmap[bb], indx);
3469 if (regno < FIRST_PSEUDO_REGISTER)
3471 for (bb = 0; bb < n_basic_blocks; bb++)
3472 if (TEST_BIT (reg_set_in_block[bb], regno))
3473 RESET_BIT (bmap[bb], indx);
3477 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3479 bb = BLOCK_NUM (r->insn);
3480 RESET_BIT (bmap[bb], indx);
/* MEM case: conservatively treat any memory store in a block as
   modifying X.  */
3490 for (bb = 0; bb < n_basic_blocks; bb++)
3491 if (mem_set_in_block[bb])
3492 SET_BIT (bmap[bb], indx);
3496 for (bb = 0; bb < n_basic_blocks; bb++)
3497 if (mem_set_in_block[bb])
3498 RESET_BIT (bmap[bb], indx);
/* Default: recurse over the operands of X. */
3518 i = GET_RTX_LENGTH (code) - 1;
3519 fmt = GET_RTX_FORMAT (code);
3524 rtx tem = XEXP (x, i);
3526 /* If we are about to do the last recursive call
3527 needed at this level, change it into iteration.
3528 This function is called enough to be worth it. */
3534 compute_transp (tem, indx, bmap, set_p);
3536 else if (fmt[i] == 'E')
3539 for (j = 0; j < XVECLEN (x, i); j++)
3540 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3545 /* Compute the available expressions at the start and end of each
3546 basic block for cprop. This particular dataflow equation is
3547 used often enough that we might want to generalize it and make
3548 as a subroutine for other global optimizations that need available
3549 in/out information. */
3551 compute_cprop_avinout ()
3553 int bb, changed, passes;
/* Nothing available on entry to block 0; all other out-sets start
   optimistically full.  */
3555 sbitmap_zero (cprop_avin[0]);
3556 sbitmap_vector_ones (cprop_avout, n_basic_blocks);
/* Iterate to a fixed point (the enclosing `while (changed)' loop is
   elided from this listing).  */
3563 for (bb = 0; bb < n_basic_blocks; bb++)
3566 sbitmap_intersection_of_preds (cprop_avin[bb], cprop_avout, bb);
3567 changed |= sbitmap_union_of_diff (cprop_avout[bb],
3570 cprop_absaltered[bb]);
3576 fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes);
3579 /* Top level routine to do the dataflow analysis needed by copy/const
/* Compute the per-block local properties, then solve the global
   availability equations.  */
3583 compute_cprop_data ()
3585 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3586 compute_cprop_avinout ();
3589 /* Copy/constant propagation. */
3591 /* Maximum number of register uses in an insn that we handle. */
/* NOTE(review): the #define of MAX_USES itself is elided from this
   listing.  */
3594 /* Table of uses found in an insn.
3595 Allocated statically to avoid alloc/free complexity and overhead. */
3596 static struct reg_use reg_use_table[MAX_USES];
3598 /* Index into `reg_use_table' while building it. */
3599 static int reg_use_count;
3601 /* Set up a list of register numbers used in INSN.
3602 The found uses are stored in `reg_use_table'.
3603 `reg_use_count' is initialized to zero before entry, and
3604 contains the number of uses in the table upon exit.
3606 ??? If a register appears multiple times we will record it multiple
3607 times. This doesn't hurt anything but it will slow things down. */
3617 /* repeat is used to turn tail-recursion into iteration. */
3623 code = GET_CODE (x);
/* REG case: record the use, silently stopping when the fixed-size
   table is full.  */
3627 if (reg_use_count == MAX_USES)
3629 reg_use_table[reg_use_count].reg_rtx = x;
3647 case ASM_INPUT: /*FIXME*/
/* For a SET, only a MEM destination contains uses (its address). */
3651 if (GET_CODE (SET_DEST (x)) == MEM)
3652 find_used_regs (SET_DEST (x));
3660 /* Recursively scan the operands of this expression. */
3662 fmt = GET_RTX_FORMAT (code);
3663 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3667 /* If we are about to do the last recursive call
3668 needed at this level, change it into iteration.
3669 This function is called enough to be worth it. */
3675 find_used_regs (XEXP (x, i));
3677 else if (fmt[i] == 'E')
3680 for (j = 0; j < XVECLEN (x, i); j++)
3681 find_used_regs (XVECEXP (x, i, j));
3686 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3687 Returns non-zero is successful. */
3690 try_replace_reg (from, to, insn)
3693 /* If this fails we could try to simplify the result of the
3694 replacement and attempt to recognize the simplified insn.
3696 But we need a general simplify_rtx that doesn't have pass
3697 specific state variables. I'm not aware of one at the moment. */
3698 return validate_replace_src (from, to, insn);
3701 /* Find a set of REGNO that is available on entry to INSN's block.
3702 Returns NULL if not found. */
3704 static struct expr *
3705 find_avail_set (regno, insn)
3709 /* SET1 contains the last set found that can be returned to the caller for
3710 use in a substitution. */
3711 struct expr *set1 = 0;
3713 /* Loops are not possible here. To get a loop we would need two sets
3714 available at the start of the block containing INSN. ie we would
3715 need two sets like this available at the start of the block:
3717 (set (reg X) (reg Y))
3718 (set (reg Y) (reg X))
3720 This can not happen since the set of (reg Y) would have killed the
3721 set of (reg X) making it unavailable at the start of this block. */
/* Chain-following loop: each iteration looks up an available set of the
   current REGNO and, for copies, continues with the source register.  */
3725 struct expr *set = lookup_set (regno, NULL_RTX);
3727 /* Find a set that is available at the start of the block
3728 which contains INSN. */
3731 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3733 set = next_set (regno, set);
3736 /* If no available set was found we've reached the end of the
3737 (possibly empty) copy chain. */
3741 if (GET_CODE (set->expr) != SET)
3744 src = SET_SRC (set->expr);
3746 /* We know the set is available.
3747 Now check that SRC is ANTLOC (i.e. none of the source operands
3748 have changed since the start of the block).
3750 If the source operand changed, we may still use it for the next
3751 iteration of this loop, but we may not use it for substitutions. */
3752 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
/* NOTE(review): `set1 = set;' is presumably the elided statement here.  */
3755 /* If the source of the set is anything except a register, then
3756 we have reached the end of the copy chain. */
3757 if (GET_CODE (src) != REG)
3760 /* Follow the copy chain, ie start another iteration of the loop
3761 and see if we have an available copy into SRC. */
3762 regno = REGNO (src);
3765 /* SET1 holds the last set that was available and anticipatable at
3770 /* Subroutine of cprop_insn that tries to propagate constants into
3771 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3772 that we can use for substitutions.
3773 REG_USED is the use we will try to replace, SRC is the constant we
3774 will try to substitute for it.
3775 Returns nonzero if a change was made. */
3777 cprop_jump (insn, copy, reg_used, src)
3779 struct reg_use *reg_used;
3782 rtx set = PATTERN (copy);
3785 /* Replace the register with the appropriate constant. */
3786 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
/* The source of a conditional jump is an IF_THEN_ELSE, so try the
   ternary simplifier on it.  */
3788 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3789 GET_MODE (SET_SRC (set)),
3790 GET_MODE (XEXP (SET_SRC (set), 0)),
3791 XEXP (SET_SRC (set), 0),
3792 XEXP (SET_SRC (set), 1),
3793 XEXP (SET_SRC (set), 2));
3795 /* If no simplification can be made, then try the next
3800 SET_SRC (set) = temp;
3802 /* That may have changed the structure of TEMP, so
3803 force it to be rerecognized if it has not turned
3804 into a nop or unconditional jump. */
3806 INSN_CODE (copy) = -1;
3807 if ((SET_DEST (set) == pc_rtx
3808 && (SET_SRC (set) == pc_rtx
3809 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3810 || recog (PATTERN (copy), copy, NULL) >= 0)
3812 /* This has either become an unconditional jump
3813 or a nop-jump. We'd like to delete nop jumps
3814 here, but doing so confuses gcse. So we just
3815 make the replacement and let later passes
3817 PATTERN (insn) = set;
3818 INSN_CODE (insn) = -1;
3820 /* One less use of the label this insn used to jump to
3821 if we turned this into a NOP jump. */
3822 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3823 --LABEL_NUSES (JUMP_LABEL (insn));
3825 /* If this has turned into an unconditional jump,
3826 then put a barrier after it so that the unreachable
3827 code will be deleted. */
3828 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3829 emit_barrier_after (insn);
3831 run_jump_opt_after_gcse = 1;
3834 if (gcse_file != NULL)
3836 int regno = REGNO (reg_used->reg_rtx);
3837 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3838 regno, INSN_UID (insn));
3839 print_rtl (gcse_file, src);
3840 fprintf (gcse_file, "\n");
3848 /* Subroutine of cprop_insn that tries to propagate constants into
3849 JUMP_INSNS for machines that have CC0. INSN is a single set that
3850 stores into CC0; the insn following it is a conditional jump.
3851 REG_USED is the use we will try to replace, SRC is the constant we
3852 will try to substitute for it.
3853 Returns nonzero if a change was made. */
3855 cprop_cc0_jump (insn, reg_used, src)
3857 struct reg_use *reg_used;
3860 rtx jump = NEXT_INSN (insn);
3861 rtx copy = copy_rtx (jump);
3862 rtx set = PATTERN (copy);
3864 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3865 substitute into it. */
3866 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3867 if (! cprop_jump (jump, copy, reg_used, src))
3870 /* If we succeeded, delete the cc0 setter. */
/* Deletion is done by turning the insn into a note, the standard idiom
   at this point in compilation.  */
3871 PUT_CODE (insn, NOTE);
3872 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3873 NOTE_SOURCE_FILE (insn) = 0;
3878 /* Perform constant and copy propagation on INSN.
3879 The result is non-zero if a change was made. */
3882 cprop_insn (insn, alter_jumps)
3886 struct reg_use *reg_used;
3889 /* Only propagate into SETs. Note that a conditional jump is a
3890 SET with pc_rtx as the destination. */
3891 if ((GET_CODE (insn) != INSN
3892 && GET_CODE (insn) != JUMP_INSN)
3893 || GET_CODE (PATTERN (insn)) != SET)
3897 find_used_regs (PATTERN (insn));
/* FIX: the listing contained the mojibake character `(R)' (U+00AE),
   a corrupted HTML entity for `&'; restore the address-of operator so
   reg_used points at the first entry of reg_use_table.  */
3899 reg_used = &reg_use_table[0];
3900 for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3904 int regno = REGNO (reg_used->reg_rtx);
3906 /* Ignore registers created by GCSE.
3907 We do this because ... */
3908 if (regno >= max_gcse_regno)
3911 /* If the register has already been set in this block, there's
3912 nothing we can do. */
3913 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3916 /* Find an assignment that sets reg_used and is available
3917 at the start of the block. */
3918 set = find_avail_set (regno, insn);
3923 /* ??? We might be able to handle PARALLELs. Later. */
3924 if (GET_CODE (pat) != SET)
3926 src = SET_SRC (pat);
3928 /* Constant propagation. */
3929 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3930 || GET_CODE (src) == SYMBOL_REF)
3932 /* Handle normal insns first. */
3933 if (GET_CODE (insn) == INSN
3934 && try_replace_reg (reg_used->reg_rtx, src, insn))
3938 if (gcse_file != NULL)
3940 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3941 regno, INSN_UID (insn));
3942 print_rtl (gcse_file, src);
3943 fprintf (gcse_file, "\n");
3946 /* The original insn setting reg_used may or may not now be
3947 deletable. We leave the deletion to flow. */
3950 /* Try to propagate a CONST_INT into a conditional jump.
3951 We're pretty specific about what we will handle in this
3952 code, we can extend this as necessary over time.
3954 Right now the insn in question must look like
3955 (set (pc) (if_then_else ...)) */
3956 else if (alter_jumps
3957 && GET_CODE (insn) == JUMP_INSN
3958 && condjump_p (insn)
3959 && ! simplejump_p (insn))
3960 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3962 /* Similar code for machines that use a pair of CC0 setter and
3963 conditional jump insn. */
3964 else if (alter_jumps
3965 && GET_CODE (PATTERN (insn)) == SET
3966 && SET_DEST (PATTERN (insn)) == cc0_rtx
3967 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3968 && condjump_p (NEXT_INSN (insn))
3969 && ! simplejump_p (NEXT_INSN (insn)))
3970 changed |= cprop_cc0_jump (insn, reg_used, src);
/* Copy propagation: SRC must be a pseudo distinct from the used reg. */
3973 else if (GET_CODE (src) == REG
3974 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3975 && REGNO (src) != regno)
3977 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3981 if (gcse_file != NULL)
3983 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
3984 regno, INSN_UID (insn), REGNO (src));
3987 /* The original insn setting reg_used may or may not now be
3988 deletable. We leave the deletion to flow. */
3989 /* FIXME: If it turns out that the insn isn't deletable,
3990 then we may have unnecessarily extended register lifetimes
3991 and made things worse. */
3999 /* Forward propagate copies.
4000 This includes copies and constants.
4001 Return non-zero if a change was made. */
/* NOTE(review): loop starts at block 1, presumably because block 0 has no
   predecessors whose available sets could be propagated — confirm against
   the full source.  */
4010 /* Note we start at block 1. */
4013 for (bb = 1; bb < n_basic_blocks; bb++)
4015 /* Reset tables used to keep track of what's still valid [since the
4016 start of the block]. */
4017 reset_opr_set_tables ();
/* Walk every insn of the block, inclusive of BLOCK_END.  */
4019 for (insn = BLOCK_HEAD (bb);
4020 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4021 insn = NEXT_INSN (insn))
4023 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4025 changed |= cprop_insn (insn, alter_jumps);
4027 /* Keep track of everything modified by this insn. */
4028 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4029 call mark_oprs_set if we turned the insn into a NOTE. */
4030 if (GET_CODE (insn) != NOTE)
4031 mark_oprs_set (insn);
4036 if (gcse_file != NULL)
4037 fprintf (gcse_file, "\n");
4042 /* Perform one copy/constant propagation pass.
4043 F is the first insn in the function.
4044 PASS is the pass count. */
4047 one_cprop_pass (pass, alter_jumps)
/* Statistics counters reported in the dump at the end of the pass.  */
4053 const_prop_count = 0;
4054 copy_prop_count = 0;
/* Build the SET hash table, compute the cprop dataflow, run cprop, then
   release the per-pass storage.  */
4056 alloc_set_hash_table (max_cuid);
4057 compute_set_hash_table ();
4059 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4063 alloc_cprop_mem (n_basic_blocks, n_sets);
4064 compute_cprop_data ();
4065 changed = cprop (alter_jumps);
4068 free_set_hash_table ();
4072 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4073 current_function_name, pass,
4074 bytes_used, const_prop_count, copy_prop_count);
4075 fprintf (gcse_file, "\n");
4081 /* Compute PRE+LCM working variables. */
4083 /* Local properties of expressions. */
4084 /* Nonzero for expressions that are transparent in the block. */
4085 static sbitmap *transp;
4087 /* Nonzero for expressions that are transparent at the end of the block.
4088 This is only zero for expressions killed by an abnormal critical edge
4089 created by a call. */
4090 static sbitmap *transpout;
4092 /* Nonzero for expressions that are computed (available) in the block. */
4093 static sbitmap *comp;
4095 /* Nonzero for expressions that are locally anticipatable in the block. */
4096 static sbitmap *antloc;
4098 /* Nonzero for expressions where this block is an optimal computation
4100 static sbitmap *pre_optimal;
4102 /* Nonzero for expressions which are redundant in a particular block. */
4103 static sbitmap *pre_redundant;
/* Scratch per-block bitmap vector used by pre_insert/pre_delete.  */
4105 static sbitmap *temp_bitmap;
4107 /* Redundant insns. */
/* Indexed by INSN_CUID; set for insns whose computation pre_delete
   replaced with a copy from the reaching register.  */
4108 static sbitmap pre_redundant_insns;
4110 /* Allocate vars used for PRE analysis. */
/* Each vector has one bitmap per basic block, with one bit per
   expression in the hash table.  */
4113 alloc_pre_mem (n_blocks, n_exprs)
4114 int n_blocks, n_exprs;
4116 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4117 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4118 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4120 temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4121 pre_optimal = sbitmap_vector_alloc (n_blocks, n_exprs);
4122 pre_redundant = sbitmap_vector_alloc (n_blocks, n_exprs);
4123 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4126 /* Free vars used for PRE analysis. */
/* NOTE(review): only one statement of this function is visible in this
   chunk; presumably the missing lines free the remaining vectors
   allocated by alloc_pre_mem — verify against the full source.  */
4137 free (pre_redundant);
4141 /* Top level routine to do the dataflow analysis needed by PRE. */
/* Local properties first, then transparency at block exits, then the
   LCM solver which fills in pre_redundant and pre_optimal.  */
4146 compute_local_properties (transp, comp, antloc, 0);
4147 compute_transpout ();
4148 pre_lcm (n_basic_blocks, n_exprs, s_preds, s_succs, transp,
4149 antloc, pre_redundant, pre_optimal);
4155 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4158 VISITED is a pointer to a working buffer for tracking which BB's have
4159 been visited. It is NULL for the top-level call.
4161 CHECK_PRE_COMP controls whether or not we check for a computation of
4164 We treat reaching expressions that go through blocks containing the same
4165 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4166 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4167 2 as not reaching. The intent is to improve the probability of finding
4168 only one reaching expression and to reduce register lifetimes by picking
4169 the closest such expression. */
4172 pre_expr_reaches_here_p (occr_bb, expr, bb, check_pre_comp, visited)
/* Allocate the visited buffer lazily on the first (top-level) call;
   recursive calls share it via the parameter.  */
4181 if (visited == NULL)
4183 visited = (char *) alloca (n_basic_blocks);
4184 bzero (visited, n_basic_blocks);
/* Walk every predecessor edge of BB.  */
4187 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4189 int pred_bb = pred->src->index;
4191 if (pred->src == ENTRY_BLOCK_PTR
4192 /* Has predecessor has already been visited? */
4193 || visited[pred_bb])
4195 /* Nothing to do. */
4197 /* Does this predecessor generate this expression? */
4198 else if ((!check_pre_comp && occr_bb == pred_bb)
4199 || TEST_BIT (comp[pred_bb], expr->bitmap_index))
4201 /* Is this the occurrence we're looking for?
4202 Note that there's only one generating occurrence per block
4203 so we just need to check the block number. */
4204 if (occr_bb == pred_bb)
4206 visited[pred_bb] = 1;
4208 /* Ignore this predecessor if it kills the expression. */
4209 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4210 visited[pred_bb] = 1;
4211 /* Neither gen nor kill. */
/* Recurse through this predecessor.  */
4214 visited[pred_bb] = 1;
4215 if (pre_expr_reaches_here_p (occr_bb, expr, pred_bb,
4216 check_pre_comp, visited))
4221 /* All paths have been checked. */
4225 /* Add EXPR to the end of basic block BB.
4227 This is used by both the PRE and code hoisting.
4229 For PRE, we want to verify that the expr is either transparent
4230 or locally anticipatable in the target block. This check makes
4231 no sense for code hoisting. */
4234 insert_insn_end_bb (expr, bb, pre)
4239 rtx insn = BLOCK_END (bb);
4241 rtx reg = expr->reaching_reg;
4242 int regno = REGNO (reg);
4243 rtx pat, copied_expr;
/* Emit "reg = expr" into a sequence; PAT may expand to several insns.  */
4247 copied_expr = copy_rtx (expr->expr);
4248 emit_move_insn (reg, copied_expr);
4249 first_new_insn = get_insns ();
4250 pat = gen_sequence ();
4253 /* If the last insn is a jump, insert EXPR in front [taking care to
4254 handle cc0, etc. properly]. */
4256 if (GET_CODE (insn) == JUMP_INSN)
4262 /* If this is a jump table, then we can't insert stuff here. Since
4263 we know the previous real insn must be the tablejump, we insert
4264 the new instruction just before the tablejump. */
4265 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4266 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4267 insn = prev_real_insn (insn);
4270 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4271 if cc0 isn't set. */
4272 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4274 insn = XEXP (note, 0);
/* No REG_CC_SETTER note: look at the previous real insn instead.  */
4277 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4278 if (maybe_cc0_setter
4279 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4280 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4281 insn = maybe_cc0_setter;
4284 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4285 new_insn = emit_insn_before (pat, insn);
4286 if (BLOCK_HEAD (bb) == insn)
4287 BLOCK_HEAD (bb) = new_insn;
4289 /* Likewise if the last insn is a call, as will happen in the presence
4290 of exception handling. */
4291 else if (GET_CODE (insn) == CALL_INSN)
4293 HARD_REG_SET parm_regs;
4297 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4298 we search backward and place the instructions before the first
4299 parameter is loaded. Do this for everyone for consistency and a
4300 presumption that we'll get better code elsewhere as well. */
4302 /* It should always be the case that we can put these instructions
4303 anywhere in the basic block when performing PRE optimizations.
4306 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4307 && !TEST_BIT (transp[bb], expr->bitmap_index))
4310 /* Since different machines initialize their parameter registers
4311 in different orders, assume nothing. Collect the set of all
4312 parameter registers. */
4313 CLEAR_HARD_REG_SET (parm_regs);
/* CALL_INSN_FUNCTION_USAGE is a list of (USE reg)/(CLOBBER reg)
   expressions; harvest the hard registers used as parameters.  */
4315 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4316 if (GET_CODE (XEXP (p, 0)) == USE
4317 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4319 int regno = REGNO (XEXP (XEXP (p, 0), 0));
4320 if (regno >= FIRST_PSEUDO_REGISTER)
4322 SET_HARD_REG_BIT (parm_regs, regno);
4326 /* Search backward for the first set of a register in this set. */
4327 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4329 insn = PREV_INSN (insn);
4330 p = single_set (insn);
4331 if (p && GET_CODE (SET_DEST (p)) == REG
4332 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4333 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4335 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4340 /* If we found all the parameter loads, then we want to insert
4341 before the first parameter load.
4343 If we did not find all the parameter loads, then we might have
4344 stopped on the head of the block, which could be a CODE_LABEL.
4345 If we inserted before the CODE_LABEL, then we would be putting
4346 the insn in the wrong basic block. In that case, put the insn
4347 after the CODE_LABEL.
4349 ?!? Do we need to account for NOTE_INSN_BASIC_BLOCK here? */
4350 if (GET_CODE (insn) != CODE_LABEL)
4352 new_insn = emit_insn_before (pat, insn);
4353 if (BLOCK_HEAD (bb) == insn)
4354 BLOCK_HEAD (bb) = new_insn;
4358 new_insn = emit_insn_after (pat, insn);
/* Otherwise the block ends in an ordinary insn: append after it and
   move the block-end marker.  */
4363 new_insn = emit_insn_after (pat, insn);
4364 BLOCK_END (bb) = new_insn;
4367 /* Keep block number table up to date.
4368 Note, PAT could be a multiple insn sequence, we have to make
4369 sure that each insn in the sequence is handled. */
4370 if (GET_CODE (pat) == SEQUENCE)
4374 for (i = 0; i < XVECLEN (pat, 0); i++)
4376 rtx insn = XVECEXP (pat, 0, i);
4377 set_block_num (insn, bb);
4378 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4379 add_label_notes (PATTERN (insn), new_insn);
4380 record_set_insn = insn;
4381 note_stores (PATTERN (insn), record_set_info);
/* Single-insn case: one set of bookkeeping calls suffices.  */
4386 add_label_notes (SET_SRC (pat), new_insn);
4387 set_block_num (new_insn, bb);
4388 /* Keep register set table up to date. */
4389 record_one_set (regno, new_insn);
4392 gcse_create_count++;
4396 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4397 bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4401 /* Insert partially redundant expressions at the ends of appropriate basic
4402 blocks making them fully redundant. */
4405 pre_insert (index_map)
4406 struct expr **index_map;
4408 int bb, i, set_size;
4411 /* Compute INSERT = PRE_OPTIMAL & ~PRE_REDUNDANT.
4412 Where INSERT is nonzero, we add the expression at the end of the basic
4413 block if it reaches any of the deleted expressions. */
/* INSERTED tracks which (block, expression) pairs already received an
   insertion so each is emitted at most once.  */
4415 set_size = pre_optimal[0]->size;
4416 inserted = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
4417 sbitmap_vector_zero (inserted, n_basic_blocks);
4419 for (bb = 0; bb < n_basic_blocks; bb++)
4423 /* This computes the number of potential insertions we need. */
4424 sbitmap_not (temp_bitmap[bb], pre_redundant[bb]);
4425 sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_optimal[bb]);
4427 /* TEMP_BITMAP[bb] now contains a bitmap of the expressions that we need
4428 to insert at the end of this basic block. */
/* Walk the bitmap one SBITMAP_ELT_TYPE word at a time; J is the
   expression index of the low bit of INSERT.  */
4429 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4431 SBITMAP_ELT_TYPE insert = temp_bitmap[bb]->elms[i];
4434 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4436 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4438 struct expr *expr = index_map[j];
4441 /* Now look at each deleted occurrence of this expression. */
4442 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4444 if (! occr->deleted_p)
4447 /* Insert this expression at the end of BB if it would
4448 reach the deleted occurrence. */
4449 if (!TEST_BIT (inserted[bb], j)
4450 && pre_expr_reaches_here_p (bb, expr,
4451 BLOCK_NUM (occr->insn), 0,
4454 SET_BIT (inserted[bb], j);
4455 insert_insn_end_bb (index_map[j], bb, 1);
4463 sbitmap_vector_free (inserted);
4466 /* Copy the result of INSN to REG.
4467 INDX is the expression number. */
/* Emits "reaching_reg = SET_DEST (insn)" immediately after INSN and
   updates the block-number and register-set bookkeeping tables.  */
4470 pre_insert_copy_insn (expr, insn)
4474 rtx reg = expr->reaching_reg;
4475 int regno = REGNO (reg);
4476 int indx = expr->bitmap_index;
4477 rtx set = single_set (insn);
4482 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4484 /* Keep block number table up to date. */
4485 set_block_num (new_insn, BLOCK_NUM (insn));
4486 /* Keep register set table up to date. */
4487 record_one_set (regno, new_insn);
4489 gcse_create_count++;
4493 fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n",
4494 BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno);
4498 /* Copy available expressions that reach the redundant expression
4499 to `reaching_reg'. */
4502 pre_insert_copies ()
/* TEMP_BITMAP[bb] = expressions both optimal and redundant in BB.  */
4506 for (bb = 0; bb < n_basic_blocks; bb++)
4508 sbitmap_a_and_b (temp_bitmap[bb], pre_optimal[bb], pre_redundant[bb]);
4511 /* For each available expression in the table, copy the result to
4512 `reaching_reg' if the expression reaches a deleted one.
4514 ??? The current algorithm is rather brute force.
4515 Need to do some profiling. */
4517 for (i = 0; i < expr_hash_table_size; i++)
4521 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4525 /* If the basic block isn't reachable, PPOUT will be TRUE.
4526 However, we don't want to insert a copy here because the
4527 expression may not really be redundant. So only insert
4528 an insn if the expression was deleted.
4529 This test also avoids further processing if the expression
4530 wasn't deleted anywhere. */
/* reaching_reg is only allocated by pre_delete when at least one
   occurrence was deleted, so NULL means nothing to do.  */
4531 if (expr->reaching_reg == NULL)
4534 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4538 if (! occr->deleted_p)
4541 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4543 rtx insn = avail->insn;
4544 int bb = BLOCK_NUM (insn);
4546 if (!TEST_BIT (temp_bitmap[bb], expr->bitmap_index))
4549 /* No need to handle this one if handled already. */
4550 if (avail->copied_p)
4552 /* Don't handle this one if it's a redundant one. */
4553 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4555 /* Or if the expression doesn't reach the deleted one. */
4556 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4557 BLOCK_NUM (occr->insn),
4561 /* Copy the result of avail to reaching_reg. */
4562 pre_insert_copy_insn (expr, insn);
4563 avail->copied_p = 1;
4570 /* Delete redundant computations.
4571 Deletion is done by changing the insn to copy the `reaching_reg' of
4572 the expression into the result of the SET. It is left to later passes
4573 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4575 Returns non-zero if a change is made. */
4582 /* Compute the expressions which are redundant and need to be replaced by
4583 copies from the reaching reg to the target reg. */
/* TEMP_BITMAP[bb] = redundant-but-not-optimal expressions in BB.  */
4584 for (bb = 0; bb < n_basic_blocks; bb++)
4586 sbitmap_not (temp_bitmap[bb], pre_optimal[bb]);
4587 sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_redundant[bb]);
4591 for (i = 0; i < expr_hash_table_size; i++)
4595 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4598 int indx = expr->bitmap_index;
4600 /* We only need to search antic_occr since we require
4603 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4605 rtx insn = occr->insn;
4607 int bb = BLOCK_NUM (insn);
4609 if (TEST_BIT (temp_bitmap[bb], indx))
4611 set = single_set (insn);
4615 /* Create a pseudo-reg to store the result of reaching
4616 expressions into. Get the mode for the new pseudo
4617 from the mode of the original destination pseudo. */
/* One shared pseudo per expression; allocated lazily on the
   first deleted occurrence.  */
4618 if (expr->reaching_reg == NULL)
4620 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4622 /* In theory this should never fail since we're creating
4625 However, on the x86 some of the movXX patterns actually
4626 contain clobbers of scratch regs. This may cause the
4627 insn created by validate_change to not match any pattern
4628 and thus cause validate_change to fail. */
4629 if (validate_change (insn, &SET_SRC (set),
4630 expr->reaching_reg, 0))
4632 occr->deleted_p = 1;
4633 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4641 "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4642 INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4652 /* Perform GCSE optimizations using PRE.
4653 This is called by one_pre_gcse_pass after all the dataflow analysis
4656 This is based on the original Morel-Renvoise paper, Fred Chow's thesis,
4657 and lazy code motion from Knoop, Ruthing and Steffen as described in
4658 Advanced Compiler Design and Implementation.
4660 ??? A new pseudo reg is created to hold the reaching expression.
4661 The nice thing about the classical approach is that it would try to
4662 use an existing reg. If the register can't be adequately optimized
4663 [i.e. we introduce reload problems], one could add a pass here to
4664 propagate the new register through the block.
4666 ??? We don't handle single sets in PARALLELs because we're [currently]
4667 not able to copy the rest of the parallel when we insert copies to create
4668 full redundancies from partial redundancies. However, there's no reason
4669 why we can't handle PARALLELs in the cases where there are no partial
4677 struct expr **index_map;
4679 /* Compute a mapping from expression number (`bitmap_index') to
4680 hash table entry. */
4682 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
4683 bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
4684 for (i = 0; i < expr_hash_table_size; i++)
4688 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4689 index_map[expr->bitmap_index] = expr;
4692 /* Reset bitmap used to track which insns are redundant. */
4693 pre_redundant_insns = sbitmap_alloc (max_cuid);
4694 sbitmap_zero (pre_redundant_insns);
4696 /* Delete the redundant insns first so that
4697 - we know what register to use for the new insns and for the other
4698 ones with reaching expressions
4699 - we know which insns are redundant when we go to create copies */
4700 changed = pre_delete ();
4702 /* Insert insns in places that make partially redundant expressions
4704 pre_insert (index_map);
4706 /* In other places with reaching expressions, copy the expression to the
4707 specially allocated pseudo-reg that reaches the redundant expression. */
4708 pre_insert_copies ();
4710 free (pre_redundant_insns);
4715 /* Top level routine to perform one PRE GCSE pass.
4717 Return non-zero if a change was made. */
4720 one_pre_gcse_pass (pass)
/* Statistics counters reported in the dump at the end of the pass.  */
4725 gcse_subst_count = 0;
4726 gcse_create_count = 0;
/* Build the expression hash table, run the PRE dataflow and transform,
   then release per-pass storage.  */
4728 alloc_expr_hash_table (max_cuid);
4729 compute_expr_hash_table ();
4731 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4732 expr_hash_table_size, n_exprs);
4735 alloc_pre_mem (n_basic_blocks, n_exprs);
4736 compute_pre_data ();
4737 changed |= pre_gcse ();
4740 free_expr_hash_table ();
4744 fprintf (gcse_file, "\n");
4745 fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4746 current_function_name, pass,
4747 bytes_used, gcse_subst_count, gcse_create_count);
4753 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4754 We have to add REG_LABEL notes, because the following loop optimization
4755 pass requires them. */
4757 /* ??? This is very similar to the loop.c add_label_notes function. We
4758 could probably share code here. */
4760 /* ??? If there was a jump optimization pass after gcse and before loop,
4761 then we would not need to do this here, because jump would add the
4762 necessary REG_LABEL notes. */
4765 add_label_notes (x, insn)
4769 enum rtx_code code = GET_CODE (x);
4773 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4775 /* This code used to ignore labels that referred to dispatch tables to
4776 avoid flow generating (slightly) worse code.
4778 We no longer ignore such label references (see LABEL_REF handling in
4779 mark_jump_label for additional information). */
4780 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
/* Recurse over X's operands, descending into expression ('e') and
   vector ('E') operands.  */
4785 fmt = GET_RTX_FORMAT (code);
4786 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4789 add_label_notes (XEXP (x, i), insn);
4790 else if (fmt[i] == 'E')
4791 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4792 add_label_notes (XVECEXP (x, i, j), insn);
4796 /* Compute transparent outgoing information for each block.
4798 An expression is transparent to an edge unless it is killed by
4799 the edge itself. This can only happen with abnormal control flow,
4800 when the edge is traversed through a call. This happens with
4801 non-local labels and exceptions.
4803 This would not be necessary if we split the edge. While this is
4804 normally impossible for abnormal critical edges, with some effort
4805 it should be possible with exception handling, since we still have
4806 control over which handler should be invoked. But due to increased
4807 EH table sizes, this may not be worthwhile. */
4810 compute_transpout ()
/* Start from "everything transparent" and clear bits below.  */
4814 sbitmap_vector_ones (transpout, n_basic_blocks);
4816 for (bb = 0; bb < n_basic_blocks; ++bb)
4820 /* Note that flow inserted a nop at the end of basic blocks that
4821 end in call instructions for reasons other than abnormal
4823 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
/* Only MEM expressions can be killed by a call; constant-pool
   references are known immutable and stay transparent.  */
4826 for (i = 0; i < expr_hash_table_size; i++)
4829 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4830 if (GET_CODE (expr->expr) == MEM)
4832 rtx addr = XEXP (expr->expr, 0);
4834 if (GET_CODE (addr) == SYMBOL_REF
4835 && CONSTANT_POOL_ADDRESS_P (addr))
4838 /* ??? Optimally, we would use interprocedural alias
4839 analysis to determine if this mem is actually killed
4841 RESET_BIT (transpout[bb], expr->bitmap_index);
4847 /* Removal of useless null pointer checks */
4849 /* These need to be file static for communication between
4850 invalidate_nonnull_info and delete_null_pointer_checks. */
/* Block currently being scanned by delete_null_pointer_checks; read by
   the note_stores callback invalidate_nonnull_info.  */
4851 static int current_block;
/* Per-block local properties: registers known nonnull at block end, and
   registers whose nonnull information is killed within the block.  */
4852 static sbitmap *nonnull_local;
4853 static sbitmap *nonnull_killed;
4855 /* Called via note_stores. X is set by SETTER. If X is a register we must
4856 invalidate nonnull_local and set nonnull_killed.
4858 We ignore hard registers. */
4860 invalidate_nonnull_info (x, setter)
4862 rtx setter ATTRIBUTE_UNUSED;
/* Strip SUBREGs to reach the underlying register.  */
4867 while (GET_CODE (x) == SUBREG)
4870 /* Ignore anything that is not a register or is a hard register. */
4871 if (GET_CODE (x) != REG
4872 || REGNO (x) < FIRST_PSEUDO_REGISTER)
/* Any set of the pseudo kills what we knew about it in this block.  */
4877 RESET_BIT (nonnull_local[current_block], regno);
4878 SET_BIT (nonnull_killed[current_block], regno);
4882 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
4885 This is conceptually similar to global constant/copy propagation and
4886 classic global CSE (it even uses the same dataflow equations as cprop).
4888 If a register is used as memory address with the form (mem (reg)), then we
4889 know that REG can not be zero at that point in the program. Any instruction
4890 which sets REG "kills" this property.
4892 So, if every path leading to a conditional branch has an available memory
4893 reference of that form, then we know the register can not have the value
4894 zero at the conditional branch.
4896 So we merely need to compute the local properties and propagate that data
4897 around the cfg, then optimize where possible.
4899 We run this pass two times. Once before CSE, then again after CSE. This
4900 has proven to be the most profitable approach. It is rare for new
4901 optimization opportunities of this nature to appear after the first CSE
4904 This could probably be integrated with global cprop with a little work. */
4907 delete_null_pointer_checks (f)
4910 int_list_ptr *s_preds, *s_succs;
4911 int *num_preds, *num_succs;
4913 sbitmap *nonnull_avin, *nonnull_avout;
4915 /* First break the program into basic blocks. */
4916 find_basic_blocks (f, max_reg_num (), NULL, 1);
4918 /* If we have only a single block, then there's nothing to do. */
4919 if (n_basic_blocks <= 1)
4921 /* Free storage allocated by find_basic_blocks. */
4922 free_basic_block_vars (0);
4926 /* Trying to perform global optimizations on flow graphs which have
4927 a high connectivity will take a long time and is unlikely to be
4928 particularly useful.
4930 In normal circumstances a cfg should have about twice as many edges
4931 as blocks. But we do not want to punish small functions which have
4932 a couple switch statements. So we require a relatively large number
4933 of basic blocks and the ratio of edges to blocks to be high. */
4934 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
4936 /* Free storage allocated by find_basic_blocks. */
4937 free_basic_block_vars (0);
4941 /* We need predecessor/successor lists as well as pred/succ counts for
4942 each basic block. */
4943 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
4944 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
4945 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
4946 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
4947 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
4949 /* Allocate bitmaps to hold local and global properties. */
/* One bit per pseudo register, per block.  */
4950 nonnull_local = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4951 nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4952 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4953 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
4955 /* Compute local properties, nonnull and killed. A register will have
4956 the nonnull property if at the end of the current block its value is
4957 known to be nonnull. The killed property indicates that somewhere in
4958 the block any information we had about the register is killed.
4960 Note that a register can have both properties in a single block. That
4961 indicates that it's killed, then later in the block a new value is
4963 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4964 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4965 for (current_block = 0; current_block < n_basic_blocks; current_block++)
4967 rtx insn, stop_insn;
4969 /* Scan each insn in the basic block looking for memory references and
4971 stop_insn = NEXT_INSN (BLOCK_END (current_block));
4972 for (insn = BLOCK_HEAD (current_block);
4974 insn = NEXT_INSN (insn))
4978 /* Ignore anything that is not a normal insn. */
4979 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4982 /* Basically ignore anything that is not a simple SET. We do have
4983 to make sure to invalidate nonnull_local and set nonnull_killed
4984 for such insns though. */
4985 set = single_set (insn);
4988 note_stores (PATTERN (insn), invalidate_nonnull_info);
4992 /* See if we've got a usable memory load. We handle it first
4993 in case it uses its address register as a dest (which kills
4994 the nonnull property). */
4995 if (GET_CODE (SET_SRC (set)) == MEM
4996 && GET_CODE (XEXP (SET_SRC (set), 0)) == REG
4997 && REGNO (XEXP (SET_SRC (set), 0)) >= FIRST_PSEUDO_REGISTER)
4998 SET_BIT (nonnull_local[current_block],
4999 REGNO (XEXP (SET_SRC (set), 0)));
5001 /* Now invalidate stuff clobbered by this insn. */
5002 note_stores (PATTERN (insn), invalidate_nonnull_info);
5004 /* And handle stores, we do these last since any sets in INSN can
5005 not kill the nonnull property if it is derived from a MEM
5006 appearing in a SET_DEST. */
5007 if (GET_CODE (SET_DEST (set)) == MEM
5008 && GET_CODE (XEXP (SET_DEST (set), 0)) == REG)
5009 SET_BIT (nonnull_local[current_block],
5010 REGNO (XEXP (SET_DEST (set), 0)));
5014 /* Now compute global properties based on the local properties. This
5015 is a classic global availability algorithm. */
/* Entry block starts with nothing available; iterate to a fixed point.  */
5016 sbitmap_zero (nonnull_avin[0]);
5017 sbitmap_vector_ones (nonnull_avout, n_basic_blocks);
5023 for (bb = 0; bb < n_basic_blocks; bb++)
5026 sbitmap_intersect_of_predecessors (nonnull_avin[bb],
5027 nonnull_avout, bb, s_preds);
5029 changed |= sbitmap_union_of_diff (nonnull_avout[bb],
5032 nonnull_killed[bb]);
5036 /* Now look at each bb and see if it ends with a compare of a value
5038 for (bb = 0; bb < n_basic_blocks; bb++)
5040 rtx last_insn = BLOCK_END (bb);
5041 rtx condition, earliest, reg;
5042 int compare_and_branch;
5044 /* We only want conditional branches. */
5045 if (GET_CODE (last_insn) != JUMP_INSN
5046 || !condjump_p (last_insn)
5047 || simplejump_p (last_insn))
5050 /* LAST_INSN is a conditional jump. Get its condition. */
5051 condition = get_condition (last_insn, &earliest);
5053 /* If we were unable to get the condition, or it is not an equality
5054 comparison against zero then there's nothing we can do. */
5056 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5057 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5058 || XEXP (condition, 1) != CONST0_RTX (GET_MODE (XEXP (condition, 0))))
5061 /* We must be checking a register against zero. */
5062 reg = XEXP (condition, 0);
5063 if (GET_CODE (reg) != REG)
5066 /* Is the register known to have a nonzero value? */
5067 if (!TEST_BIT (nonnull_avout[bb], REGNO (reg)))
5070 /* Try to compute whether the compare/branch at the loop end is one or
5071 two instructions. */
5072 if (earliest == last_insn)
5073 compare_and_branch = 1;
5074 else if (earliest == prev_nonnote_insn (last_insn))
5075 compare_and_branch = 2;
5079 /* We know the register in this comparison is nonnull at exit from
5080 this block. We can optimize this comparison. */
/* reg != 0 is always true: replace the conditional jump with an
   unconditional jump to its target.  */
5081 if (GET_CODE (condition) == NE)
5085 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5087 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5088 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5089 emit_barrier_after (new_jump);
5091 delete_insn (last_insn);
5092 if (compare_and_branch == 2)
5093 delete_insn (earliest);
5096 /* Free storage allocated by find_basic_blocks. */
5097 free_basic_block_vars (0);
5100 free (nonnull_local);
5101 free (nonnull_killed);
5102 free (nonnull_avin);
5103 free (nonnull_avout);
5106 /* Code Hoisting variables and subroutines. */
5108 /* Very busy expressions. */
/* An expression is very busy at a point if it is computed on every path
   leaving that point (see compute_code_hoist_vbeinout).  */
5109 static sbitmap *hoist_vbein;
5110 static sbitmap *hoist_vbeout;
5112 /* Hoistable expressions. */
5113 static sbitmap *hoist_exprs;
5115 /* Dominator bitmaps. */
5116 static sbitmap *dominators;
5117 static sbitmap *post_dominators;
5119 /* ??? We could compute post dominators and run this algorithm in
5120 reverse to perform tail merging, doing so would probably be
5121 more effective than the tail merging code in jump.c.
5123 It's unclear if tail merging could be run in parallel with
5124 code hoisting. It would be nice. */
5126 /* Allocate vars used for code hoisting analysis.

   N_BLOCKS is the number of basic blocks and N_EXPRS the number of
   expressions in the hash table.  Per-block expression bitmaps get one bit
   per expression; the dominator sets are block-by-block relations.  */
5129 alloc_code_hoist_mem (n_blocks, n_exprs)
5130 int n_blocks, n_exprs;
     /* Local properties of each block: anticipatable, transparent and
        computed expressions (filled in by compute_local_properties).  */
5132 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5133 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5134 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
     /* Global very-busy-expression solution, the hoistable set, and
        transparency at block exit.  */
5136 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5137 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5138 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5139 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
     /* Dominator information is one bit per block, per block.  */
5141 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5142 post_dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5145 /* Free vars used for code hoisting analysis. */
5148 free_code_hoist_mem ()
     /* Release the sbitmap vectors allocated by alloc_code_hoist_mem.  */
5155 free (hoist_vbeout);
5160 free (post_dominators);
5163 /* Compute the very busy expressions at entry/exit from each block.
5165 An expression is very busy if all paths from a given point
5166 compute the expression.

   Backward dataflow equations, per block BB:
     vbein[BB]  = antloc[BB] | (vbeout[BB] & transp[BB])
     vbeout[BB] = intersection of vbein over BB's successors
   CHANGED and PASSES drive an enclosing fixed-point iteration (its loop
   header is not visible in this excerpt).  */
5169 compute_code_hoist_vbeinout ()
5171 int bb, changed, passes;
5173 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5174 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5181 /* We scan the blocks in the reverse order to speed up
   convergence, since very-busy information flows backward through
   the CFG.  */
5183 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5185 changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5186 hoist_vbeout[bb], transp[bb]);
     /* NOTE(review): the last block is skipped here — presumably because
        its only successor is the exit block, leaving vbeout empty;
        confirm against the full source.  */
5187 if (bb != n_basic_blocks - 1)
5188 sbitmap_intersect_of_successors (hoist_vbeout[bb], hoist_vbein,
5195 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5198 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5201 compute_code_hoist_data ()
     /* Local per-block properties: transparency, computedness and
        anticipatability of each expression.  */
5203 compute_local_properties (transp, comp, antloc, 0);
5204 compute_transpout ();
     /* Global very-busy solution, then the dominator/post-dominator sets
        hoist_code uses to decide where hoisting is legal.  */
5205 compute_code_hoist_vbeinout ();
5206 compute_flow_dominators (dominators, post_dominators);
5208 fprintf (gcse_file, "\n");
5211 /* Determine if the expression identified by EXPR_INDEX would
5212 reach BB unimpaired if it was placed at the end of EXPR_BB.
5214 It's unclear exactly what Muchnick meant by "unimpaired".  It seems
5215 to me that the expression must either be computed or transparent in
5216 *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5217 would allow the expression to be hoisted out of loops, even if
5218 the expression wasn't a loop invariant.
5220 Contrast this to reachability for PRE where an expression is
5221 considered reachable if *any* path reaches instead of *all*

   VISITED is a scratch array of per-block flags.  The outermost caller
   passes NULL; it is then stack-allocated and zeroed here and shared by
   the recursive calls.  */
5225 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5233 if (visited == NULL)
5235 visited = (char *) alloca (n_basic_blocks);
5236 bzero (visited, n_basic_blocks);
5239 visited[expr_bb] = 1;
     /* Walk BB's predecessors; every path back toward EXPR_BB must either
        compute the expression or be transparent for it.  */
5240 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5242 int pred_bb = pred->src->index;
5244 if (pred->src == ENTRY_BLOCK_PTR)
5246 else if (visited[pred_bb])
5248 /* Does this predecessor generate this expression? */
5249 else if (TEST_BIT (comp[pred_bb], expr_index))
5251 else if (! TEST_BIT (transp[pred_bb], expr_index))
5256 visited[pred_bb] = 1;
5257 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
     /* Nonzero only when the predecessor walk ran to completion, i.e. no
        path failed the checks above.  */
5263 return (pred == NULL);
5266 /* Actually perform code hoisting.

   Two passes over each block BB: first, mark in hoist_exprs[BB] the very
   busy expressions that are hoistable into BB from more than one block BB
   dominates; second, for each marked expression, rewrite the dominated
   occurrences to read from a shared pseudo (expr->reaching_reg) and insert
   a single computation of the expression at the end of BB.  */
5270 int bb, dominated, i;
5271 struct expr **index_map;
5273 sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5275 /* Compute a mapping from expression number (`bitmap_index') to
5276 hash table entry. */
5278 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
5279 bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
5280 for (i = 0; i < expr_hash_table_size; i++)
5284 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5285 index_map[expr->bitmap_index] = expr;
5288 /* Walk over each basic block looking for potentially hoistable
5289 expressions, nothing gets hoisted from the entry block. */
5290 for (bb = 0; bb < n_basic_blocks; bb++)
5293 int insn_inserted_p;
5295 /* Examine each expression that is very busy at the exit of this
5296 block.  These are the potentially hoistable expressions. */
5297 for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5300 if (TEST_BIT (hoist_vbeout[bb], i)
5301 && TEST_BIT (transpout[bb], i))
5303 /* We've found a potentially hoistable expression, now
5304 we look at every block BB dominates to see if it
5305 computes the expression. */
5306 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5308 /* Ignore self dominance. */
5310 || ! TEST_BIT (dominators[dominated], bb))
5313 /* We've found a dominated block, now see if it computes
5314 the busy expression and whether or not moving that
5315 expression to the "beginning" of that block is safe. */
5316 if (!TEST_BIT (antloc[dominated], i))
5319 /* Note if the expression would reach the dominated block
5320 unimpaired if it was placed at the end of BB.
5322 Keep track of how many times this expression is hoistable
5323 from a dominated block into BB. */
5324 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5328 /* If we found more than one hoistable occurrence of this
5329 expression, then note it in the bitmap of expressions to
5330 hoist.  It makes no sense to hoist things which are computed
5331 in only one BB, and doing so tends to pessimize register
5332 allocation.  One could increase this value to try harder
5333 to avoid any possible code expansion due to register
5334 allocation issues; however experiments have shown that
5335 the vast majority of hoistable expressions are only movable
5336 from two successors, so raising this threshold is likely
5337 to nullify any benefit we get from code hoisting. */
5340 SET_BIT (hoist_exprs[bb], i);
5346 /* If we found nothing to hoist, then quit now. */
5350 /* Loop over all the hoistable expressions. */
5351 for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5353 /* We want to insert the expression into BB only once, so
5354 note when we've inserted it. */
5355 insn_inserted_p = 0;
5357 /* These tests should be the same as the tests above. */
5358 if (TEST_BIT (hoist_vbeout[bb], i))
5360 /* We've found a potentially hoistable expression, now
5361 we look at every block BB dominates to see if it
5362 computes the expression. */
5363 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5365 /* Ignore self dominance. */
5367 || ! TEST_BIT (dominators[dominated], bb))
5370 /* We've found a dominated block, now see if it computes
5371 the busy expression and whether or not moving that
5372 expression to the "beginning" of that block is safe. */
5373 if (!TEST_BIT (antloc[dominated], i))
5376 /* The expression is computed in the dominated block and
5377 it would be safe to compute it at the start of the
5378 dominated block.  Now we have to determine if the
5379 expression would reach the dominated block if it was
5380 placed at the end of BB. */
5381 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5383 struct expr *expr = index_map[i];
5384 struct occr *occr = expr->antic_occr;
5389 /* Find the right occurrence of this expression. */
     /* NOTE(review): OCCR is dereferenced before it is tested for NULL,
        so the "&& occr" guard is useless in this order; if no
        anticipatable occurrence belongs to DOMINATED this walks past the
        end of the list.  The null test should come first:
        while (occr && BLOCK_NUM (occr->insn) != dominated).  */
5390 while (BLOCK_NUM (occr->insn) != dominated && occr)
5393 /* Should never happen. */
5399 set = single_set (insn);
5403 /* Create a pseudo-reg to store the result of reaching
5404 expressions into.  Get the mode for the new pseudo
5405 from the mode of the original destination pseudo. */
5406 if (expr->reaching_reg == NULL)
5408 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5410 /* In theory this should never fail since we're creating
5413 However, on the x86 some of the movXX patterns actually
5414 contain clobbers of scratch regs.  This may cause the
5415 insn created by validate_change to not match any
5416 pattern and thus cause validate_change to fail. */
5417 if (validate_change (insn, &SET_SRC (set),
5418 expr->reaching_reg, 0))
5420 occr->deleted_p = 1;
5421 if (!insn_inserted_p)
5423 insert_insn_end_bb (index_map[i], bb, 0);
5424 insn_inserted_p = 1;
5434 /* Top level routine to perform one code hoisting (aka unification) pass
5436 Return non-zero if a change was made. */
5439 one_code_hoisting_pass ()
5443 alloc_expr_hash_table (max_cuid);
5444 compute_expr_hash_table ();
5446 dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5447 expr_hash_table_size, n_exprs);
5450 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5451 compute_code_hoist_data ();
5453 free_code_hoist_mem ();
5455 free_expr_hash_table ();