1 /* Global common subexpression elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
27 - memory aliasing support
28 - ability to realloc sbitmap vectors would allow one initial computation
29 of reg_set_in_block with only subsequent additions, rather than
30 recomputing it for each pass
33 - the classic gcse implementation is kept in for now for comparison
36 /* References searched while implementing this.
37 This list will eventually be deleted but I wanted to have a record of it
40 Compilers Principles, Techniques and Tools
44 Global Optimization by Suppression of Partial Redundancies
46 Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
48 A Portable Machine-Independent Global Optimizer - Design and Measurements
50 Stanford Ph.D. thesis, Dec. 1983
53 Elimination Algorithms for Data Flow Analysis
54 B.G. Ryder, M.C. Paull
55 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
58 A Fast Algorithm for Code Movement Optimization
60 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
62 A Solution to a Problem with Morel and Renvoise's
63 Global Optimization by Suppression of Partial Redundancies
64 K-H Drechsler, M.P. Stadel
65 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
67 Practical Adaptation of the Global Optimization
68 Algorithm of Morel and Renvoise
70 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
72 Efficiently Computing Static Single Assignment Form and the Control
74 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
75 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
78 How to Analyze Large Programs Efficiently and Informatively
79 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
80 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
83 J. Knoop, O. Ruthing, B. Steffen
84 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
86 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
87 Time for Reducible Flow Control
89 ACM Letters on Programming Languages and Systems,
90 Vol. 2, Num. 1-4, Mar-Dec 1993
92 An Efficient Representation for Sparse Sets
93 Preston Briggs, Linda Torczon
94 ACM Letters on Programming Languages and Systems,
95 Vol. 2, Num. 1-4, Mar-Dec 1993
97 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
98 K-H Drechsler, M.P. Stadel
99 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
101 Partial Dead Code Elimination
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
105 Effective Partial Redundancy Elimination
106 P. Briggs, K.D. Cooper
107 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
109 The Program Structure Tree: Computing Control Regions in Linear Time
110 R. Johnson, D. Pearson, K. Pingali
111 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
113 Optimal Code Motion: Theory and Practice
114 J. Knoop, O. Ruthing, B. Steffen
115 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
117 The power of assignment motion
118 J. Knoop, O. Ruthing, B. Steffen
119 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
121 Global code motion / global value numbering
123 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
125 Value Driven Redundancy Elimination
127 Rice University Ph.D. thesis, Apr. 1996
131 Massively Scalar Compiler Project, Rice University, Sep. 1996
133 High Performance Compilers for Parallel Computing
137 People wishing to speed up the code here should read xxx, yyy.
138 People wishing to do something different can find various possibilities
139 in the above papers and elsewhere.
143 /* Must precede rtl.h for FFS. */
148 #include "hard-reg-set.h"
151 #include "insn-config.h"
153 #include "basic-block.h"
157 #define obstack_chunk_alloc gmalloc
158 #define obstack_chunk_free free
160 /* Maximum number of passes to perform. */
163 /* Propagate flow information through back edges and thus enable PRE's
164 moving loop invariant calculations out of loops.
166 Originally this tended to create worse overall code, but several
167 improvements during the development of PRE seem to have made following
168 back edges generally a win.
170 Note much of the loop invariant code motion done here would normally
171 be done by loop.c, which has more heuristics for when to move invariants
172 out of loops. At some point we might need to move some of those
173 heuristics into gcse.c. */
174 #define FOLLOW_BACK_EDGES 1
176 /* We support two GCSE implementations: Classic GCSE (i.e. Dragon Book)
177 and PRE (Partial Redundancy Elimination) GCSE (based on Fred Chow's thesis).
180 In either case we perform the following steps:
182 1) Compute basic block information.
184 2) Compute table of places where registers are set.
186 3) Perform copy/constant propagation.
188 4) Perform global cse.
190 5) Perform another pass of copy/constant propagation [only if PRE].
192 Two passes of copy/constant propagation are done because the first one
193 enables more GCSE and the second one helps to clean up the copies that
194 GCSE creates. This is needed more for PRE than for Classic because Classic
195 GCSE will try to use an existing register containing the common
196 subexpression rather than create a new one. This is harder to do for PRE
197 because of the code motion (which Classic GCSE doesn't do).
199 Expressions we are interested in GCSE-ing are of the form
200 (set (pseudo-reg) (expression)).
201 Function want_to_gcse_p says what these are.
203 PRE handles moving invariant expressions out of loops (by treating them as
204 partially redundant). This feature of PRE is disabled here (by not
205 propagating dataflow information along back edges) because loop.c has more
206 involved (and thus typically better) heuristics for what to move out of
209 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
210 assignment) based GVN (global value numbering). L. T. Simpson's paper
211 (Rice University) on value numbering is a useful reference for this.
213 **********************
215 We used to support multiple passes but there are diminishing returns in
216 doing so. The first pass usually makes 90% of the changes that are doable.
217 A second pass can make a few more changes made possible by the first pass.
218 Experiments show any further passes don't make enough changes to justify
221 A study of spec92 using an unlimited number of passes:
222 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
223 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
224 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
226 It was found doing copy propagation between each pass enables further
229 PRE is quite expensive in complicated functions because the DFA can take
230 a while to converge. Hence we only perform one pass. Macro MAX_PASSES can
231 be modified if one wants to experiment.
233 **********************
235 The steps for PRE are:
237 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
239 2) Perform the data flow analysis for PRE.
241 3) Delete the redundant instructions
243 4) Insert the required copies [if any] that make the partially
244 redundant instructions fully redundant.
246 5) For other reaching expressions, insert an instruction to copy the value
247 to a newly created pseudo that will reach the redundant instruction.
249 The deletion is done first so that when we do insertions we
250 know which pseudo reg to use.
252 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
253 argue it is not. The number of iterations for the algorithm to converge
254 is typically 2-4 so I don't view it as that expensive (relatively speaking).
256 PRE GCSE depends heavily on the second CSE pass to clean up the copies
257 we create. To make an expression reach the place where it's redundant,
258 the result of the expression is copied to a new register, and the redundant
259 expression is deleted by replacing it with this new register. Classic GCSE
260 doesn't have this problem as much as it computes the reaching defs of
261 each register in each block and thus can try to use an existing register.
263 **********************
265 When -fclassic-gcse, we perform a classic global CSE pass.
266 It is based on the algorithms in the Dragon book, and is based on code
267 written by Devor Tevi at Intel.
269 The steps for Classic GCSE are:
271 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
272 Also recorded are reaching definition "gen" statements (rd_gen)
274 2) Compute the reaching definitions (reaching_defs).
275 This is a bitmap for each basic block indexed by INSN_CUID that is 1
276 for each statement containing a definition that reaches the block.
278 3) Compute the available expressions (ae_in).
279 This is a bitmap for each basic block indexed by expression number
280 that is 1 for each expression that is available at the beginning of
284 This is done by scanning each instruction looking for sets of the form
285 (set (pseudo-reg) (expression)) and checking if `expression' is in the
286 hash table. If it is, and if the expression is available, and if only
287 one computation of the expression reaches the instruction, we substitute
288 the expression for a register containing its value. If there is no
289 such register, but the expression is expensive enough we create an
290 instruction to copy the result of the expression into and use that.
292 **********************
294 A fair bit of simplicity is created by creating small functions for simple
295 tasks, even when the function is only called in one place. This may
296 measurably slow things down [or may not] by creating more function call
297 overhead than is necessary. The source is laid out so that it's trivial
298 to make the affected functions inline so that one can measure what speed
299 up, if any, can be achieved, and maybe later when things settle things can
302 Help stamp out big monolithic functions! */
304 /* GCSE global vars. */
307 static FILE *gcse_file;
309 /* Bitmaps are normally not included in debugging dumps.
310 However it's useful to be able to print them from GDB.
311 We could create special functions for this, but it's simpler to
312 just allow passing stderr to the dump_foo fns. Since stderr can
313 be a macro, we store a copy here. */
314 static FILE *debug_stderr;
316 /* An obstack for our working variables. */
317 static struct obstack gcse_obstack;
319 /* Non-zero for each mode that supports (set (reg) (reg)).
320 This is trivially true for integer and floating point values.
321 It may or may not be true for condition codes. */
322 static char can_copy_p[(int) NUM_MACHINE_MODES];
324 /* Non-zero if can_copy_p has been initialized. */
325 static int can_copy_init_p;
327 /* Element I is a list of I's predecessors/successors. */
328 static int_list_ptr *s_preds;
329 static int_list_ptr *s_succs;
331 /* Element I is the number of predecessors/successors of basic block I. */
332 static int *num_preds;
333 static int *num_succs;
335 /* Hash table of expressions. */
339 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
341 /* Index in the available expression bitmaps. */
343 /* Next entry with the same hash. */
344 struct expr *next_same_hash;
345 /* List of anticipatable occurrences in basic blocks in the function.
346 An "anticipatable occurrence" is one that is the first occurrence in the
347 basic block and the operands are not modified in the basic block prior
348 to the occurrence. */
349 struct occr *antic_occr;
350 /* List of available occurrences in basic blocks in the function.
351 An "available occurrence" is one that is the last occurrence in the
352 basic block and the operands are not modified by following statements in
353 the basic block [including this insn]. */
354 struct occr *avail_occr;
355 /* Non-null if the computation is PRE redundant.
356 The value is the newly created pseudo-reg to record a copy of the
357 expression in all the places that reach the redundant copy. */
361 /* Occurrence of an expression.
362 There is one per basic block. If a pattern appears more than once the
363 last appearance is used [or first for anticipatable expressions]. */
367 /* Next occurrence of this expression. */
369 /* The insn that computes the expression. */
371 /* Non-zero if this [anticipatable] occurrence has been deleted. */
373 /* Non-zero if this [available] occurrence has been copied to
375 /* ??? This is mutually exclusive with deleted_p, so they could share
380 /* Expression and copy propagation hash tables.
381 Each hash table is an array of buckets.
382 ??? It is known that if it were an array of entries, structure elements
383 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
384 not clear whether in the final analysis a sufficient amount of memory would
385 be saved as the size of the available expression bitmaps would be larger
386 [one could build a mapping table without holes afterwards though].
387 Someday I'll perform the computation and figure it out.
390 /* Total size of the expression hash table, in elements. */
391 static int expr_hash_table_size;
393 This is an array of `expr_hash_table_size' elements. */
394 static struct expr **expr_hash_table;
396 /* Total size of the copy propagation hash table, in elements. */
397 static int set_hash_table_size;
399 This is an array of `set_hash_table_size' elements. */
400 static struct expr **set_hash_table;
402 /* Mapping of uids to cuids.
403 Only real insns get cuids. */
404 static int *uid_cuid;
406 /* Highest UID in UID_CUID. */
409 /* Get the cuid of an insn. */
410 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
412 /* Number of cuids. */
415 /* Mapping of cuids to insns. */
416 static rtx *cuid_insn;
418 /* Get insn from cuid. */
419 #define CUID_INSN(CUID) (cuid_insn[CUID])
421 /* Maximum register number in function prior to doing gcse + 1.
422 Registers created during this pass have regno >= max_gcse_regno.
423 This is named with "gcse" to not collide with global of same name. */
424 static int max_gcse_regno;
426 /* Maximum number of cse-able expressions found. */
428 /* Maximum number of assignments for copy propagation found. */
431 /* Table of registers that are modified.
432 For each register, each element is a list of places where the pseudo-reg
435 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
436 requires knowledge of which blocks kill which regs [and thus could use
437 a bitmap instead of the lists `reg_set_table' uses]. The classic GCSE
438 uses the information in lists.
440 If the classic GCSE pass is deleted, `reg_set_table' could be turned
441 into an array of bitmaps (num-bbs x num-regs)
442 [however perhaps it may be useful to keep the data as is].
443 One advantage of recording things this way is that `reg_set_table' is
444 fairly sparse with respect to pseudo regs but for hard regs could be
445 fairly dense [relatively speaking].
446 And recording sets of pseudo-regs in lists speeds
447 up functions like compute_transp since in the case of pseudo-regs we only
448 need to iterate over the number of times a pseudo-reg is set, not over the
449 number of basic blocks [clearly there is a bit of a slow down in the cases
450 where a pseudo is set more than once in a block, however it is believed
451 that the net effect is to speed things up]. This isn't done for hard-regs
452 because recording call-clobbered hard-regs in `reg_set_table' at each
453 function call can consume a fair bit of memory, and iterating over hard-regs
454 stored this way in compute_transp will be more expensive. */
456 typedef struct reg_set {
457 /* The next setting of this register. */
458 struct reg_set *next;
459 /* The insn where it was set. */
462 static reg_set **reg_set_table;
463 /* Size of `reg_set_table'.
464 The table starts out at max_gcse_regno + slop, and is enlarged as
466 static int reg_set_table_size;
467 /* Amount to grow `reg_set_table' by when it's full. */
468 #define REG_SET_TABLE_SLOP 100
470 /* Bitmap containing one bit for each register in the program.
471 Used when performing GCSE to track which registers have been set since
472 the start of the basic block. */
473 static sbitmap reg_set_bitmap;
475 /* For each block, a bitmap of registers set in the block.
476 This is used by expr_killed_p and compute_transp.
477 It is computed during hash table computation and not by compute_sets
478 as it includes registers added since the last pass (or between cprop and
479 gcse) and it's currently not easy to realloc sbitmap vectors. */
480 static sbitmap *reg_set_in_block;
482 /* For each block, non-zero if memory is set in that block.
483 This is computed during hash table computation and is used by
484 expr_killed_p and compute_transp.
485 ??? Handling of memory is very simple, we don't make any attempt
486 to optimize things (later).
487 ??? This can be computed by compute_sets since the information
489 static char *mem_set_in_block;
491 /* Various variables for statistics gathering. */
493 /* Memory used in a pass.
494 This isn't intended to be absolutely precise. Its intent is only
495 to keep an eye on memory usage. */
496 static int bytes_used;
497 /* GCSE substitutions made. */
498 static int gcse_subst_count;
499 /* Number of copy instructions created. */
500 static int gcse_create_count;
501 /* Number of constants propagated. */
502 static int const_prop_count;
503 /* Number of copies propagated. */
504 static int copy_prop_count;
506 extern char *current_function_name;
507 extern int current_function_calls_setjmp;
508 extern int current_function_calls_longjmp;
510 /* These variables are used by classic GCSE.
511 Normally they'd be defined a bit later, but `rd_gen' needs to
512 be declared sooner. */
514 /* A bitmap of all ones for implementing the algorithm for available
515 expressions and reaching definitions. */
516 /* ??? Available expression bitmaps have a different size than reaching
517 definition bitmaps. This should be the larger of the two, however, it
518 is not currently used for reaching definitions. */
519 static sbitmap u_bitmap;
521 /* Each block has a bitmap of each type.
522 The length of each block's bitmap is:
524 max_cuid - for reaching definitions
525 n_exprs - for available expressions
527 Thus we view the bitmaps as 2 dimensional arrays. i.e.
528 rd_kill[block_num][cuid_num]
529 ae_kill[block_num][expr_num]
532 /* For reaching defs */
533 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
535 /* For available exprs */
536 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
538 static void compute_can_copy PROTO ((void));
540 static char *gmalloc PROTO ((unsigned int));
541 static char *grealloc PROTO ((char *, unsigned int));
542 static char *gcse_alloc PROTO ((unsigned long));
543 static void alloc_gcse_mem PROTO ((rtx));
544 static void free_gcse_mem PROTO ((void));
545 extern void dump_cuid_table PROTO ((FILE *));
547 static void alloc_reg_set_mem PROTO ((int));
548 static void free_reg_set_mem PROTO ((void));
549 static void record_one_set PROTO ((int, rtx));
550 static void record_set_info PROTO ((rtx, rtx));
551 static void compute_sets PROTO ((rtx));
553 static void hash_scan_insn PROTO ((rtx, int, int));
554 static void hash_scan_set PROTO ((rtx, rtx, int));
555 static void hash_scan_clobber PROTO ((rtx, rtx));
556 static void hash_scan_call PROTO ((rtx, rtx));
557 static void maybe_set_rd_gen PROTO ((int, rtx));
558 static int want_to_gcse_p PROTO ((rtx));
559 static int oprs_unchanged_p PROTO ((rtx, rtx, int));
560 static int oprs_anticipatable_p PROTO ((rtx, rtx));
561 static int oprs_available_p PROTO ((rtx, rtx));
562 static void insert_expr_in_table PROTO ((rtx, enum machine_mode, rtx, int, int));
563 static void insert_set_in_table PROTO ((rtx, rtx));
564 static unsigned int hash_expr PROTO ((rtx, enum machine_mode, int *, int));
565 static unsigned int hash_expr_1 PROTO ((rtx, enum machine_mode, int *));
566 static unsigned int hash_set PROTO ((int, int));
567 static int expr_equiv_p PROTO ((rtx, rtx));
568 static void record_last_reg_set_info PROTO ((rtx, int));
569 static void record_last_mem_set_info PROTO ((rtx));
570 static void record_last_set_info PROTO ((rtx, rtx));
571 static void compute_hash_table PROTO ((rtx, int));
572 static void alloc_set_hash_table PROTO ((int));
573 static void free_set_hash_table PROTO ((void));
574 static void compute_set_hash_table PROTO ((rtx));
575 static void alloc_expr_hash_table PROTO ((int));
576 static void free_expr_hash_table PROTO ((void));
577 static void compute_expr_hash_table PROTO ((rtx));
578 static void dump_hash_table PROTO ((FILE *, char *, struct expr **, int, int));
579 static struct expr *lookup_expr PROTO ((rtx));
580 static struct expr *lookup_set PROTO ((int, rtx));
581 static struct expr *next_set PROTO ((int, struct expr *));
582 static void reset_opr_set_tables PROTO ((void));
583 static int oprs_not_set_p PROTO ((rtx, rtx));
584 static void mark_call PROTO ((rtx, rtx));
585 static void mark_set PROTO ((rtx, rtx));
586 static void mark_clobber PROTO ((rtx, rtx));
587 static void mark_oprs_set PROTO ((rtx));
589 static void alloc_rd_mem PROTO ((int, int));
590 static void free_rd_mem PROTO ((void));
591 static void compute_kill_rd PROTO ((void));
592 static void handle_rd_kill_set PROTO ((rtx, int, int));
593 static void compute_rd PROTO ((void));
594 extern void dump_rd_table PROTO ((FILE *, char *, sbitmap *));
596 static void alloc_avail_expr_mem PROTO ((int, int));
597 static void free_avail_expr_mem PROTO ((void));
598 static void compute_ae_gen PROTO ((void));
599 static void compute_ae_kill PROTO ((void));
600 static int expr_killed_p PROTO ((rtx, int));
601 static void compute_available PROTO ((void));
603 static int expr_reaches_here_p PROTO ((struct occr *, struct expr *,
605 static rtx computing_insn PROTO ((struct expr *, rtx));
606 static int def_reaches_here_p PROTO ((rtx, rtx));
607 static int can_disregard_other_sets PROTO ((struct reg_set **, rtx, int));
608 static int handle_avail_expr PROTO ((rtx, struct expr *));
609 static int classic_gcse PROTO ((void));
610 static int one_classic_gcse_pass PROTO ((rtx, int));
612 static void alloc_cprop_mem PROTO ((int, int));
613 static void free_cprop_mem PROTO ((void));
614 extern void dump_cprop_data PROTO ((FILE *));
615 static void compute_transp PROTO ((rtx, int, sbitmap *, int));
616 static void compute_cprop_local_properties PROTO ((void));
617 static void compute_cprop_avinout PROTO ((void));
618 static void compute_cprop_data PROTO ((void));
619 static void find_used_regs PROTO ((rtx));
620 static int try_replace_reg PROTO ((rtx, rtx, rtx));
621 static struct expr *find_avail_set PROTO ((int, rtx));
622 static int cprop_insn PROTO ((rtx));
623 static int cprop PROTO ((void));
624 static int one_cprop_pass PROTO ((rtx, int));
626 static void alloc_pre_mem PROTO ((int, int));
627 static void free_pre_mem PROTO ((void));
628 extern void dump_pre_data PROTO ((FILE *));
629 static void compute_pre_local_properties PROTO ((void));
630 static void compute_pre_avinout PROTO ((void));
631 static void compute_pre_antinout PROTO ((void));
632 static void compute_pre_pavinout PROTO ((void));
633 static void compute_pre_ppinout PROTO ((void));
634 static void compute_pre_data PROTO ((void));
635 static int pre_expr_reaches_here_p PROTO ((struct occr *, struct expr *,
637 static void pre_insert_insn PROTO ((struct expr *, int));
638 static void pre_insert PROTO ((struct expr **));
639 static void pre_insert_copy_insn PROTO ((struct expr *, rtx));
640 static void pre_insert_copies PROTO ((void));
641 static int pre_delete PROTO ((void));
642 static int pre_gcse PROTO ((void));
643 static int one_pre_gcse_pass PROTO ((rtx, int));
645 static void add_label_notes PROTO ((rtx, rtx));
647 /* Entry point for global common subexpression elimination.
648 F is the first instruction in the function. */
656 /* Bytes used at start of pass. */
657 int initial_bytes_used;
658 /* Maximum number of bytes used by a pass. */
660 /* Point to release obstack data from for each pass. */
661 char *gcse_obstack_bottom;
663 /* It's impossible to construct a correct control flow graph in the
664 presence of setjmp, so just punt to be safe. */
665 if (current_function_calls_setjmp)
668 /* For calling dump_foo fns from gdb. */
669 debug_stderr = stderr;
671 max_gcse_regno = max_reg_num ();
672 find_basic_blocks (f, max_gcse_regno, file);
674 /* Return if there's nothing to do. */
675 if (n_basic_blocks <= 1)
677 /* Free storage allocated by find_basic_blocks. */
678 free_basic_block_vars (0);
682 /* See what modes support reg/reg copy operations. */
683 if (! can_copy_init_p)
689 gcc_obstack_init (&gcse_obstack);
693 /* Allocate and compute predecessors/successors. */
695 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
696 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
697 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
698 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
699 bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr);
700 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
703 dump_bb_data (file, s_preds, s_succs, 0);
705 /* Record where pseudo-registers are set.
706 This data is kept accurate during each pass.
707 ??? We could also record hard-reg and memory information here
708 [since it's unchanging], however it is currently done during
709 hash table computation. */
711 alloc_reg_set_mem (max_gcse_regno);
715 initial_bytes_used = bytes_used;
717 gcse_obstack_bottom = gcse_alloc (1);
719 while (changed && pass < MAX_PASSES)
723 fprintf (file, "GCSE pass %d\n\n", pass + 1);
725 /* Initialize bytes_used to the space for the pred/succ lists,
726 and the reg_set_table data. */
727 bytes_used = initial_bytes_used;
729 /* Each pass may create new registers, so recalculate each time. */
730 max_gcse_regno = max_reg_num ();
734 changed = one_cprop_pass (f, pass + 1);
737 changed |= one_classic_gcse_pass (f, pass + 1);
739 changed |= one_pre_gcse_pass (f, pass + 1);
741 if (max_pass_bytes < bytes_used)
742 max_pass_bytes = bytes_used;
748 fprintf (file, "\n");
751 obstack_free (&gcse_obstack, gcse_obstack_bottom);
755 /* If we're doing PRE, do one last pass of copy propagation. */
758 max_gcse_regno = max_reg_num ();
760 one_cprop_pass (f, pass + 1);
766 fprintf (file, "GCSE of %s: %d basic blocks, ",
767 current_function_name, n_basic_blocks);
768 fprintf (file, "%d pass%s, %d bytes\n\n",
769 pass, pass > 1 ? "es" : "", max_pass_bytes);
772 /* Free our obstack. */
773 obstack_free (&gcse_obstack, NULL_PTR);
774 /* Free reg_set_table. */
776 /* Free storage used to record predecessor/successor data. */
778 /* Free storage allocated by find_basic_blocks. */
779 free_basic_block_vars (0);
782 /* Misc. utilities. */
784 /* Compute which modes support reg/reg copy operations. */
790 #ifndef AVOID_CCMODE_COPIES
793 char *free_point = (char *) oballoc (1);
795 bzero (can_copy_p, NUM_MACHINE_MODES);
798 for (i = 0; i < NUM_MACHINE_MODES; i++)
800 switch (GET_MODE_CLASS (i))
803 #ifdef AVOID_CCMODE_COPIES
806 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
807 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
808 if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
819 /* Free the objects we just allocated. */
823 /* Cover function to xmalloc to record bytes allocated. */
830 return xmalloc (size);
833 /* Cover function to xrealloc.
834 We don't record the additional size since we don't know it.
835 It won't affect memory usage stats much anyway. */
842 return xrealloc (ptr, size);
845 /* Cover function to obstack_alloc.
846 We don't need to record the bytes allocated here since
847 obstack_chunk_alloc is set to gmalloc. */
853 return (char *) obstack_alloc (&gcse_obstack, size);
856 /* Allocate memory for the cuid mapping array,
857 and reg/memory set tracking tables.
859 This is called at the start of each pass. */
868 /* Find the largest UID and create a mapping from UIDs to CUIDs.
869 CUIDs are like UIDs except they increase monotonically, have no gaps,
870 and only apply to real insns. */
872 max_uid = get_max_uid ();
873 n = (max_uid + 1) * sizeof (int);
874 uid_cuid = (int *) gmalloc (n);
875 bzero ((char *) uid_cuid, n);
876 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
878 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
879 INSN_CUID (insn) = i++;
881 INSN_CUID (insn) = i;
884 /* Create a table mapping cuids to insns. */
887 n = (max_cuid + 1) * sizeof (rtx);
888 cuid_insn = (rtx *) gmalloc (n);
889 bzero ((char *) cuid_insn, n);
890 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
892 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
894 CUID_INSN (i) = insn;
899 /* Allocate vars to track sets of regs. */
901 reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
903 /* Allocate vars to track sets of regs, memory per block. */
905 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
907 mem_set_in_block = (char *) gmalloc (n_basic_blocks);
910 /* Free memory allocated by alloc_gcse_mem. */
918 free (reg_set_bitmap);
920 free (reg_set_in_block);
921 free (mem_set_in_block);
925 dump_cuid_table (file)
930 fprintf (file, "CUID table\n");
931 for (i = 0; i < max_cuid; i++)
933 rtx insn = CUID_INSN (i);
934 if (i != 0 && i % 10 == 0)
935 fprintf (file, "\n");
937 fprintf (file, " %d", INSN_UID (insn));
939 fprintf (file, "\n\n");
942 /* Register set information.
944 `reg_set_table' records where each register is set or otherwise
947 static struct obstack reg_set_obstack;
950 alloc_reg_set_mem (n_regs)
955 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
956 n = reg_set_table_size * sizeof (struct reg_set *);
957 reg_set_table = (struct reg_set **) gmalloc (n);
958 bzero ((char *) reg_set_table, n);
960 gcc_obstack_init (®_set_obstack);
966 free (reg_set_table);
967 obstack_free (®_set_obstack, NULL_PTR);
970 /* Record REGNO in the reg_set table. */
973 record_one_set (regno, insn)
977 /* Allocate a new reg_set element and link it onto the list. */
978 struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
980 /* If the table isn't big enough, enlarge it. */
981 if (regno >= reg_set_table_size)
983 int new_size = regno + REG_SET_TABLE_SLOP;
984 reg_set_table = (struct reg_set **)
985 grealloc ((char *) reg_set_table,
986 new_size * sizeof (struct reg_set *));
987 bzero ((char *) (reg_set_table + reg_set_table_size),
988 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
989 reg_set_table_size = new_size;
992 new_reg_info = (struct reg_set *) obstack_alloc (®_set_obstack,
993 sizeof (struct reg_set));
994 bytes_used += sizeof (struct reg_set);
995 new_reg_info->insn = insn;
996 new_reg_info->next = NULL;
997 if (reg_set_table[regno] == NULL)
998 reg_set_table[regno] = new_reg_info;
1001 reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1002 /* ??? One could keep a "last" pointer to speed this up. */
1003 while (reg_info_ptr1 != NULL)
1005 reg_info_ptr2 = reg_info_ptr1;
1006 reg_info_ptr1 = reg_info_ptr1->next;
1008 reg_info_ptr2->next = new_reg_info;
1012 /* For communication between next two functions (via note_stores). */
1013 static rtx record_set_insn;
1015 /* Called from compute_sets via note_stores to handle one
1016 SET or CLOBBER in an insn. */
1019 record_set_info (dest, setter)
1020 rtx dest, setter ATTRIBUTE_UNUSED;
/* Look through a SUBREG to the underlying register.  */
1022 if (GET_CODE (dest) == SUBREG)
1023 dest = SUBREG_REG (dest);
/* Only sets of pseudo-registers are recorded.  */
1025 if (GET_CODE (dest) == REG)
1027 if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1028 record_one_set (REGNO (dest), record_set_insn);
1032 /* Scan the function and record each set of each pseudo-register.
1034 This is called once, at the start of the gcse pass.
1035 See the comments for `reg_set_table' for further docs. */
/* note_stores calls record_set_info for each SET/CLOBBER;
   record_set_insn carries the current insn to that callback.  */
1045 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1047 record_set_insn = insn;
1048 note_stores (PATTERN (insn), record_set_info);
1050 insn = NEXT_INSN (insn);
1054 /* Hash table support. */
1056 #define NEVER_SET -1
1058 /* For each register, the cuid of the first/last insn in the block to set it,
1059 or -1 if not set. */
1060 static int *reg_first_set;
1061 static int *reg_last_set;
1063 /* While computing "first/last set" info, this is the CUID of first/last insn
1064 to set memory or -1 if not set. `mem_last_set' is also used when
1065 performing GCSE to record whether memory has been set since the beginning
of the current basic block.
1067 Note that handling of memory is very simple, we don't make any attempt
1068 to optimize things (later). */
1069 static int mem_first_set;
1070 static int mem_last_set;
1072 /* Set the appropriate bit in `rd_gen' [the gen for reaching defs] if the
1073 register set in this insn is not set after this insn in this block. */
1076 maybe_set_rd_gen (regno, insn)
/* reg_last_set holds the CUID of the last setter of REGNO in this block;
   if no later insn sets it, INSN's definition reaches the block end.  */
1080 if (reg_last_set[regno] <= INSN_CUID (insn))
1081 SET_BIT (rd_gen[BLOCK_NUM (insn)], INSN_CUID (insn));
1084 /* Perform a quick check whether X, the source of a set, is something
1085 we want to consider for GCSE. */
1091 enum rtx_code code = GET_CODE (x);
/* NOTE(review): the switch on CODE is elided from this listing.  */
1109 /* Return non-zero if the operands of expression X are unchanged from the
1110 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1111 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1114 oprs_unchanged_p (x, insn, avail_p)
1122 /* repeat is used to turn tail-recursion into iteration. */
1128 code = GET_CODE (x);
/* REG: compare the block's first/last set CUID against INSN's CUID.  */
1133 return (reg_last_set[REGNO (x)] == NEVER_SET
1134 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1136 return (reg_first_set[REGNO (x)] == NEVER_SET
1137 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
/* MEM: any store in the relevant range invalidates the expression
   (no aliasing analysis is done).  */
1142 if (mem_last_set != NEVER_SET
1143 && mem_last_set >= INSN_CUID (insn))
1148 if (mem_first_set != NEVER_SET
1149 && mem_first_set < INSN_CUID (insn))
/* Default: recurse over all operands, turning the final recursive
   call into iteration.  */
1176 i = GET_RTX_LENGTH (code) - 1;
1177 fmt = GET_RTX_FORMAT (code);
1182 rtx tem = XEXP (x, i);
1184 /* If we are about to do the last recursive call
1185 needed at this level, change it into iteration.
1186 This function is called enough to be worth it. */
1192 if (! oprs_unchanged_p (tem, insn, avail_p))
1195 else if (fmt[i] == 'E')
1198 for (j = 0; j < XVECLEN (x, i); j++)
1200 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1209 /* Return non-zero if the operands of expression X are unchanged from
1210 the start of INSN's basic block up to but not including INSN. */
/* Thin wrapper: AVAIL_P == 0 selects the "anticipatable" direction.  */
1213 oprs_anticipatable_p (x, insn)
1216 return oprs_unchanged_p (x, insn, 0);
1219 /* Return non-zero if the operands of expression X are unchanged from
1220 INSN to the end of INSN's basic block. */
/* Thin wrapper: AVAIL_P == 1 selects the "available" direction.  */
1223 oprs_available_p (x, insn)
1226 return oprs_unchanged_p (x, insn, 1);
1229 /* Hash expression X.
1230 MODE is only used if X is a CONST_INT.
1231 A boolean indicating if a volatile operand is found or if the expression
1232 contains something we don't want to insert in the table is stored in
DO_NOT_RECORD_P.
1235 ??? One might want to merge this with canon_hash. Later. */
1238 hash_expr (x, mode, do_not_record_p, hash_table_size)
1240 enum machine_mode mode;
1241 int *do_not_record_p;
1242 int hash_table_size;
1246 *do_not_record_p = 0;
/* hash_expr_1 does the traversal; fold into the bucket range here.  */
1248 hash = hash_expr_1 (x, mode, do_not_record_p);
1249 return hash % hash_table_size;
1252 /* Subroutine of hash_expr to do the actual work. */
1255 hash_expr_1 (x, mode, do_not_record_p)
1257 enum machine_mode mode;
1258 int *do_not_record_p;
1265 /* repeat is used to turn tail-recursion into iteration. */
1271 code = GET_CODE (x);
/* Leaf cases: fold the rtx code plus its identifying datum into HASH.  */
1276 register int regno = REGNO (x);
1277 hash += ((unsigned) REG << 7) + regno;
1283 unsigned HOST_WIDE_INT tem = INTVAL (x);
1284 hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1289 /* This is like the general case, except that it only counts
1290 the integers representing the constant. */
1291 hash += (unsigned) code + (unsigned) GET_MODE (x);
1292 if (GET_MODE (x) != VOIDmode)
1293 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1295 unsigned tem = XINT (x, i);
1299 hash += ((unsigned) CONST_DOUBLE_LOW (x)
1300 + (unsigned) CONST_DOUBLE_HIGH (x));
1303 /* Assume there is only one rtx object for any given label. */
1305 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1306 differences and differences between each stage's debugging dumps. */
1307 hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1312 /* Don't hash on the symbol's address to avoid bootstrap differences.
1313 Different hash values may cause expressions to be recorded in
1314 different orders and thus different registers to be used in the
1315 final assembler. This also avoids differences in the dump files
1316 between various stages. */
1318 unsigned char *p = (unsigned char *) XSTR (x, 0);
1320 h += (h << 7) + *p++; /* ??? revisit */
1321 hash += ((unsigned) SYMBOL_REF << 7) + h;
/* Volatile memory (and the volatile asm/unspec cases below) must never
   be entered in the table.  */
1326 if (MEM_VOLATILE_P (x))
1328 *do_not_record_p = 1;
1331 hash += (unsigned) MEM;
1342 case UNSPEC_VOLATILE:
1343 *do_not_record_p = 1;
1347 if (MEM_VOLATILE_P (x))
1349 *do_not_record_p = 1;
/* Generic case: walk operands, turning the last XEXP into iteration,
   and bail out as soon as *do_not_record_p is set.  */
1357 i = GET_RTX_LENGTH (code) - 1;
1358 hash += (unsigned) code + (unsigned) GET_MODE (x);
1359 fmt = GET_RTX_FORMAT (code);
1364 rtx tem = XEXP (x, i);
1366 /* If we are about to do the last recursive call
1367 needed at this level, change it into iteration.
1368 This function is called enough to be worth it. */
1374 hash += hash_expr_1 (tem, 0, do_not_record_p);
1375 if (*do_not_record_p)
1378 else if (fmt[i] == 'E')
1379 for (j = 0; j < XVECLEN (x, i); j++)
1381 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1382 if (*do_not_record_p)
1385 else if (fmt[i] == 's')
1387 register unsigned char *p = (unsigned char *) XSTR (x, i);
1392 else if (fmt[i] == 'i')
1394 register unsigned tem = XINT (x, i);
1404 /* Hash a set of register REGNO.
1406 Sets are hashed on the register that is set.
1407 This simplifies the PRE copy propagation code.
1409 ??? May need to make things more elaborate. Later, as necessary. */
1412 hash_set (regno, hash_table_size)
1414 int hash_table_size;
/* The bucket index is derived from REGNO alone.  */
1419 return hash % hash_table_size;
1422 /* Return non-zero if exp1 is equivalent to exp2.
1423 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1430 register enum rtx_code code;
/* One null pointer and one non-null pointer are not equivalent.  */
1435 if (x == 0 || y == 0)
1438 code = GET_CODE (x);
1439 if (code != GET_CODE (y))
1442 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1443 if (GET_MODE (x) != GET_MODE (y))
/* Leaf rtxs compare by their identifying datum.  */
1453 return INTVAL (x) == INTVAL (y);
1456 return XEXP (x, 0) == XEXP (y, 0);
1459 return XSTR (x, 0) == XSTR (y, 0);
1462 return REGNO (x) == REGNO (y);
1464 /* For commutative operations, check both orders. */
1472 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1473 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1474 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1475 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1481 /* Compare the elements. If any pair of corresponding elements
1482 fail to match, return 0 for the whole thing. */
1484 fmt = GET_RTX_FORMAT (code);
1485 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1490 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1495 if (XVECLEN (x, i) != XVECLEN (y, i))
1497 for (j = 0; j < XVECLEN (x, i); j++)
1498 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1503 if (strcmp (XSTR (x, i), XSTR (y, i)))
1508 if (XINT (x, i) != XINT (y, i))
1513 if (XWINT (x, i) != XWINT (y, i))
1528 /* Insert expression X in INSN in the hash table.
1529 If it is already present, record it as the last occurrence in INSN's
basic block.
1532 MODE is the mode of the value X is being stored into.
1533 It is only used if X is a CONST_INT.
1535 ANTIC_P is non-zero if X is an anticipatable expression.
1536 AVAIL_P is non-zero if X is an available expression. */
1539 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1541 enum machine_mode mode;
1543 int antic_p, avail_p;
1545 int found, do_not_record_p;
1547 struct expr *cur_expr, *last_expr = NULL;
1548 struct occr *antic_occr, *avail_occr;
1549 struct occr *last_occr = NULL;
1551 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1553 /* Do not insert expression in table if it contains volatile operands,
1554 or if hash_expr determines the expression is something we don't want
1555 to or can't handle. */
1556 if (do_not_record_p)
/* Walk this bucket's chain looking for an equivalent expression.  */
1559 cur_expr = expr_hash_table[hash];
1562 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1564 /* If the expression isn't found, save a pointer to the end of
the chain so a new entry can be appended.  */
1566 last_expr = cur_expr;
1567 cur_expr = cur_expr->next_same_hash;
/* Not found: allocate a fresh expr entry and link it in.  */
1572 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1573 bytes_used += sizeof (struct expr);
1574 if (expr_hash_table[hash] == NULL)
1576 /* This is the first pattern that hashed to this index. */
1577 expr_hash_table[hash] = cur_expr;
1581 /* Add EXPR to end of this hash chain. */
1582 last_expr->next_same_hash = cur_expr;
1584 /* Set the fields of the expr element. */
1586 cur_expr->bitmap_index = n_exprs++;
1587 cur_expr->next_same_hash = NULL;
1588 cur_expr->antic_occr = NULL;
1589 cur_expr->avail_occr = NULL;
1592 /* Now record the occurrence(s). */
1596 antic_occr = cur_expr->antic_occr;
1598 /* Search for another occurrence in the same basic block. */
1599 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1601 /* If an occurrence isn't found, save a pointer to the end of
the list.  */
1603 last_occr = antic_occr;
1604 antic_occr = antic_occr->next;
1609 /* Found another instance of the expression in the same basic block.
1610 Prefer the currently recorded one. We want the first one in the
1611 block and the block is scanned from start to end. */
1612 ; /* nothing to do */
1616 /* First occurrence of this expression in this basic block. */
1617 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1618 bytes_used += sizeof (struct occr);
1619 /* First occurrence of this expression in any block? */
1620 if (cur_expr->antic_occr == NULL)
1621 cur_expr->antic_occr = antic_occr;
1623 last_occr->next = antic_occr;
1624 antic_occr->insn = insn;
1625 antic_occr->next = NULL;
/* Availability uses the opposite preference: keep the LAST occurrence
   in the block, so overwrite the recorded insn when one is found.  */
1631 avail_occr = cur_expr->avail_occr;
1633 /* Search for another occurrence in the same basic block. */
1634 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1636 /* If an occurrence isn't found, save a pointer to the end of
the list.  */
1638 last_occr = avail_occr;
1639 avail_occr = avail_occr->next;
1644 /* Found another instance of the expression in the same basic block.
1645 Prefer this occurrence to the currently recorded one. We want
1646 the last one in the block and the block is scanned from start
to end.  */
1648 avail_occr->insn = insn;
1652 /* First occurrence of this expression in this basic block. */
1653 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1654 bytes_used += sizeof (struct occr);
1655 /* First occurrence of this expression in any block? */
1656 if (cur_expr->avail_occr == NULL)
1657 cur_expr->avail_occr = avail_occr;
1659 last_occr->next = avail_occr;
1660 avail_occr->insn = insn;
1661 avail_occr->next = NULL;
1666 /* Insert pattern X in INSN in the hash table.
1667 X is a SET of a reg to either another reg or a constant.
1668 If it is already present, record it as the last occurrence in INSN's
basic block.  */
1672 insert_set_in_table (x, insn)
1678 struct expr *cur_expr, *last_expr = NULL;
1679 struct occr *cur_occr, *last_occr = NULL;
/* Callers must pass (set (reg) ...); anything else is a bug.  */
1681 if (GET_CODE (x) != SET
1682 || GET_CODE (SET_DEST (x)) != REG)
1685 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
/* Walk this bucket's chain looking for an equivalent set.  */
1687 cur_expr = set_hash_table[hash];
1690 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1692 /* If the expression isn't found, save a pointer to the end of
the chain so a new entry can be appended.  */
1694 last_expr = cur_expr;
1695 cur_expr = cur_expr->next_same_hash;
1700 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1701 bytes_used += sizeof (struct expr);
1702 if (set_hash_table[hash] == NULL)
1704 /* This is the first pattern that hashed to this index. */
1705 set_hash_table[hash] = cur_expr;
1709 /* Add EXPR to end of this hash chain. */
1710 last_expr->next_same_hash = cur_expr;
1712 /* Set the fields of the expr element.
1713 We must copy X because it can be modified when copy propagation is
1714 performed on its operands. */
1715 /* ??? Should this go in a different obstack? */
1716 cur_expr->expr = copy_rtx (x);
1717 cur_expr->bitmap_index = n_sets++;
1718 cur_expr->next_same_hash = NULL;
1719 cur_expr->antic_occr = NULL;
1720 cur_expr->avail_occr = NULL;
1723 /* Now record the occurrence. */
1725 cur_occr = cur_expr->avail_occr;
1727 /* Search for another occurrence in the same basic block. */
1728 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1730 /* If an occurrence isn't found, save a pointer to the end of
the list.  */
1732 last_occr = cur_occr;
1733 cur_occr = cur_occr->next;
1738 /* Found another instance of the expression in the same basic block.
1739 Prefer this occurrence to the currently recorded one. We want
1740 the last one in the block and the block is scanned from start
to end.  */
1742 cur_occr->insn = insn;
1746 /* First occurrence of this expression in this basic block. */
1747 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1748 bytes_used += sizeof (struct occr);
1749 /* First occurrence of this expression in any block? */
1750 if (cur_expr->avail_occr == NULL)
1751 cur_expr->avail_occr = cur_occr;
1753 last_occr->next = cur_occr;
1754 cur_occr->insn = insn;
1755 cur_occr->next = NULL;
1759 /* Scan pattern PAT of INSN and add an entry to the hash table.
1760 If SET_P is non-zero, this is for the assignment hash table,
1761 otherwise it is for the expression hash table. */
1764 hash_scan_set (pat, insn, set_p)
1768 rtx src = SET_SRC (pat);
1769 rtx dest = SET_DEST (pat);
1771 if (GET_CODE (src) == CALL)
1772 hash_scan_call (src, insn);
1774 if (GET_CODE (dest) == REG)
1776 int regno = REGNO (dest);
1779 /* Only record sets of pseudo-regs in the hash table. */
1781 && regno >= FIRST_PSEUDO_REGISTER
1782 /* Don't GCSE something if we can't do a reg/reg copy. */
1783 && can_copy_p [GET_MODE (dest)]
1784 /* Is SET_SRC something we want to gcse? */
1785 && want_to_gcse_p (src))
1787 /* An expression is not anticipatable if its operands are
1788 modified before this insn. */
1789 int antic_p = ! optimize_size && oprs_anticipatable_p (src, insn);
1790 /* An expression is not available if its operands are
1791 subsequently modified, including this insn. */
1792 int avail_p = oprs_available_p (src, insn);
1793 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1795 /* Record sets for constant/copy propagation. */
1797 && regno >= FIRST_PSEUDO_REGISTER
1798 && ((GET_CODE (src) == REG
1799 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1800 && can_copy_p [GET_MODE (dest)])
1801 /* ??? CONST_INT:wip */
1802 || GET_CODE (src) == CONST_INT)
1803 /* A copy is not available if its src or dest is subsequently
1804 modified. Here we want to search from INSN+1 on, but
1805 oprs_available_p searches from INSN on. */
1806 && (insn == BLOCK_END (BLOCK_NUM (insn))
1807 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1808 && oprs_available_p (pat, tmp))))
1809 insert_set_in_table (pat, insn);
1812 /* Check if first/last set in this block for classic gcse,
1813 but not for copy/constant propagation. */
1814 if (optimize_size && !set_p)
1817 rtx dest = SET_DEST (pat);
/* Strip wrappers to find the register actually being set.  */
1819 while (GET_CODE (dest) == SUBREG
1820 || GET_CODE (dest) == ZERO_EXTRACT
1821 || GET_CODE (dest) == SIGN_EXTRACT
1822 || GET_CODE (dest) == STRICT_LOW_PART)
1823 dest = XEXP (dest, 0);
1824 if (GET_CODE (dest) == REG)
1825 maybe_set_rd_gen (REGNO (dest), insn);
/* Placeholder for handling a CLOBBER seen during hash table scanning.  */
1830 hash_scan_clobber (x, insn)
1831 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1833 /* Currently nothing to do. */
/* Placeholder for handling a CALL seen during hash table scanning.  */
1837 hash_scan_call (x, insn)
1838 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1840 /* Currently nothing to do. */
1843 /* Process INSN and add hash table entries as appropriate.
1845 Only available expressions that set a single pseudo-reg are recorded.
1847 Single sets in a PARALLEL could be handled, but it's an extra complication
1848 that isn't dealt with right now. The trick is handling the CLOBBERs that
1849 are also in the PARALLEL. Later.
1851 If SET_P is non-zero, this is for the assignment hash table,
1852 otherwise it is for the expression hash table.
1853 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1854 not record any expressions. */
1857 hash_scan_insn (insn, set_p, in_libcall_block)
1860 int in_libcall_block;
1862 rtx pat = PATTERN (insn);
1864 /* Pick out the sets of INSN and for other forms of instructions record
1865 what's been modified. */
1867 if (GET_CODE (pat) == SET && ! in_libcall_block)
1868 hash_scan_set (pat, insn, set_p);
/* PARALLELs are not entered in the table (see above), but their
   sub-rtxs are still scanned for calls and rd_gen bookkeeping.  */
1869 else if (GET_CODE (pat) == PARALLEL)
1873 for (i = 0; i < XVECLEN (pat, 0); i++)
1875 rtx x = XVECEXP (pat, 0, i);
1877 if (GET_CODE (x) == SET)
1879 if (GET_CODE (SET_SRC (x)) == CALL)
1880 hash_scan_call (SET_SRC (x), insn);
1882 /* Check if first/last set in this block for classic
1883 gcse, but not for constant/copy propagation. */
1884 if (optimize_size && !set_p)
1886 rtx dest = SET_DEST (x);
/* Strip wrappers to find the register actually being set.  */
1888 while (GET_CODE (dest) == SUBREG
1889 || GET_CODE (dest) == ZERO_EXTRACT
1890 || GET_CODE (dest) == SIGN_EXTRACT
1891 || GET_CODE (dest) == STRICT_LOW_PART)
1892 dest = XEXP (dest, 0);
1893 if (GET_CODE (dest) == REG)
1894 maybe_set_rd_gen (REGNO (dest), insn);
1897 else if (GET_CODE (x) == CLOBBER)
1898 hash_scan_clobber (x, insn);
1899 else if (GET_CODE (x) == CALL)
1900 hash_scan_call (x, insn);
1903 else if (GET_CODE (pat) == CLOBBER)
1904 hash_scan_clobber (pat, insn);
1905 else if (GET_CODE (pat) == CALL)
1906 hash_scan_call (pat, insn);
/* Dump hash table TABLE (named NAME, with TABLE_SIZE buckets and
   TOTAL_SIZE entries) to FILE, in bitmap_index order.  */
1910 dump_hash_table (file, name, table, table_size, total_size)
1913 struct expr **table;
1914 int table_size, total_size;
1917 /* Flattened out table, so it's printed in proper order. */
1918 struct expr **flat_table = (struct expr **) alloca (total_size * sizeof (struct expr *));
1919 unsigned int *hash_val = (unsigned int *) alloca (total_size * sizeof (unsigned int));
1921 bzero ((char *) flat_table, total_size * sizeof (struct expr *));
1922 for (i = 0; i < table_size; i++)
/* Place each entry at its bitmap_index; remember its bucket.  */
1926 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1928 flat_table[expr->bitmap_index] = expr;
1929 hash_val[expr->bitmap_index] = i;
1933 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1934 name, table_size, total_size);
1936 for (i = 0; i < total_size; i++)
1938 struct expr *expr = flat_table[i];
1940 fprintf (file, "Index %d (hash value %d)\n ",
1941 expr->bitmap_index, hash_val[i]);
1942 print_rtl (file, expr->expr);
1943 fprintf (file, "\n");
1946 fprintf (file, "\n");
1949 /* Record register first/last/block set information for REGNO in INSN.
1950 reg_first_set records the first place in the block where the register
1951 is set and is used to compute "anticipatability".
1952 reg_last_set records the last place in the block where the register
1953 is set and is used to compute "availability".
1954 reg_set_in_block records whether the register is set in the block
1955 and is used to compute "transparency". */
1958 record_last_reg_set_info (insn, regno)
/* Only the first setter establishes reg_first_set; reg_last_set is
   simply overwritten on each call.  */
1962 if (reg_first_set[regno] == NEVER_SET)
1963 reg_first_set[regno] = INSN_CUID (insn);
1964 reg_last_set[regno] = INSN_CUID (insn);
1965 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
1968 /* Record memory first/last/block set information for INSN. */
1971 record_last_mem_set_info (insn)
/* Mirrors record_last_reg_set_info, but for the single "memory" slot.  */
1974 if (mem_first_set == NEVER_SET)
1975 mem_first_set = INSN_CUID (insn);
1976 mem_last_set = INSN_CUID (insn);
1977 mem_set_in_block[BLOCK_NUM (insn)] = 1;
1980 /* Used for communicating between next two routines. */
1981 static rtx last_set_insn;
1983 /* Called from compute_hash_table via note_stores to handle one
1984 SET or CLOBBER in an insn. */
1987 record_last_set_info (dest, setter)
1988 rtx dest, setter ATTRIBUTE_UNUSED;
/* Look through a SUBREG to the underlying object.  */
1990 if (GET_CODE (dest) == SUBREG)
1991 dest = SUBREG_REG (dest);
1993 if (GET_CODE (dest) == REG)
1994 record_last_reg_set_info (last_set_insn, REGNO (dest));
1995 else if (GET_CODE (dest) == MEM
1996 /* Ignore pushes, they clobber nothing. */
1997 && ! push_operand (dest, GET_MODE (dest)))
1998 record_last_mem_set_info (last_set_insn);
2001 /* Top level function to create an expression or assignment hash table.
2003 Expression entries are placed in the hash table if
2004 - they are of the form (set (pseudo-reg) src),
2005 - src is something we want to perform GCSE on,
2006 - none of the operands are subsequently modified in the block
2008 Assignment entries are placed in the hash table if
2009 - they are of the form (set (pseudo-reg) src),
2010 - src is something we want to perform const/copy propagation on,
2011 - none of the operands or target are subsequently modified in the block
2012 Currently src must be a pseudo-reg or a const_int.
2014 F is the first insn.
2015 SET_P is non-zero for computing the assignment hash table. */
2018 compute_hash_table (f, set_p)
2019 rtx f ATTRIBUTE_UNUSED;
2024 /* While we compute the hash table we also compute a bit array of which
2025 registers are set in which blocks.
2026 We also compute which blocks set memory, in the absence of aliasing
2027 support [which is TODO].
2028 ??? This isn't needed during const/copy propagation, but it's cheap to
compute anyway.  */
2030 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2031 bzero ((char *) mem_set_in_block, n_basic_blocks);
2033 /* Some working arrays used to track first and last set in each block. */
2034 /* ??? One could use alloca here, but at some size a threshold is crossed
2035 beyond which one should use malloc. Are we at that threshold here? */
2036 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2037 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2039 for (bb = 0; bb < n_basic_blocks; bb++)
2043 int in_libcall_block;
2046 /* First pass over the instructions records information used to
2047 determine when registers and memory are first and last set.
2048 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2049 could be moved to compute_sets since they currently don't change. */
/* Reset per-block first/last set state before scanning the block.  */
2051 for (i = 0; i < max_gcse_regno; i++)
2052 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2053 mem_first_set = NEVER_SET;
2054 mem_last_set = NEVER_SET;
2056 for (insn = basic_block_head[bb];
2057 insn && insn != NEXT_INSN (basic_block_end[bb]);
2058 insn = NEXT_INSN (insn))
2060 #ifdef NON_SAVING_SETJMP
/* A setjmp that doesn't save registers clobbers every hard reg.  */
2061 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2062 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2064 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2065 record_last_reg_set_info (insn, regno);
2070 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
/* Calls clobber call-used and global hard regs, and (unless the
   call is const) memory.  */
2073 if (GET_CODE (insn) == CALL_INSN)
2075 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2076 if ((call_used_regs[regno]
2077 && regno != STACK_POINTER_REGNUM
2078 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2079 && regno != HARD_FRAME_POINTER_REGNUM
2081 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2082 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2084 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2085 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2088 && regno != FRAME_POINTER_REGNUM)
2089 || global_regs[regno])
2090 record_last_reg_set_info (insn, regno)2090 record_last_reg_set_info (insn, regno);
2091 if (! CONST_CALL_P (insn))
2092 record_last_mem_set_info (insn);
2095 last_set_insn = insn;
2096 note_stores (PATTERN (insn), record_last_set_info);
2099 /* The next pass builds the hash table. */
/* in_libcall_block tracks REG_LIBCALL/REG_RETVAL bracketing so that
   insns inside a libcall sequence are not recorded.  */
2101 for (insn = basic_block_head[bb], in_libcall_block = 0;
2102 insn && insn != NEXT_INSN (basic_block_end[bb]);
2103 insn = NEXT_INSN (insn))
2105 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2107 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2108 in_libcall_block = 1;
2109 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2110 in_libcall_block = 0;
2111 hash_scan_insn (insn, set_p, in_libcall_block);
2116 free (reg_first_set);
2117 free (reg_last_set);
2118 /* Catch bugs early. */
2119 reg_first_set = reg_last_set = 0;
2122 /* Allocate space for the set hash table.
2123 N_INSNS is the number of instructions in the function.
2124 It is used to determine the number of buckets to use. */
2127 alloc_set_hash_table (n_insns)
2132 set_hash_table_size = n_insns / 4;
/* Enforce a small minimum so tiny functions still get a usable table.  */
2133 if (set_hash_table_size < 11)
2134 set_hash_table_size = 11;
2135 /* Attempt to maintain efficient use of hash table.
2136 Making it an odd number is simplest for now.
2137 ??? Later take some measurements. */
2138 set_hash_table_size |= 1;
2139 n = set_hash_table_size * sizeof (struct expr *);
2140 set_hash_table = (struct expr **) gmalloc (n);
2143 /* Free things allocated by alloc_set_hash_table. */
2146 free_set_hash_table ()
2148 free (set_hash_table);
2151 /* Compute the hash table for doing copy/const propagation. */
2154 compute_set_hash_table (f)
2157 /* Initialize count of number of entries in hash table. */
2159 bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
/* SET_P == 1 selects the assignment table in compute_hash_table.  */
2161 compute_hash_table (f, 1);
2164 /* Allocate space for the expression hash table.
2165 N_INSNS is the number of instructions in the function.
2166 It is used to determine the number of buckets to use. */
2169 alloc_expr_hash_table (n_insns)
2174 expr_hash_table_size = n_insns / 2;
2175 /* Make sure the amount is usable. */
2176 if (expr_hash_table_size < 11)
2177 expr_hash_table_size = 11;
2178 /* Attempt to maintain efficient use of hash table.
2179 Making it an odd number is simplest for now.
2180 ??? Later take some measurements. */
2181 expr_hash_table_size |= 1;
2182 n = expr_hash_table_size * sizeof (struct expr *);
2183 expr_hash_table = (struct expr **) gmalloc (n);
2186 /* Free things allocated by alloc_expr_hash_table. */
2189 free_expr_hash_table ()
2191 free (expr_hash_table);
2194 /* Compute the hash table for doing GCSE. */
2197 compute_expr_hash_table (f)
2200 /* Initialize count of number of entries in hash table. */
2202 bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
/* SET_P == 0 selects the expression table in compute_hash_table.  */
2204 compute_hash_table (f, 0);
2207 /* Expression tracking support. */
2209 /* Lookup pattern PAT in the expression table.
2210 The result is a pointer to the table entry, or NULL if not found. */
2212 static struct expr *
2216 int do_not_record_p;
2217 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2218 expr_hash_table_size);
/* Something hash_expr refuses to record can never be in the table.  */
2221 if (do_not_record_p)
2224 expr = expr_hash_table[hash];
2226 while (expr && ! expr_equiv_p (expr->expr, pat))
2227 expr = expr->next_same_hash;
2232 /* Lookup REGNO in the set table.
2233 If PAT is non-NULL look for the entry that matches it, otherwise return
2234 the first entry for REGNO.
2235 The result is a pointer to the table entry, or NULL if not found. */
2237 static struct expr *
2238 lookup_set (regno, pat)
2242 unsigned int hash = hash_set (regno, set_hash_table_size);
2245 expr = set_hash_table[hash];
/* With a pattern: exact equivalence; without: match on dest REGNO.  */
2249 while (expr && ! expr_equiv_p (expr->expr, pat))
2250 expr = expr->next_same_hash;
2254 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2255 expr = expr->next_same_hash;
2261 /* Return the next entry for REGNO in list EXPR. */
2263 static struct expr *
2264 next_set (regno, expr)
/* Skip entries in this hash chain whose destination isn't REGNO.  */
2269 expr = expr->next_same_hash;
2270 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2274 /* Reset tables used to keep track of what's still available [since the
2275 start of the block]. */
2278 reset_opr_set_tables ()
2280 /* Maintain a bitmap of which regs have been set since beginning of
the block.  */
2282 sbitmap_zero (reg_set_bitmap);
2283 /* Also keep a record of the last instruction to modify memory.
2284 For now this is very trivial, we only record whether any memory
2285 location has been modified. */
2289 /* Return non-zero if the operands of X are not set before INSN in
2290 INSN's basic block. */
2293 oprs_not_set_p (x, insn)
2300 /* repeat is used to turn tail-recursion into iteration. */
2306 code = GET_CODE (x);
/* MEM: any store so far in the block (mem_last_set != 0) kills it.  */
2321 if (mem_last_set != 0)
/* REG: consult the since-block-start set bitmap.  */
2327 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2333 fmt = GET_RTX_FORMAT (code);
2334 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2339 /* If we are about to do the last recursive call
2340 needed at this level, change it into iteration.
2341 This function is called enough to be worth it. */
2347 not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2351 else if (fmt[i] == 'E')
2354 for (j = 0; j < XVECLEN (x, i); j++)
2356 int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2366 /* Mark things set by a CALL. */
2369 mark_call (pat, insn)
2370 rtx pat ATTRIBUTE_UNUSED, insn;
/* A call is treated as clobbering all of memory.  */
2372 mem_last_set = INSN_CUID (insn);
2375 /* Mark things set by a SET. */
2378 mark_set (pat, insn)
2381 rtx dest = SET_DEST (pat);
/* Strip wrappers to find the object actually being set.  */
2383 while (GET_CODE (dest) == SUBREG
2384 || GET_CODE (dest) == ZERO_EXTRACT
2385 || GET_CODE (dest) == SIGN_EXTRACT
2386 || GET_CODE (dest) == STRICT_LOW_PART)
2387 dest = XEXP (dest, 0);
2389 if (GET_CODE (dest) == REG)
2390 SET_BIT (reg_set_bitmap, REGNO (dest));
2391 else if (GET_CODE (dest) == MEM)
2392 mem_last_set = INSN_CUID (insn);
/* A SET whose source is a CALL also clobbers memory.  */
2394 if (GET_CODE (SET_SRC (pat)) == CALL)
2395 mark_call (SET_SRC (pat), insn);
2398 /* Record things set by a CLOBBER. */
2401 mark_clobber (pat, insn)
2404 rtx clob = XEXP (pat, 0);
/* Strip wrappers to find the object actually being clobbered.  */
2406 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2407 clob = XEXP (clob, 0);
2409 if (GET_CODE (clob) == REG)
2410 SET_BIT (reg_set_bitmap, REGNO (clob));
2412 mem_last_set = INSN_CUID (insn);
2415 /* Record things set by INSN.
2416 This data is used by oprs_not_set_p. */
2419 mark_oprs_set (insn)
2422 rtx pat = PATTERN (insn);
/* Dispatch on the pattern shape; PARALLELs mark each element.  */
2424 if (GET_CODE (pat) == SET)
2425 mark_set (pat, insn);
2426 else if (GET_CODE (pat) == PARALLEL)
2430 for (i = 0; i < XVECLEN (pat, 0); i++)
2432 rtx x = XVECEXP (pat, 0, i);
2434 if (GET_CODE (x) == SET)
2436 else if (GET_CODE (x) == CLOBBER)
2437 mark_clobber (x, insn);
2438 else if (GET_CODE (x) == CALL)
2439 mark_call (x, insn);
2442 else if (GET_CODE (pat) == CLOBBER)
2443 mark_clobber (pat, insn);
2444 else if (GET_CODE (pat) == CALL)
2445 mark_call (pat, insn);
2448 /* Classic GCSE reaching definition support. */
2450 /* Allocate reaching def variables. */
2453 alloc_rd_mem (n_blocks, n_insns)
2454 int n_blocks, n_insns;
/* Four per-block bitmaps of N_INSNS bits each: kill, gen, in (reaching
   defs), and out, all cleared to start.  */
2456 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2457 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2459 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2460 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2462 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2463 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2465 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2466 sbitmap_vector_zero (rd_out, n_basic_blocks);
2469 /* Free reaching def variables. */
/* NOTE(review): the frees of the other rd_* vectors are elided here.  */
2476 free (reaching_defs);
2480 /* Add INSN to the kills of BB.
2481 REGNO, set in BB, is killed by INSN. */
2484 handle_rd_kill_set (insn, regno, bb)
2488 struct reg_set *this_reg = reg_set_table[regno];
/* Every other setter of REGNO outside INSN's block is killed here.  */
2492 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2493 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2494 this_reg = this_reg->next;
/* Dump a reaching-definitions bitmap vector BMAP to FILE under TITLE.
   For each block the raw sbitmap is printed, then the UID of each insn
   whose cuid bit is set (cuid counts across the sbitmap words).  */
2499 dump_rd_table (file, title, bmap)
2506 fprintf (file, "%s\n", title);
2507 for (bb = 0; bb < n_basic_blocks; bb++)
2509 fprintf (file, "BB %d\n", bb);
2510 dump_sbitmap (file, bmap[bb]);
2511 for (i = n = cuid = 0; i < bmap[bb]->size; i++)
2513 for (j = 0; j < SBITMAP_ELT_BITS; j++, cuid++)
2515 if ((bmap[bb]->elms[i] & (1 << j)) != 0)
2518 fprintf (file, "  ");
2519 fprintf (file, " %d", INSN_UID (CUID_INSN (cuid)));
2525 fprintf (file, "\n");
2527 fprintf (file, "\n");
2530 /* Compute the set of kill's for reaching definitions. */
/* For every insn that generates a definition (a set bit in rd_gen),
   mark as killed every other setting of the same register outside the
   block.  A CALL_INSN additionally kills definitions of all
   call-clobbered (or global) hard registers, excluding the stack/frame/
   arg pointers per the #if conditions below.  */
2538 For each set bit in `gen' of the block (i.e each insn which
2539 generates a definition in the block)
2540 Call the reg set by the insn corresponding to that bit regx
2541 Look at the linked list starting at reg_set_table[regx]
2542 For each setting of regx in the linked list, which is not in
2544 Set the bit in `kill' corresponding to that insn
2547 for (bb = 0; bb < n_basic_blocks; bb++)
2549 for (cuid = 0; cuid < max_cuid; cuid++)
2551 if (TEST_BIT (rd_gen[bb], cuid))
2553 rtx insn = CUID_INSN (cuid);
2554 rtx pat = PATTERN (insn);
2556 if (GET_CODE (insn) == CALL_INSN)
2560 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2562 if ((call_used_regs[regno]
2563 && regno != STACK_POINTER_REGNUM
2564 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2565 && regno != HARD_FRAME_POINTER_REGNUM
2567 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2568 && ! (regno == ARG_POINTER_REGNUM
2569 && fixed_regs[regno])
2571 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2572 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2574 && regno != FRAME_POINTER_REGNUM)
2575 || global_regs[regno])
2576 handle_rd_kill_set (insn, regno, bb);
2580 if (GET_CODE (pat) == PARALLEL)
2584 /* We work backwards because ... */
2585 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2587 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2588 if ((code == SET || code == CLOBBER)
2589 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2590 handle_rd_kill_set (insn,
2591 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2595 else if (GET_CODE (pat) == SET)
2597 if (GET_CODE (SET_DEST (pat)) == REG)
2599 /* Each setting of this register outside of this block
2600 must be marked in the set of kills in this block. */
2601 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2604 /* FIXME: CLOBBER? */
2610 /* Compute the reaching definitions as in
2611 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2612 Chapter 10. It is the same algorithm as used for computing available
2613 expressions but applied to the gens and kills of reaching definitions. */
/* Iterative forward dataflow:
     out[bb] = gen[bb] initially;
     in[bb]  = union of out over predecessors;
     out[bb] = gen[bb] U (in[bb] - kill[bb]);
   repeated until no out set changes.  The pass count is logged.  */
2618 int bb, changed, passes;
2620 for (bb = 0; bb < n_basic_blocks; bb++)
2621 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2628 for (bb = 0; bb < n_basic_blocks; bb++)
2630 sbitmap_union_of_predecessors (reaching_defs[bb], rd_out,
2632 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2633 reaching_defs[bb], rd_kill[bb]);
2639 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2642 /* Classic GCSE available expression support. */
2644 /* Allocate memory for available expression computation. */
/* Per-block N_EXPRS-bit sets for kill/gen/in/out of the available-
   expressions problem, plus u_bitmap, the all-ones "universe" set used
   to initialize out sets.  Same n_blocks/n_basic_blocks caveat as
   alloc_rd_mem.  */
2647 alloc_avail_expr_mem (n_blocks, n_exprs)
2648 int n_blocks, n_exprs;
2650 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2651 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2653 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2654 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2656 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2657 sbitmap_vector_zero (ae_in, n_basic_blocks);
2659 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2660 sbitmap_vector_zero (ae_out, n_basic_blocks);
2662 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2663 sbitmap_ones (u_bitmap);
/* Free memory allocated by alloc_avail_expr_mem (body truncated in
   this view).  */
2667 free_avail_expr_mem ()
2676 /* Compute the set of available expressions generated in each basic block. */
/* Walk the expression hash table; for each recorded available
   occurrence, set the expression's bit in ae_gen of the occurrence's
   block.  */
2683 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2684 This is all we have to do because an expression is not recorded if it
2685 is not available, and the only expressions we want to work with are the
2686 ones that are recorded. */
2688 for (i = 0; i < expr_hash_table_size; i++)
2690 struct expr *expr = expr_hash_table[i];
2691 while (expr != NULL)
2693 struct occr *occr = expr->avail_occr;
2694 while (occr != NULL)
2696 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2699 expr = expr->next_same_hash;
2704 /* Return non-zero if expression X is killed in BB. */
/* A REG operand is killed if its bit is set in reg_set_in_block[bb];
   a MEM is (conservatively) killed if any memory is set in the block.
   All other rtx codes are scanned recursively over their operands,
   with the last operand handled iteratively (the "repeat" idiom) to
   limit recursion depth.  */
2707 expr_killed_p (x, bb)
2715 /* repeat is used to turn tail-recursion into iteration. */
2721 code = GET_CODE (x);
2725 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2728 if (mem_set_in_block[bb])
2748 i = GET_RTX_LENGTH (code) - 1;
2749 fmt = GET_RTX_FORMAT (code);
2754 rtx tem = XEXP (x, i);
2756 /* If we are about to do the last recursive call
2757 needed at this level, change it into iteration.
2758 This function is called enough to be worth it. */
2764 if (expr_killed_p (tem, bb))
2767 else if (fmt[i] == 'E')
2770 for (j = 0; j < XVECLEN (x, i); j++)
2772 if (expr_killed_p (XVECEXP (x, i, j), bb))
2781 /* Compute the set of available expressions killed in each basic block. */
/* For every hashed expression not generated in BB, set its ae_kill bit
   if any of its operands is modified in BB (per expr_killed_p).
   Expressions generated in the block are skipped: generation already
   accounts for intra-block ordering.  */
2788 for (bb = 0; bb < n_basic_blocks; bb++)
2790 for (i = 0; i < expr_hash_table_size; i++)
2792 struct expr *expr = expr_hash_table[i];
2794 for ( ; expr != NULL; expr = expr->next_same_hash)
2796 /* Skip EXPR if generated in this block. */
2797 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2800 if (expr_killed_p (expr->expr, bb))
2801 SET_BIT (ae_kill[bb], expr->bitmap_index);
2807 /* Compute available expressions.
2809 Implement the algorithm to find available expressions
2810 as given in the Aho Sethi Ullman book, pages 627-631. */
/* Entry block: in = empty, out = gen.  All other blocks start with
   out = U - kill (optimistic), then iterate:
     in[bb]  = intersection of out over predecessors;
     out[bb] = gen[bb] U (in[bb] - kill[bb]);
   until no out changes.  Pass count is logged.  */
2813 compute_available ()
2815 int bb, changed, passes;
2817 sbitmap_zero (ae_in[0]);
2819 sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/);
2821 for (bb = 1; bb < n_basic_blocks; bb++)
2822 sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]);
2829 for (bb = 1; bb < n_basic_blocks; bb++)
2831 sbitmap_intersect_of_predecessors (ae_in[bb], ae_out,
2833 changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb],
2834 ae_in[bb], ae_kill[bb]);
2840 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
2843 /* Actually perform the Classic GCSE optimizations. */
2845 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2847 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2848 as a positive reach. We want to do this when there are two computations
2849 of the expression in the block.
2851 VISITED is a pointer to a working buffer for tracking which BB's have
2852 been visited. It is NULL for the top-level call.
2854 We treat reaching expressions that go through blocks containing the same
2855 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2856 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2857 2 as not reaching. The intent is to improve the probability of finding
2858 only one reaching expression and to reduce register lifetimes by picking
2859 the closest such expression. */
/* Depth-first walk over predecessors.  A predecessor path is pruned
   when the block was already visited, kills the expression, or
   generates a different occurrence of it; the search recurses only
   through blocks that neither gen nor kill EXPR.  */
2862 expr_reaches_here_p (occr, expr, bb, check_self_loop, visited)
2866 int check_self_loop;
2871 if (visited == NULL)
2873 visited = (char *) alloca (n_basic_blocks);
2874 bzero (visited, n_basic_blocks);
2877 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
2879 int pred_bb = INT_LIST_VAL (pred);
2881 if (visited[pred_bb])
2883 /* This predecessor has already been visited.
2887 else if (pred_bb == bb)
2889 /* BB loops on itself. */
2891 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2892 && BLOCK_NUM (occr->insn) == pred_bb)
2894 visited[pred_bb] = 1;
2896 /* Ignore this predecessor if it kills the expression. */
2897 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2898 visited[pred_bb] = 1;
2899 /* Does this predecessor generate this expression? */
2900 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2902 /* Is this the occurrence we're looking for?
2903 Note that there's only one generating occurrence per block
2904 so we just need to check the block number. */
2905 if (BLOCK_NUM (occr->insn) == pred_bb)
2907 visited[pred_bb] = 1;
2909 /* Neither gen nor kill. */
2912 visited[pred_bb] = 1;
2913 if (expr_reaches_here_p (occr, expr, pred_bb, check_self_loop, visited))
2918 /* All paths have been checked. */
2922 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2923 If there is more than one such instruction, return NULL.
2925 Called only by handle_avail_expr. */
/* Single-occurrence case: if the lone occurrence is in INSN's own
   block it is INSN itself (a flow-graph loop), otherwise return it.
   Multi-occurrence case: scan all occurrences and use
   expr_reaches_here_p to count reaching computations; more than one
   reaching computation makes the answer ambiguous, so NULL is
   returned (the truncated lines presumably bail out then).  */
2928 computing_insn (expr, insn)
2932 int bb = BLOCK_NUM (insn);
2934 if (expr->avail_occr->next == NULL)
2936 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2938 /* The available expression is actually itself
2939 (i.e. a loop in the flow graph) so do nothing. */
2942 /* (FIXME) Case that we found a pattern that was created by
2943 a substitution that took place. */
2944 return expr->avail_occr->insn;
2948 /* Pattern is computed more than once.
2949 Search backwards from this insn to see how many of these
2950 computations actually reach this insn. */
2952 rtx insn_computes_expr = NULL;
2955 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2957 if (BLOCK_NUM (occr->insn) == bb)
2959 /* The expression is generated in this block.
2960 The only time we care about this is when the expression
2961 is generated later in the block [and thus there's a loop].
2962 We let the normal cse pass handle the other cases. */
2963 if (INSN_CUID (insn) < INSN_CUID (occr->insn))
2965 if (expr_reaches_here_p (occr, expr, bb, 1, NULL))
2970 insn_computes_expr = occr->insn;
2974 else /* Computation of the pattern outside this block. */
2976 if (expr_reaches_here_p (occr, expr, bb, 0, NULL))
2981 insn_computes_expr = occr->insn;
2986 if (insn_computes_expr == NULL)
2988 return insn_computes_expr;
2992 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2993 Only called by can_disregard_other_sets. */
/* Fast path: the reaching-defs bitmap answers for cross-block cases.
   Same-block case: DEF_INSN reaches INSN if it precedes it (by cuid)
   and the defined register is not set again in between.  PARALLEL
   patterns are not handled (truncated branch presumably returns 0).  */
2996 def_reaches_here_p (insn, def_insn)
3001 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3004 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3006 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3008 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3010 if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3011 reg = XEXP (PATTERN (def_insn), 0);
3012 else if (GET_CODE (PATTERN (def_insn)) == SET)
3013 reg = SET_DEST (PATTERN (def_insn));
3016 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3025 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3026 The value returned is the number of definitions that reach INSN.
3027 Returning a value of zero means that [maybe] more than one definition
3028 reaches INSN and the caller can't perform whatever optimization it is
3029 trying. i.e. it is always safe to return zero. */
/* Walk the reg_set list, counting definitions that reach INSN.  A
   reaching CLOBBER, or a reaching SET whose source differs from
   INSN's source, disqualifies (return 0) -- unless all reaching sets
   assign the same value.  On success *ADDR_THIS_REG is advanced to
   the (last inspected) reaching definition.  */
3032 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3033 struct reg_set **addr_this_reg;
3037 int number_of_reaching_defs = 0;
3038 struct reg_set *this_reg = *addr_this_reg;
3042 if (def_reaches_here_p (insn, this_reg->insn))
3044 number_of_reaching_defs++;
3045 /* Ignore parallels for now. */
3046 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3049 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3050 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3051 SET_SRC (PATTERN (insn)))))
3053 /* A setting of the reg to a different value reaches INSN. */
3056 if (number_of_reaching_defs > 1)
3058 /* If in this setting the value the register is being
3059 set to is equal to the previous value the register
3060 was set to and this setting reaches the insn we are
3061 trying to do the substitution on then we are ok. */
3063 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3065 if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3066 SET_SRC (PATTERN (insn))))
3069 *addr_this_reg = this_reg;
3072 /* prev_this_reg = this_reg; */
3073 this_reg = this_reg->next;
3076 return number_of_reaching_defs;
3079 /* Expression computed by insn is available and the substitution is legal,
3080 so try to perform the substitution.
3082 The result is non-zero if any changes were made. */
/* Two strategies once the unique computing insn is found:
   (1) if a register already holds the expression's value (either the
       computing insn's source reg or its destination reg, when that
       reg is set only once or only one set reaches INSN), replace
       INSN's SET_SRC with that register via validate_change;
   (2) otherwise the destination is set more than once, so emit a copy
       of it into a fresh pseudo right after the computing insn and
       substitute the new pseudo into INSN, keeping the block-number
       and reg-set tables current.  */
3085 handle_avail_expr (insn, expr)
3089 rtx pat, insn_computes_expr;
3091 struct reg_set *this_reg;
3092 int found_setting, use_src;
3095 /* We only handle the case where one computation of the expression
3096 reaches this instruction. */
3097 insn_computes_expr = computing_insn (expr, insn);
3098 if (insn_computes_expr == NULL)
3104 /* At this point we know only one computation of EXPR outside of this
3105 block reaches this insn. Now try to find a register that the
3106 expression is computed into. */
3108 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3110 /* This is the case when the available expression that reaches
3111 here has already been handled as an available expression. */
3112 int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3113 /* If the register was created by GCSE we can't use `reg_set_table',
3114 however we know it's set only once. */
3115 if (regnum_for_replacing >= max_gcse_regno
3116 /* If the register the expression is computed into is set only once,
3117 or only one set reaches this insn, we can use it. */
3118 || (((this_reg = reg_set_table[regnum_for_replacing]),
3119 this_reg->next == NULL)
3120 || can_disregard_other_sets (&this_reg, insn, 0)))
3129 int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3130 /* This shouldn't happen. */
3131 if (regnum_for_replacing >= max_gcse_regno)
3133 this_reg = reg_set_table[regnum_for_replacing];
3134 /* If the register the expression is computed into is set only once,
3135 or only one set reaches this insn, use it. */
3136 if (this_reg->next == NULL
3137 || can_disregard_other_sets (&this_reg, insn, 0))
3143 pat = PATTERN (insn);
3145 to = SET_SRC (PATTERN (insn_computes_expr));
3147 to = SET_DEST (PATTERN (insn_computes_expr));
3148 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3150 /* We should be able to ignore the return code from validate_change but
3151 to play it safe we check. */
3155 if (gcse_file != NULL)
3157 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3158 INSN_UID (insn), REGNO (to),
3159 use_src ? "from" : "set in",
3160 INSN_UID (insn_computes_expr));
3165 /* The register that the expr is computed into is set more than once. */
3166 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3168 /* Insert an insn after insnx that copies the reg set in insnx
3169 into a new pseudo register call this new register REGN.
3170 From insnb until end of basic block or until REGB is set
3171 replace all uses of REGB with REGN. */
3174 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3176 /* Generate the new insn. */
3177 /* ??? If the change fails, we return 0, even though we created
3178 an insn. I think this is ok. */
3180 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3181 SET_DEST (PATTERN (insn_computes_expr))),
3182 insn_computes_expr);
3183 /* Keep block number table up to date. */
3184 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3185 /* Keep register set table up to date. */
3186 record_one_set (REGNO (to), new_insn);
3188 gcse_create_count++;
3189 if (gcse_file != NULL)
3191 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3192 INSN_UID (NEXT_INSN (insn_computes_expr)),
3193 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3194 INSN_UID (insn_computes_expr));
3195 fprintf (gcse_file, " into newly allocated reg %d\n", REGNO (to));
3198 pat = PATTERN (insn);
3200 /* Do register replacement for INSN. */
3201 changed = validate_change (insn, &SET_SRC (pat),
3202 SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3205 /* We should be able to ignore the return code from validate_change but
3206 to play it safe we check. */
3210 if (gcse_file != NULL)
3212 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3214 REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3215 INSN_UID (insn_computes_expr));
3224 /* Perform classic GCSE.
3225 This is called by one_classic_gcse_pass after all the dataflow analysis
3228 The result is non-zero if a change was made. */
/* Scan every insn of every block (starting at block 1; block 0 has no
   predecessors to supply available expressions).  For each insn of the
   form (set (pseudo-reg) src) whose SRC is a recorded expression,
   available at block entry, with operands unmodified since block
   start, attempt the substitution via handle_avail_expr.  */
3236 /* Note we start at block 1. */
3239 for (bb = 1; bb < n_basic_blocks; bb++)
3241 /* Reset tables used to keep track of what's still valid [since the
3242 start of the block]. */
3243 reset_opr_set_tables ();
3245 for (insn = basic_block_head[bb];
3246 insn != NULL && insn != NEXT_INSN (basic_block_end[bb]);
3247 insn = NEXT_INSN (insn))
3249 /* Is insn of form (set (pseudo-reg) ...)? */
3251 if (GET_CODE (insn) == INSN
3252 && GET_CODE (PATTERN (insn)) == SET
3253 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3254 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3256 rtx pat = PATTERN (insn);
3257 rtx src = SET_SRC (pat);
3260 if (want_to_gcse_p (src)
3261 /* Is the expression recorded? */
3262 && ((expr = lookup_expr (src)) != NULL)
3263 /* Is the expression available [at the start of the
3265 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3266 /* Are the operands unchanged since the start of the
3268 && oprs_not_set_p (src, insn))
3269 changed |= handle_avail_expr (insn, expr);
3272 /* Keep track of everything modified by this insn. */
3273 /* ??? Need to be careful w.r.t. mods done to INSN. */
3274 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3275 mark_oprs_set (insn);
3282 /* Top level routine to perform one classic GCSE pass.
3284 Return non-zero if a change was made. */
/* Driver: build the expression hash table and reaching-definitions
   sets, compute availability, run classic_gcse, then free everything
   and log pass statistics.  F is the first insn; PASS the pass
   number.  */
3287 one_classic_gcse_pass (f, pass)
3293 gcse_subst_count = 0;
3294 gcse_create_count = 0;
3296 alloc_expr_hash_table (max_cuid);
3297 alloc_rd_mem (n_basic_blocks, max_cuid);
3298 compute_expr_hash_table (f);
3300 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3301 expr_hash_table_size, n_exprs);
3306 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3309 compute_available ();
3310 changed = classic_gcse ();
3311 free_avail_expr_mem ();
3314 free_expr_hash_table ();
3318 fprintf (gcse_file, "\n");
3319 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3320 current_function_name, pass,
3321 bytes_used, gcse_subst_count, gcse_create_count);
3327 /* Compute copy/constant propagation working variables. */
3329 /* Local properties of assignments. */
3331 static sbitmap *cprop_pavloc;
3332 static sbitmap *cprop_absaltered;
3334 /* Global properties of assignments (computed from the local properties). */
3336 static sbitmap *cprop_avin;
3337 static sbitmap *cprop_avout;
3339 /* Allocate vars used for copy/const propagation.
3340 N_BLOCKS is the number of basic blocks.
3341 N_SETS is the number of sets. */
/* One N_SETS-bit set per block for each of the four cprop dataflow
   properties declared above.  */
3344 alloc_cprop_mem (n_blocks, n_sets)
3345 int n_blocks, n_sets;
3347 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3348 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3350 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3351 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3354 /* Free vars used by copy/const propagation. */
3359 free (cprop_pavloc);
3360 free (cprop_absaltered);
3365 /* Dump copy/const propagation data. */
/* Print all four cprop property vectors to FILE, one labeled section
   per vector.  */
3368 dump_cprop_data (file)
3371 dump_sbitmap_vector (file, "CPROP partially locally available sets", "BB",
3372 cprop_pavloc, n_basic_blocks);
3373 dump_sbitmap_vector (file, "CPROP absolutely altered sets", "BB",
3374 cprop_absaltered, n_basic_blocks);
3376 dump_sbitmap_vector (file, "CPROP available incoming sets", "BB",
3377 cprop_avin, n_basic_blocks);
3378 dump_sbitmap_vector (file, "CPROP available outgoing sets", "BB",
3379 cprop_avout, n_basic_blocks);
3382 /* For each block, compute whether X is transparent.
3383 X is either an expression or an assignment [though we don't care which,
3384 for this context an assignment is treated as an expression].
3385 For each block where an element of X is modified, set (SET_P == 1) or reset
3386 (SET_P == 0) the INDX bit in BMAP. */
/* REG operands: hard regs are tested via reg_set_in_block per block;
   pseudos via the reg_set_table chain (touching only the blocks that
   actually set the reg).  MEM operands: any block with a memory set is
   treated as modifying X.  Other codes recurse over their operands,
   iterating on the last one (the "repeat" idiom).  */
3389 compute_transp (x, indx, bmap, set_p)
3399 /* repeat is used to turn tail-recursion into iteration. */
3405 code = GET_CODE (x);
3411 int regno = REGNO (x);
3415 if (regno < FIRST_PSEUDO_REGISTER)
3417 for (bb = 0; bb < n_basic_blocks; bb++)
3418 if (TEST_BIT (reg_set_in_block[bb], regno))
3419 SET_BIT (bmap[bb], indx);
3423 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3425 bb = BLOCK_NUM (r->insn);
3426 SET_BIT (bmap[bb], indx);
3432 if (regno < FIRST_PSEUDO_REGISTER)
3434 for (bb = 0; bb < n_basic_blocks; bb++)
3435 if (TEST_BIT (reg_set_in_block[bb], regno))
3436 RESET_BIT (bmap[bb], indx);
3440 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3442 bb = BLOCK_NUM (r->insn);
3443 RESET_BIT (bmap[bb], indx);
3453 for (bb = 0; bb < n_basic_blocks; bb++)
3454 if (mem_set_in_block[bb])
3455 SET_BIT (bmap[bb], indx);
3459 for (bb = 0; bb < n_basic_blocks; bb++)
3460 if (mem_set_in_block[bb])
3461 RESET_BIT (bmap[bb], indx);
3481 i = GET_RTX_LENGTH (code) - 1;
3482 fmt = GET_RTX_FORMAT (code);
3487 rtx tem = XEXP (x, i);
3489 /* If we are about to do the last recursive call
3490 needed at this level, change it into iteration.
3491 This function is called enough to be worth it. */
3497 compute_transp (tem, indx, bmap, set_p);
3499 else if (fmt[i] == 'E')
3502 for (j = 0; j < XVECLEN (x, i); j++)
3503 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
/* Compute the per-block local properties for copy/const propagation:
   cprop_absaltered (assignment's operands modified in the block) via
   compute_transp with SET_P = 1, and cprop_pavloc (assignment occurs
   in the block) from each expression's recorded avail occurrences.  */
3509 compute_cprop_local_properties ()
3513 sbitmap_vector_zero (cprop_absaltered, n_basic_blocks);
3514 sbitmap_vector_zero (cprop_pavloc, n_basic_blocks);
3516 for (i = 0; i < set_hash_table_size; i++)
3520 for (expr = set_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
3523 int indx = expr->bitmap_index;
3525 /* The assignment is absolutely altered if any operand is modified
3526 by this block [excluding the assignment itself].
3527 We start by assuming all are transparent [none are killed], and
3528 then setting the bits for those that are. */
3530 compute_transp (expr->expr, indx, cprop_absaltered, 1);
3532 /* The occurrences recorded in avail_occr are exactly those that
3533 we want to set to non-zero in PAVLOC. */
3535 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3537 int bb = BLOCK_NUM (occr->insn);
3538 SET_BIT (cprop_pavloc[bb], indx);
/* Iterative availability computation for cprop:
     avin[bb]  = intersection of avout over predecessors (avin[0] = 0,
                 avout starts all-ones);
     avout[bb] = pavloc[bb] U (avin[bb] - absaltered[bb]);
   repeated until no avout changes; pass count is logged.  */
3545 compute_cprop_avinout ()
3547 int bb, changed, passes;
3549 sbitmap_zero (cprop_avin[0]);
3550 sbitmap_vector_ones (cprop_avout, n_basic_blocks);
3557 for (bb = 0; bb < n_basic_blocks; bb++)
3560 sbitmap_intersect_of_predecessors (cprop_avin[bb], cprop_avout,
3562 changed |= sbitmap_union_of_diff (cprop_avout[bb],
3565 cprop_absaltered[bb]);
3571 fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes);
3574 /* Top level routine to do the dataflow analysis needed by copy/const
/* Runs the local-properties pass then the global avin/avout solver.  */
3578 compute_cprop_data ()
3580 compute_cprop_local_properties ();
3581 compute_cprop_avinout ();
3584 /* Copy/constant propagation. */
3590 /* Maximum number of register uses in an insn that we handle. */
3593 /* Table of uses found in an insn.
3594 Allocated statically to avoid alloc/free complexity and overhead. */
3595 static struct reg_use reg_use_table[MAX_USES];
3597 /* Index into `reg_use_table' while building it. */
3598 static int reg_use_count;
3600 /* Set up a list of register numbers used in INSN.
3601 The found uses are stored in `reg_use_table'.
3602 `reg_use_count' is initialized to zero before entry, and
3603 contains the number of uses in the table upon exit.
3605 ??? If a register appears multiple times we will record it multiple
3606 times. This doesn't hurt anything but it will slow things down. */
/* Recursive walk (find_used_regs) over X's operands: a REG appends an
   entry to reg_use_table (stopping silently at MAX_USES); a SET only
   scans its destination when it is a MEM (a stored-to REG is a def,
   not a use, but its address registers are uses).  Last operand is
   handled iteratively via the "repeat" idiom.  */
3616 /* repeat is used to turn tail-recursion into iteration. */
3622 code = GET_CODE (x);
3626 if (reg_use_count == MAX_USES)
3628 reg_use_table[reg_use_count].reg_rtx = x;
3646 case ASM_INPUT: /*FIXME*/
3650 if (GET_CODE (SET_DEST (x)) == MEM)
3651 find_used_regs (SET_DEST (x));
3659 /* Recursively scan the operands of this expression. */
3661 fmt = GET_RTX_FORMAT (code);
3662 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3666 /* If we are about to do the last recursive call
3667 needed at this level, change it into iteration.
3668 This function is called enough to be worth it. */
3674 find_used_regs (XEXP (x, i));
3676 else if (fmt[i] == 'E')
3679 for (j = 0; j < XVECLEN (x, i); j++)
3680 find_used_regs (XVECEXP (x, i, j));
3685 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3686 Returns non-zero is successful. */
/* Thin wrapper over validate_replace_src, which performs the
   substitution and validates the resulting insn pattern.  */
3689 try_replace_reg (from, to, insn)
3692 return validate_replace_src (from, to, insn);
3695 /* Find a set of REGNO that is available on entry to INSN's block.
3696 Returns NULL if not found. */
/* Scan the chain of recorded sets of REGNO, returning the first whose
   bit is on in cprop_avin of INSN's block.  */
3698 static struct expr *
3699 find_avail_set (regno, insn)
3703 struct expr *set = lookup_set (regno, NULL_RTX);
3707 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3709 set = next_set (regno, set);
3715 /* Perform constant and copy propagation on INSN.
3716 The result is non-zero if a change was made. */
/* Only single-SET INSNs are handled.  For each register use found by
   find_used_regs, look up an assignment of that register available at
   block entry; if its source is a CONST_INT, attempt constant
   propagation, and if it is a distinct pseudo whose operands are
   unchanged since block start, attempt copy propagation -- both via
   try_replace_reg.  Each success is logged to gcse_file.  */
3722 struct reg_use *reg_used;
3725 /* ??? For now only propagate into SETs. */
3726 if (GET_CODE (insn) != INSN
3727 || GET_CODE (PATTERN (insn)) != SET)
3731 find_used_regs (PATTERN (insn));
/* NOTE(review): the next line's `®_use_table' looks like mis-encoded
   text for `&reg_use_table' -- confirm against the pristine file.  */
3733 reg_used = &reg_use_table[0];
3734 for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3738 int regno = REGNO (reg_used->reg_rtx);
3740 /* Ignore registers created by GCSE.
3741 We do this because ... */
3742 if (regno >= max_gcse_regno)
3745 /* If the register has already been set in this block, there's
3746 nothing we can do. */
3747 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3750 /* Find an assignment that sets reg_used and is available
3751 at the start of the block. */
3752 set = find_avail_set (regno, insn);
3757 /* ??? We might be able to handle PARALLELs. Later. */
3758 if (GET_CODE (pat) != SET)
3760 src = SET_SRC (pat);
3762 if (GET_CODE (src) == CONST_INT)
3764 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3768 if (gcse_file != NULL)
3770 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3771 regno, INSN_UID (insn));
3772 fprintf (gcse_file, HOST_WIDE_INT_PRINT_DEC, INTVAL (src));
3773 fprintf (gcse_file, "\n");
3776 /* The original insn setting reg_used may or may not now be
3777 deletable. We leave the deletion to flow. */
3780 else if (GET_CODE (src) == REG
3781 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3782 && REGNO (src) != regno)
3784 /* We know the set is available.
3785 Now check that SET_SRC is ANTLOC (i.e. none of the source operands
3786 have changed since the start of the block). */
3787 if (oprs_not_set_p (src, insn))
3789 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3793 if (gcse_file != NULL)
3795 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
3796 regno, INSN_UID (insn), REGNO (src));
3799 /* The original insn setting reg_used may or may not now be
3800 deletable. We leave the deletion to flow. */
3801 /* FIXME: If it turns out that the insn isn't deletable,
3802 then we may have unnecessarily extended register lifetimes
3803 and made things worse. */
3812 /* Forward propagate copies.
3813 This includes copies and constants.
3814 Return non-zero if a change was made. */
/* Walk every insn of every block from block 1, resetting the operand
   tracking tables at each block head, calling cprop_insn on each real
   insn, and recording everything each insn modifies so later uses in
   the same block see stale registers as unavailable.  */
3822 /* Note we start at block 1. */
3825 for (bb = 1; bb < n_basic_blocks; bb++)
3827 /* Reset tables used to keep track of what's still valid [since the
3828 start of the block]. */
3829 reset_opr_set_tables ();
3831 for (insn = basic_block_head[bb];
3832 insn != NULL && insn != NEXT_INSN (basic_block_end[bb]);
3833 insn = NEXT_INSN (insn))
3835 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3837 changed |= cprop_insn (insn);
3839 /* Keep track of everything modified by this insn. */
3840 /* ??? Need to be careful w.r.t. mods done to INSN. */
3841 mark_oprs_set (insn);
3846 if (gcse_file != NULL)
3847 fprintf (gcse_file, "\n");
3852 /* Perform one copy/constant propagation pass.
3853 F is the first insn in the function.
3854 PASS is the pass count. */
/* Driver: build the SET hash table, compute the cprop dataflow sets,
   run the propagation, free the tables, and log pass statistics.  */
3857 one_cprop_pass (f, pass)
3863 const_prop_count = 0;
3864 copy_prop_count = 0;
3866 alloc_set_hash_table (max_cuid);
3867 compute_set_hash_table (f);
3869 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3873 alloc_cprop_mem (n_basic_blocks, n_sets);
3874 compute_cprop_data ();
3878 free_set_hash_table ();
3882 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
3883 current_function_name, pass,
3884 bytes_used, const_prop_count, copy_prop_count);
3885 fprintf (gcse_file, "\n");
3891 /* Compute PRE working variables. */
3893 /* Local properties of expressions. */
3894 /* Nonzero for expressions that are transparent in the block. */
3895 static sbitmap *pre_transp;
3896 /* Nonzero for expressions that are computed (available) in the block. */
3897 static sbitmap *pre_comp;
3898 /* Nonzero for expressions that are locally anticipatable in the block. */
3899 static sbitmap *pre_antloc;
3901 /* Global properties (computed from the expression local properties). */
3902 /* Nonzero for expressions that are available on block entry/exit. */
3903 static sbitmap *pre_avin;
3904 static sbitmap *pre_avout;
3905 /* Nonzero for expressions that are anticipatable on block entry/exit. */
3906 static sbitmap *pre_antin;
3907 static sbitmap *pre_antout;
3908 /* Nonzero for expressions that are partially available on block entry/exit. */
3909 static sbitmap *pre_pavin;
3910 static sbitmap *pre_pavout;
3911 /* Nonzero for expressions that are "placement possible" on block entry/exit. */
3912 static sbitmap *pre_ppin;
3913 static sbitmap *pre_ppout;
3915 /* Nonzero for expressions that are transparent at the end of the block.
3916 This is only zero for expressions killed by abnormal critical edge
3917 created by a calls. */
3918 static sbitmap *pre_transpout;
3920 /* Used while performing PRE to denote which insns are redundant. */
3921 static sbitmap pre_redundant;
3923 /* Allocate vars used for PRE analysis. */
/* One N_EXPRS-bit set per block for each PRE property declared
   above.  */
3926 alloc_pre_mem (n_blocks, n_exprs)
3927 int n_blocks, n_exprs;
3929 pre_transp = sbitmap_vector_alloc (n_blocks, n_exprs);
3930 pre_comp = sbitmap_vector_alloc (n_blocks, n_exprs);
3931 pre_antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
3933 pre_avin = sbitmap_vector_alloc (n_blocks, n_exprs);
3934 pre_avout = sbitmap_vector_alloc (n_blocks, n_exprs);
3935 pre_antin = sbitmap_vector_alloc (n_blocks, n_exprs);
3936 pre_antout = sbitmap_vector_alloc (n_blocks, n_exprs);
3938 pre_pavin = sbitmap_vector_alloc (n_blocks, n_exprs);
3939 pre_pavout = sbitmap_vector_alloc (n_blocks, n_exprs);
3940 pre_ppin = sbitmap_vector_alloc (n_blocks, n_exprs);
3941 pre_ppout = sbitmap_vector_alloc (n_blocks, n_exprs);
3943 pre_transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
3946 /* Free vars used for PRE analysis. */
3963 free (pre_transpout);
3966 /* Dump PRE data. */
/* Print every PRE property vector to FILE, one labeled section per
   vector, in local-then-global order.  */
3969 dump_pre_data (file)
3972 dump_sbitmap_vector (file, "PRE locally transparent expressions", "BB",
3973 pre_transp, n_basic_blocks);
3974 dump_sbitmap_vector (file, "PRE locally available expressions", "BB",
3975 pre_comp, n_basic_blocks);
3976 dump_sbitmap_vector (file, "PRE locally anticipatable expressions", "BB",
3977 pre_antloc, n_basic_blocks);
3979 dump_sbitmap_vector (file, "PRE available incoming expressions", "BB",
3980 pre_avin, n_basic_blocks);
3981 dump_sbitmap_vector (file, "PRE available outgoing expressions", "BB",
3982 pre_avout, n_basic_blocks);
3983 dump_sbitmap_vector (file, "PRE anticipatable incoming expressions", "BB",
3984 pre_antin, n_basic_blocks);
3985 dump_sbitmap_vector (file, "PRE anticipatable outgoing expressions", "BB",
3986 pre_antout, n_basic_blocks);
3988 dump_sbitmap_vector (file, "PRE partially available incoming expressions", "BB",
3989 pre_pavin, n_basic_blocks);
3990 dump_sbitmap_vector (file, "PRE partially available outgoing expressions", "BB",
3991 pre_pavout, n_basic_blocks);
3992 dump_sbitmap_vector (file, "PRE placement possible on incoming", "BB",
3993 pre_ppin, n_basic_blocks);
3994 dump_sbitmap_vector (file, "PRE placement possible on outgoing", "BB",
3995 pre_ppout, n_basic_blocks);
3997 dump_sbitmap_vector (file, "PRE transparent on outgoing", "BB",
3998 pre_transpout, n_basic_blocks);
4001 /* Compute the local properties of each recorded expression.
4002 Local properties are those that are defined by the block, irrespective
4005 An expression is transparent in a block if its operands are not modified
4008 An expression is computed (locally available) in a block if it is computed
4009 at least once and expression would contain the same value if the
4010 computation was moved to the end of the block.
4012 An expression is locally anticipatable in a block if it is computed at
4013 least once and expression would contain the same value if the computation
4014 was moved to the beginning of the block. */
4017 compute_pre_local_properties ()
/* Start optimistic for TRANSP (all transparent) and pessimistic for
   COMP/ANTLOC (nothing computed/anticipatable); the hash-table walk below
   refines all three.  */
4021 sbitmap_vector_ones (pre_transp, n_basic_blocks);
4022 sbitmap_vector_zero (pre_comp, n_basic_blocks);
4023 sbitmap_vector_zero (pre_antloc, n_basic_blocks);
/* Walk every chain of the expression hash table.  */
4025 for (i = 0; i < expr_hash_table_size; i++)
4029 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4032 int indx = expr->bitmap_index;
4034 /* The expression is transparent in this block if it is not killed.
4035 We start by assuming all are transparent [none are killed], and then
4036 reset the bits for those that are. */
4038 compute_transp (expr->expr, indx, pre_transp, 0);
4040 /* The occurrences recorded in antic_occr are exactly those that
4041 we want to set to non-zero in ANTLOC. */
4043 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4045 int bb = BLOCK_NUM (occr->insn);
4046 SET_BIT (pre_antloc[bb], indx);
4048 /* While we're scanning the table, this is a good place to
/* (review) ... clear the per-occurrence "deleted" flag used later by
   pre_delete / pre_insert_copies.  */
4050 occr->deleted_p = 0;
4053 /* The occurrences recorded in avail_occr are exactly those that
4054 we want to set to non-zero in COMP. */
4056 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
4058 int bb = BLOCK_NUM (occr->insn);
4059 SET_BIT (pre_comp[bb], indx);
4061 /* While we're scanning the table, this is a good place to
4066 /* While we're scanning the table, this is a good place to
/* (review) ... reset the expression's reaching register for this pass;
   pre_delete allocates a fresh pseudo when needed.  */
4068 expr->reaching_reg = 0;
4073 /* Compute expression availability at entrance and exit of each block. */
4076 compute_pre_avinout ()
4078 int bb, changed, passes;
/* Nothing is available on entry to the entry block; start AVOUT all-ones
   (optimistic) so the iteration can only remove bits.  */
4080 sbitmap_zero (pre_avin[0]);
4081 sbitmap_vector_ones (pre_avout, n_basic_blocks);
/* Forward dataflow, iterated to a fixed point (loop header elided in this
   excerpt; CHANGED/PASSES track convergence).  */
4088 for (bb = 0; bb < n_basic_blocks; bb++)
/* AVIN[bb] = intersection of predecessors' AVOUT.  */
4091 sbitmap_intersect_of_predecessors (pre_avin[bb], pre_avout,
/* AVOUT[bb] = COMP[bb] | (TRANSP[bb] & AVIN[bb]).  */
4093 changed |= sbitmap_a_or_b_and_c (pre_avout[bb], pre_comp[bb],
4094 pre_transp[bb], pre_avin[bb]);
4100 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
4103 /* Compute expression anticipatability at entrance and exit of each block. */
4106 compute_pre_antinout ()
4108 int bb, changed, passes;
/* Nothing is anticipatable leaving the last block; start ANTIN all-ones
   (optimistic) so the iteration can only remove bits.  */
4110 sbitmap_zero (pre_antout[n_basic_blocks - 1]);
4111 sbitmap_vector_ones (pre_antin, n_basic_blocks);
4118 /* We scan the blocks in the reverse order to speed up
/* (review) ... convergence of this backward dataflow problem.  */
4120 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
/* ANTOUT[bb] = intersection of successors' ANTIN (the last block's ANTOUT
   stays zero).  */
4122 if (bb != n_basic_blocks - 1)
4123 sbitmap_intersect_of_successors (pre_antout[bb], pre_antin,
/* ANTIN[bb] = ANTLOC[bb] | (TRANSP[bb] & ANTOUT[bb]).  */
4125 changed |= sbitmap_a_or_b_and_c (pre_antin[bb], pre_antloc[bb],
4126 pre_transp[bb], pre_antout[bb]);
4132 fprintf (gcse_file, "antic expr computation: %d passes\n", passes);
4135 /* Compute expression partial availability at entrance and exit of
4139 compute_pre_pavinout ()
4141 int bb, changed, passes;
/* Start all-zero (pessimistic): bits are only added as paths computing the
   expression are discovered.  */
4143 sbitmap_zero (pre_pavin[0]);
4144 sbitmap_vector_zero (pre_pavout, n_basic_blocks);
4151 for (bb = 0; bb < n_basic_blocks; bb++)
/* PAVIN[bb] = union of predecessors' PAVOUT ("some path", not "all paths"
   as in availability).  */
4154 sbitmap_union_of_predecessors (pre_pavin[bb], pre_pavout,
/* PAVOUT[bb] = COMP[bb] | (TRANSP[bb] & PAVIN[bb]).  */
4156 changed |= sbitmap_a_or_b_and_c (pre_pavout[bb], pre_comp[bb],
4157 pre_transp[bb], pre_pavin[bb]);
4163 fprintf (gcse_file, "partially avail expr computation: %d passes\n", passes);
4166 /* Compute transparent outgoing information for each block.
4168 An expression is transparent to an edge unless it is killed by
4169 the edge itself. This can only happen with abnormal control flow,
4170 when the edge is traversed through a call. This happens with
4171 non-local labels and exceptions.
4173 This would not be necessary if we split the edge. While this is
4174 normally impossible for abnormal critical edges, with some effort
4175 it should be possible with exception handling, since we still have
4176 control over which handler should be invoked. But due to increased
4177 EH table sizes, this may not be worthwhile. */
4180 compute_pre_transpout ()
/* Optimistic start: everything transparent on every outgoing edge.  */
4184 sbitmap_vector_ones (pre_transpout, n_basic_blocks);
4186 for (bb = 0; bb < n_basic_blocks; ++bb)
4190 /* Note that flow inserted a nop a the end of basic blocks that
4191 end in call instructions for reasons other than abnormal
/* (review) Only blocks actually ending in a call can kill MEMs on an
   outgoing edge; others are presumably skipped here (continuation elided
   in this excerpt).  */
4193 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4196 for (i = 0; i < expr_hash_table_size; i++)
4199 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4200 if (GET_CODE (expr->expr) == MEM)
4202 rtx addr = XEXP (expr->expr, 0);
/* Constant-pool loads are read-only, so a call cannot kill them --
   presumably exempted here (the skip itself is elided in this excerpt).  */
4204 if (GET_CODE (addr) == SYMBOL_REF
4205 && CONSTANT_POOL_ADDRESS_P (addr))
4208 /* ??? Optimally, we would use interprocedural alias
4209 analysis to determine if this mem is actually killed
4211 RESET_BIT (pre_transpout[bb], expr->bitmap_index);
4217 /* Compute "placement possible" information on entrance and exit of
4220 From Fred Chow's Thesis:
4221 A computation `e' is PP at a point `p' if it is anticipated at `p' and
4222 all the anticipated e's can be rendered redundant by zero or more
4223 insertions at that point and some other points in the procedure, and
4224 these insertions satisfy the conditions that the insertions are always
4225 at points that `e' is anticipated and the first anticipated e's after the
4226 insertions are rendered redundant. */
4229 compute_pre_ppinout ()
4231 int bb, i, changed, size, passes;
/* Optimistic start for both PPIN and PPOUT; entry/exit boundary blocks are
   then forced to zero.  */
4233 sbitmap_vector_ones (pre_ppin, n_basic_blocks);
4234 /* ??? Inefficient as we set pre_ppin[0] twice, but simple. */
4235 sbitmap_zero (pre_ppin[0]);
4237 sbitmap_vector_ones (pre_ppout, n_basic_blocks);
4238 /* ??? Inefficient as we set pre_ppout[n_basic_blocks-1] twice, but simple. */
4239 sbitmap_zero (pre_ppout[n_basic_blocks - 1]);
/* SIZE = number of sbitmap words; the loops below work a word at a time
   through raw element pointers.  */
4241 size = pre_ppin[0]->size;
/* First half of the fixed-point body: recompute PPIN for every block but
   the entry block.  */
4247 for (bb = 1; bb < n_basic_blocks; bb++)
4249 sbitmap_ptr antin = pre_antin[bb]->elms;
4250 sbitmap_ptr pavin = pre_pavin[bb]->elms;
4251 sbitmap_ptr antloc = pre_antloc[bb]->elms;
4252 sbitmap_ptr transp = pre_transp[bb]->elms;
4253 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4254 sbitmap_ptr ppin = pre_ppin[bb]->elms;
4256 for (i = 0; i < size; i++)
/* TMP = ANTIN & PAVIN & (ANTLOC | (TRANSP & PPOUT)) -- the block-local
   part of Chow's PPIN equation for this word.  */
4259 SBITMAP_ELT_TYPE tmp = *antin & *pavin & (*antloc | (*transp & *ppout));
/* PRED_VAL accumulates, starting all-ones, the AND over predecessors of
   (PPOUT | AVOUT).  */
4260 SBITMAP_ELT_TYPE pred_val = -1L;
4262 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
4264 int pred_bb = INT_LIST_VAL (pred);
4265 sbitmap_ptr ppout_j,avout_j;
4267 if (pred_bb == ENTRY_BLOCK)
4270 /* If this is a back edge, propagate info along the back
4271 edge to allow for loop invariant code motion.
4273 See FOLLOW_BACK_EDGES at the top of this file for a longer
4274 discussion about loop invariant code motion in pre. */
/* (review) Back edge detected positionally: the successor's head precedes
   the predecessor's end in insn (cuid) order.  */
4275 if (! FOLLOW_BACK_EDGES
4276 && (INSN_CUID (BLOCK_HEAD (bb))
4277 < INSN_CUID (BLOCK_END (pred_bb))))
4283 ppout_j = pre_ppout[pred_bb]->elms + i;
4284 avout_j = pre_avout[pred_bb]->elms + i;
4285 pred_val &= *ppout_j | *avout_j;
/* Advance all word cursors in lock step.  */
4290 antin++; pavin++; antloc++; transp++; ppout++; ppin++;
/* Second half: recompute PPOUT for every block but the exit block, seeded
   from TRANSPOUT and narrowed by each real successor's PPIN.  */
4294 for (bb = 0; bb < n_basic_blocks - 1; bb++)
4296 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4297 sbitmap_ptr transpout = pre_transpout[bb]->elms;
4299 for (i = 0; i < size; i++)
4302 SBITMAP_ELT_TYPE tmp = *transpout;
4304 for (succ = s_succs[bb]; succ != NULL; succ = succ->next)
4306 int succ_bb = INT_LIST_VAL (succ);
4309 if (succ_bb == EXIT_BLOCK)
4312 ppin = pre_ppin[succ_bb]->elms + i;
4322 ppout++; transpout++;
4330 fprintf (gcse_file, "placement possible computation: %d passes\n", passes);
4333 /* Top level routine to do the dataflow analysis needed by PRE. */
/* Order matters: local properties feed the three global problems, and
   transpout + the earlier results feed the final PP computation.  */
4338 compute_pre_local_properties ();
4339 compute_pre_avinout ();
4340 compute_pre_antinout ();
4341 compute_pre_pavinout ();
4342 compute_pre_transpout ();
4343 compute_pre_ppinout ();
4345 fprintf (gcse_file, "\n");
4350 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
4352 VISITED is a pointer to a working buffer for tracking which BB's have
4353 been visited. It is NULL for the top-level call.
4355 We treat reaching expressions that go through blocks containing the same
4356 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4357 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4358 2 as not reaching. The intent is to improve the probability of finding
4359 only one reaching expression and to reduce register lifetimes by picking
4360 the closest such expression. */
4363 pre_expr_reaches_here_p (occr, expr, bb, visited)
/* On the outermost call, allocate and clear the VISITED buffer on the
   stack; recursive calls share it.  */
4371 if (visited == NULL)
4373 visited = (char *) alloca (n_basic_blocks);
4374 bzero (visited, n_basic_blocks);
/* Depth-first walk backwards over the predecessor lists.  */
4377 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
4379 int pred_bb = INT_LIST_VAL (pred);
4381 if (pred_bb == ENTRY_BLOCK
4382 /* Has predecessor has already been visited? */
4383 || visited[pred_bb])
4385 /* Nothing to do. */
4387 /* Does this predecessor generate this expression? */
4388 else if (TEST_BIT (pre_comp[pred_bb], expr->bitmap_index))
4390 /* Is this the occurrence we're looking for?
4391 Note that there's only one generating occurrence per block
4392 so we just need to check the block number. */
4393 if (BLOCK_NUM (occr->insn) == pred_bb)
/* Either way the search stops at a generating block.  */
4395 visited[pred_bb] = 1;
4397 /* Ignore this predecessor if it kills the expression. */
4398 else if (! TEST_BIT (pre_transp[pred_bb], expr->bitmap_index))
4399 visited[pred_bb] = 1;
4400 /* Neither gen nor kill. */
/* Mark before recursing so cycles terminate.  */
4403 visited[pred_bb] = 1;
4404 if (pre_expr_reaches_here_p (occr, expr, pred_bb, visited))
4409 /* All paths have been checked. */
4413 /* Add EXPR to the end of basic block BB. */
/* Emits "reaching_reg = <expr>" as late as possible in BB, with special
   handling when the block ends in a jump (insert before it, minding cc0
   and jump tables) or a call (insert before the parameter loads).  */
4416 pre_insert_insn (expr, bb)
4420 rtx insn = BLOCK_END (bb);
4422 rtx reg = expr->reaching_reg;
4423 int regno = REGNO (reg);
/* The insn we will insert: reg = copy of the expression's rtl.  */
4426 pat = gen_rtx_SET (VOIDmode, reg, copy_rtx (expr->expr));
4428 /* If the last insn is a jump, insert EXPR in front [taking care to
4429 handle cc0, etc. properly]. */
4431 if (GET_CODE (insn) == JUMP_INSN)
4437 /* If this is a jump table, then we can't insert stuff here. Since
4438 we know the previous real insn must be the tablejump, we insert
4439 the new instruction just before the tablejump. */
4440 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4441 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4442 insn = prev_real_insn (insn);
4445 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4446 if cc0 isn't set. */
/* Never separate a cc0 setter from its user: back up to the setter,
   found via a REG_CC_SETTER note or the previous non-note insn.  */
4447 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4449 insn = XEXP (note, 0);
4452 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4453 if (maybe_cc0_setter
4454 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4455 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4456 insn = maybe_cc0_setter;
4459 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4460 new_insn = emit_insn_before (pat, insn);
4461 add_label_notes (SET_SRC (pat), new_insn);
/* If we inserted before the first insn of the block, the block head
   must be updated.  */
4462 if (BLOCK_HEAD (bb) == insn)
4463 BLOCK_HEAD (bb) = new_insn;
4465 /* Likewise if the last insn is a call, as will happen in the presence
4466 of exception handling. */
4467 else if (GET_CODE (insn) == CALL_INSN)
4469 HARD_REG_SET parm_regs;
4473 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4474 we search backward and place the instructions before the first
4475 parameter is loaded. Do this for everyone for consistency and a
4476 presumtion that we'll get better code elsewhere as well. */
4478 /* It should always be the case that we can put these instructions
4479 anywhere in the basic block. Check this. */
4480 /* ??? Well, it would be the case if we'd split all critical edges.
4481 Since we didn't, we may very well abort. */
4482 if (!TEST_BIT (pre_antloc[bb], expr->bitmap_index)
4483 && !TEST_BIT (pre_transp[bb], expr->bitmap_index))
4486 /* Since different machines initialize their parameter registers
4487 in different orders, assume nothing. Collect the set of all
4488 parameter registers. */
4489 CLEAR_HARD_REG_SET (parm_regs);
/* Parameter registers appear as USEs in CALL_INSN_FUNCTION_USAGE.  */
4491 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4492 if (GET_CODE (XEXP (p, 0)) == USE
4493 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4495 int regno = REGNO (XEXP (XEXP (p, 0), 0));
4496 if (regno >= FIRST_PSEUDO_REGISTER)
4498 SET_HARD_REG_BIT (parm_regs, regno);
4502 /* Search backward for the first set of a register in this set. */
4503 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4505 insn = PREV_INSN (insn);
4506 p = single_set (insn);
4507 if (p && GET_CODE (SET_DEST (p)) == REG
4508 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4509 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4511 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4516 new_insn = emit_insn_before (pat, insn);
4517 if (BLOCK_HEAD (bb) == insn)
4518 BLOCK_HEAD (bb) = new_insn;
/* Ordinary block: append after the last insn and move the block end.  */
4522 new_insn = emit_insn_after (pat, insn);
4523 add_label_notes (SET_SRC (pat), new_insn);
4524 BLOCK_END (bb) = new_insn;
4527 /* Keep block number table up to date. */
4528 set_block_num (new_insn, bb);
4529 /* Keep register set table up to date. */
4530 record_one_set (regno, new_insn);
4532 gcse_create_count++;
4536 fprintf (gcse_file, "PRE: end of bb %d, insn %d, copying expression %d to reg %d\n",
4537 bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4541 /* Insert partially redundant expressions at the ends of appropriate basic
4542 blocks making them now redundant. */
4545 pre_insert (index_map)
4546 struct expr **index_map;
4550 /* Compute INSERT = PPOUT & (~AVOUT) & (~PPIN | ~TRANSP) for each
4551 expression. Where INSERT == TRUE, add the expression at the end of
/* Work a whole sbitmap word at a time, then scan set bits within it.  */
4554 size = pre_ppout[0]->size;
4555 for (bb = 0; bb < n_basic_blocks; bb++)
4558 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4559 sbitmap_ptr avout = pre_avout[bb]->elms;
4560 sbitmap_ptr ppin = pre_ppin[bb]->elms;
4561 sbitmap_ptr transp = pre_transp[bb]->elms;
/* INDX tracks the expression number of this word's bit 0.  */
4565 i++, indx += SBITMAP_ELT_BITS, ppout++, avout++, ppin++, transp++)
4568 SBITMAP_ELT_TYPE insert = *ppout & (~*avout) & (~*ppin | ~*transp);
/* Shift INSERT right bit by bit, stopping early once it is empty.  */
4570 for (j = indx; insert != 0 && j < n_exprs; j++, insert >>= 1)
4572 if ((insert & 1) != 0
4573 /* If the basic block isn't reachable, PPOUT will be TRUE.
4574 However, we don't want to insert a copy here because the
4575 expression may not really be redundant. So only insert
4576 an insn if the expression was deleted. */
4577 && index_map[j]->reaching_reg != NULL)
4578 pre_insert_insn (index_map[j], bb);
4584 /* Copy the result of INSN to REG.
4585 INDX is the expression number. */
/* Emits "reaching_reg = SET_DEST (insn)" immediately after INSN, so the
   still-live computation feeds the expression's shared pseudo.  */
4588 pre_insert_copy_insn (expr, insn)
4592 rtx reg = expr->reaching_reg;
4593 int regno = REGNO (reg);
4594 int indx = expr->bitmap_index;
/* NOTE(review): presumably only single-set insns reach here (see the
   PARALLEL discussion above pre_gcse); the check itself is elided in this
   excerpt.  */
4595 rtx set = single_set (insn);
4600 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4602 /* Keep block number table up to date. */
4603 set_block_num (new_insn, BLOCK_NUM (insn));
4604 /* Keep register set table up to date. */
4605 record_one_set (regno, new_insn);
4607 gcse_create_count++;
4611 fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n",
4612 BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno);
4616 /* Copy available expressions that reach the redundant expression
4617 to `reaching_reg'. */
4620 pre_insert_copies ()
4624 /* For each available expression in the table, copy the result to
4625 `reaching_reg' if the expression reaches a deleted one.
4627 ??? The current algorithm is rather brute force.
4628 Need to do some profiling. */
4630 for (i = 0; i < expr_hash_table_size; i++)
4634 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4638 /* If the basic block isn't reachable, PPOUT will be TRUE.
4639 However, we don't want to insert a copy here because the
4640 expression may not really be redundant. So only insert
4641 an insn if the expression was deleted.
4642 This test also avoids further processing if the expression
4643 wasn't deleted anywhere. */
4644 if (expr->reaching_reg == NULL)
/* For every deleted anticipatable occurrence, find each available
   occurrence that reaches it and copy its result into reaching_reg.  */
4647 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4651 if (! occr->deleted_p)
4654 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4656 rtx insn = avail->insn;
4658 /* No need to handle this one if handled already. */
4659 if (avail->copied_p)
4661 /* Don't handle this one if it's a redundant one. */
4662 if (TEST_BIT (pre_redundant, INSN_CUID (insn)))
4664 /* Or if the expression doesn't reach the deleted one. */
4665 if (! pre_expr_reaches_here_p (avail, expr,
4666 BLOCK_NUM (occr->insn),
4670 /* Copy the result of avail to reaching_reg. */
4671 pre_insert_copy_insn (expr, insn);
/* Mark so a later deleted occurrence doesn't copy it again.  */
4672 avail->copied_p = 1;
4679 /* Delete redundant computations.
4680 These are ones that satisy ANTLOC & PPIN.
4681 Deletion is done by changing the insn to copy the `reaching_reg' of
4682 the expression into the result of the SET. It is left to later passes
4683 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4685 Returns non-zero if a change is made. */
4693 for (i = 0; i < expr_hash_table_size; i++)
4697 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4700 int indx = expr->bitmap_index;
4702 /* We only need to search antic_occr since we require
4705 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4707 rtx insn = occr->insn;
4709 int bb = BLOCK_NUM (insn);
4710 sbitmap ppin = pre_ppin[bb];
/* ANTLOC holds by construction of antic_occr; PPIN is the remaining
   condition for redundancy.  */
4712 if (TEST_BIT (ppin, indx))
4714 set = single_set (insn);
4718 /* Create a pseudo-reg to store the result of reaching
4719 expressions into. Get the mode for the new pseudo
4720 from the mode of the original destination pseudo. */
/* Allocated lazily: only expressions with at least one deleted
   occurrence get a reaching_reg (pre_insert relies on this).  */
4721 if (expr->reaching_reg == NULL)
4723 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4725 /* In theory this should never fail since we're creating
4728 However, on the x86 some of the movXX patterns actually
4729 contain clobbers of scratch regs. This may cause the
4730 insn created by validate_change to not patch any pattern
4731 and thus cause validate_change to fail. */
4732 if (validate_change (insn, &SET_SRC (set),
4733 expr->reaching_reg, 0))
/* Record the deletion for pre_insert_copies (per-occurrence
   flag) and the pre_redundant cuid bitmap.  */
4735 occr->deleted_p = 1;
4736 SET_BIT (pre_redundant, INSN_CUID (insn));
4743 fprintf (gcse_file, "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4744 INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4754 /* Perform GCSE optimizations using PRE.
4755 This is called by one_pre_gcse_pass after all the dataflow analysis
4758 This is based on the original Morel-Renvoise paper and Fred Chow's thesis.
4760 The M-R paper uses "TRANSP" to describe an expression as being transparent
4761 in a block where as Chow's thesis uses "ALTERED". We use TRANSP.
4763 ??? A new pseudo reg is created to hold the reaching expression.
4764 The nice thing about the classical approach is that it would try to
4765 use an existing reg. If the register can't be adequately optimized
4766 [i.e. we introduce reload problems], one could add a pass here to
4767 propagate the new register through the block.
4769 ??? We don't handle single sets in PARALLELs because we're [currently]
4770 not able to copy the rest of the parallel when we insert copies to create
4771 full redundancies from partial redundancies. However, there's no reason
4772 why we can't handle PARALLELs in the cases where there are no partial
4780 struct expr **index_map;
4782 /* Compute a mapping from expression number (`bitmap_index') to
4783 hash table entry. */
4785 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
4786 bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
4787 for (i = 0; i < expr_hash_table_size; i++)
4791 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4792 index_map[expr->bitmap_index] = expr;
4795 /* Reset bitmap used to track which insns are redundant. */
/* One bit per insn cuid; set by pre_delete, read by pre_insert_copies.  */
4796 pre_redundant = sbitmap_alloc (max_cuid);
4797 sbitmap_zero (pre_redundant);
4799 /* Delete the redundant insns first so that
4800 - we know what register to use for the new insns and for the other
4801 ones with reaching expressions
4802 - we know which insns are redundant when we go to create copies */
4803 changed = pre_delete ();
4805 /* Insert insns in places that make partially redundant expressions
4807 pre_insert (index_map);
4809 /* In other places with reaching expressions, copy the expression to the
4810 specially allocated pseudo-reg that reaches the redundant expression. */
4811 pre_insert_copies ();
4813 free (pre_redundant);
4818 /* Top level routine to perform one PRE GCSE pass.
4820 Return non-zero if a change was made. */
/* F is the insn chain; PASS is the pass number, used only for the dump.  */
4823 one_pre_gcse_pass (f, pass)
4829 gcse_subst_count = 0;
4830 gcse_create_count = 0;
/* Build the expression hash table for the whole function.  */
4832 alloc_expr_hash_table (max_cuid);
4833 compute_expr_hash_table (f);
4835 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4836 expr_hash_table_size, n_exprs);
/* Run the PRE dataflow analysis and transformation proper (only when
   expressions were found; the guard is elided in this excerpt).  */
4839 alloc_pre_mem (n_basic_blocks, n_exprs);
4840 compute_pre_data ();
4841 changed |= pre_gcse ();
4844 free_expr_hash_table ();
4848 fprintf (gcse_file, "\n");
4849 fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4850 current_function_name, pass,
4851 bytes_used, gcse_subst_count, gcse_create_count);
4857 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4858 We have to add REG_LABEL notes, because the following loop optimization
4859 pass requires them. */
4861 /* ??? This is very similar to the loop.c add_label_notes function. We
4862 could probably share code here. */
4864 /* ??? If there was a jump optimization pass after gcse and before loop,
4865 then we would not need to do this here, because jump would add the
4866 necessary REG_LABEL notes. */
4869 add_label_notes (x, insn)
4873 enum rtx_code code = GET_CODE (x);
/* A (non-nonlocal) label reference: record it directly and stop, since a
   LABEL_REF has no sub-rtxs worth scanning.  */
4877 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4879 /* This code used to ignore labels that referred to dispatch tables to
4880 avoid flow generating (slighly) worse code.
4882 We no longer ignore such label references (see LABEL_REF handling in
4883 mark_jump_label for additional information). */
4884 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
/* Otherwise recurse over X's operands, per the rtx format string:
   'e' = single sub-expression, 'E' = vector of sub-expressions.  */
4889 fmt = GET_RTX_FORMAT (code);
4890 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4893 add_label_notes (XEXP (x, i), insn);
4894 else if (fmt[i] == 'E')
4895 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4896 add_label_notes (XVECEXP (x, i, j), insn);