1 /* Global common subexpression elimination/Partial redundancy elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
/* TODO
   - reordering of memory allocation and freeing to be more space efficient
25 - do rough calc of how many regs are needed in each block, and a rough
26 calc of how many regs are available in each class and use that to
27 throttle back the code in cases where RTX_COST is minimal.
28 - a store to the same address as a load does not kill the load if the
29 source of the store is also the destination of the load. Handling this
30 allows more load motion, particularly out of loops.
31 - ability to realloc sbitmap vectors would allow one initial computation
32 of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass
*/
37 /* References searched while implementing this.
   Compilers Principles, Techniques and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988
   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
49 Stanford Ph.D. thesis, Dec. 1983
   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
55 A Solution to a Problem with Morel and Renvoise's
56 Global Optimization by Suppression of Partial Redundancies
57 K-H Drechsler, M.P. Stadel
58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
60 Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991
   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
77 ACM Letters on Programming Languages and Systems,
78 Vol. 2, Num. 1-4, Mar-Dec 1993
80 An Efficient Representation for Sparse Sets
81 Preston Briggs, Linda Torczon
82 ACM Letters on Programming Languages and Systems,
83 Vol. 2, Num. 1-4, Mar-Dec 1993
85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
86 K-H Drechsler, M.P. Stadel
87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
89 Partial Dead Code Elimination
90 J. Knoop, O. Ruthing, B. Steffen
91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
93 Effective Partial Redundancy Elimination
94 P. Briggs, K.D. Cooper
95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
97 The Program Structure Tree: Computing Control Regions in Linear Time
98 R. Johnson, D. Pearson, K. Pingali
99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
101 Optimal Code Motion: Theory and Practice
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
105 The power of assignment motion
106 J. Knoop, O. Ruthing, B. Steffen
107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
   Global code motion / global value numbering
   C. Click
111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
   Value Driven Redundancy Elimination
   L.T. Simpson
115 Rice University Ph.D. thesis, Apr. 1996
   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996
   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996
   Advanced Compiler Design and Implementation
   Steven Muchnick
127 Morgan Kaufmann, 1997
   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998
133 People wishing to speed up the code here should read:
134 Elimination Algorithms for Data Flow Analysis
135 B.G. Ryder, M.C. Paull
136 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
138 How to Analyze Large Programs Efficiently and Informatively
139 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
140 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
142 People wishing to do something different can find various possibilities
143 in the above papers and elsewhere.
#include "config.h"
#include "system.h"
#include "toplev.h"

#include "rtl.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "ggc.h"
#include "params.h"

#include "obstack.h"
167 #define obstack_chunk_alloc gmalloc
168 #define obstack_chunk_free free
/* Propagate flow information through back edges and thus enable PRE to
   move loop-invariant calculations out of loops.
173 Originally this tended to create worse overall code, but several
174 improvements during the development of PRE seem to have made following
175 back edges generally a win.
177 Note much of the loop invariant code motion done here would normally
178 be done by loop.c, which has more heuristics for when to move invariants
179 out of loops. At some point we might need to move some of those
180 heuristics into gcse.c. */
181 #define FOLLOW_BACK_EDGES 1
183 /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
184 are a superset of those done by GCSE.
186 We perform the following steps:
188 1) Compute basic block information.
190 2) Compute table of places where registers are set.
192 3) Perform copy/constant propagation.
194 4) Perform global cse.
196 5) Perform another pass of copy/constant propagation.
198 Two passes of copy/constant propagation are done because the first one
199 enables more GCSE and the second one helps to clean up the copies that
200 GCSE creates. This is needed more for PRE than for Classic because Classic
201 GCSE will try to use an existing register containing the common
202 subexpression rather than create a new one. This is harder to do for PRE
203 because of the code motion (which Classic GCSE doesn't do).
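   As a hypothetical illustration (not taken from an actual dump) of why
   cprop runs first:

	a = 5;              a = 5;
	x = a + b;   -->    x = 5 + b;    [after cprop]
	y = 5 + b;          y = 5 + b;    [now a common subexpression]

   and the second cprop pass then cleans up the register copies that
   GCSE itself creates.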
205 Expressions we are interested in GCSE-ing are of the form
206 (set (pseudo-reg) (expression)).
207 Function want_to_gcse_p says what these are.
209 PRE handles moving invariant expressions out of loops (by treating them as
210 partially redundant).
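   For example (a hypothetical loop, not from an actual dump):

	loop:
	  r100 = r50 + 4        <- operands invariant in the loop

   the computation is partially redundant along the back edge; inserting
   it once on the loop entry edge makes the copy inside the loop fully
   redundant, so it can be deleted.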
212 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
213 assignment) based GVN (global value numbering). L. T. Simpson's paper
214 (Rice University) on value numbering is a useful reference for this.
216 **********************
218 We used to support multiple passes but there are diminishing returns in
219 doing so. The first pass usually makes 90% of the changes that are doable.
220 A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.
224 A study of spec92 using an unlimited number of passes:
225 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
226 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
227 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
   It was found doing copy propagation between each pass enables further
   substitutions.
   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  The parameter
   max-gcse-passes can be modified if one wants to experiment.
236 **********************
238 The steps for PRE are:
240 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
242 2) Perform the data flow analysis for PRE.
244 3) Delete the redundant instructions
246 4) Insert the required copies [if any] that make the partially
247 redundant instructions fully redundant.
249 5) For other reaching expressions, insert an instruction to copy the value
250 to a newly created pseudo that will reach the redundant instruction.
252 The deletion is done first so that when we do insertions we
253 know which pseudo reg to use.
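   A hypothetical sketch of steps 3-5 for an expression `a + b' that is
   partially redundant in block B3:

	B1: x = a + b;          ->   B1: r105 = a + b; x = r105;
	B2: (no computation)    ->   B2: r105 = a + b;   [insertion, steps 4/5]
	B3: y = a + b;          ->   B3: y = r105;       [deletion, step 3]

   where r105 is the new pseudo recorded in `reaching_reg'.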
255 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
256 argue it is not. The number of iterations for the algorithm to converge
257 is typically 2-4 so I don't view it as that expensive (relatively speaking).
259 PRE GCSE depends heavily on the second CSE pass to clean up the copies
260 we create. To make an expression reach the place where it's redundant,
261 the result of the expression is copied to a new register, and the redundant
262 expression is deleted by replacing it with this new register. Classic GCSE
263 doesn't have this problem as much as it computes the reaching defs of
264 each register in each block and thus can try to use an existing register.
266 **********************
   A fair bit of simplicity is gained by creating small functions for simple
   tasks, even when the function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle down the
   code can be rearranged.
276 Help stamp out big monolithic functions! */
278 /* GCSE global vars. */
281 static FILE *gcse_file;
283 /* Note whether or not we should run jump optimization after gcse. We
284 want to do this for two cases.
286 * If we changed any jumps via cprop.
288 * If we added any labels via edge splitting. */
290 static int run_jump_opt_after_gcse;
292 /* Bitmaps are normally not included in debugging dumps.
293 However it's useful to be able to print them from GDB.
294 We could create special functions for this, but it's simpler to
295 just allow passing stderr to the dump_foo fns. Since stderr can
296 be a macro, we store a copy here. */
297 static FILE *debug_stderr;
299 /* An obstack for our working variables. */
300 static struct obstack gcse_obstack;
302 /* Non-zero for each mode that supports (set (reg) (reg)).
303 This is trivially true for integer and floating point values.
304 It may or may not be true for condition codes. */
305 static char can_copy_p[(int) NUM_MACHINE_MODES];
307 /* Non-zero if can_copy_p has been initialized. */
308 static int can_copy_init_p;
struct reg_use { rtx reg_rtx; };
/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
320 /* Next entry with the same hash. */
321 struct expr *next_same_hash;
322 /* List of anticipatable occurrences in basic blocks in the function.
323 An "anticipatable occurrence" is one that is the first occurrence in the
324 basic block, the operands are not modified in the basic block prior
325 to the occurrence and the output is not used between the start of
326 the block and the occurrence. */
327 struct occr *antic_occr;
328 /* List of available occurrence in basic blocks in the function.
329 An "available occurrence" is one that is the last occurrence in the
330 basic block and the operands are not modified by following statements in
331 the basic block [including this insn]. */
332 struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};
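/* For example, in a hypothetical block

     insn 1:  x = a + b    <- anticipatable occurrence of `a + b'
     insn 2:  a = ...
     insn 3:  y = a + b    <- available occurrence of `a + b'

   insn 1 qualifies because nothing before it in the block modifies its
   operands, and insn 3 qualifies because nothing after it does.  */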
339 /* Occurrence of an expression.
340 There is one per basic block. If a pattern appears more than once the
341 last appearance is used [or first for anticipatable expressions]. */
struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
358 /* Expression and copy propagation hash tables.
359 Each hash table is an array of buckets.
360 ??? It is known that if it were an array of entries, structure elements
361 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
362 not clear whether in the final analysis a sufficient amount of memory would
363 be saved as the size of the available expression bitmaps would be larger
364 [one could build a mapping table without holes afterwards though].
365 Someday I'll perform the computation and figure it out. */
367 /* Total size of the expression hash table, in elements. */
368 static unsigned int expr_hash_table_size;
/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
372 static struct expr **expr_hash_table;
374 /* Total size of the copy propagation hash table, in elements. */
375 static unsigned int set_hash_table_size;
/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
379 static struct expr **set_hash_table;
381 /* Mapping of uids to cuids.
382 Only real insns get cuids. */
383 static int *uid_cuid;
/* Highest UID in UID_CUID.  */
static int max_uid;
388 /* Get the cuid of an insn. */
389 #ifdef ENABLE_CHECKING
390 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
#else
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
#endif
/* Number of cuids.  */
static int max_cuid;
398 /* Mapping of cuids to insns. */
399 static rtx *cuid_insn;
401 /* Get insn from cuid. */
402 #define CUID_INSN(CUID) (cuid_insn[CUID])
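/* For example (hypothetical UIDs): if a function's real insns have UIDs
   4, 7 and 9, with a NOTE at UID 8, the real insns get cuids 0, 1 and 2,
   so cuid-indexed bitmaps waste no entries on notes or UID gaps.  */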
404 /* Maximum register number in function prior to doing gcse + 1.
405 Registers created during this pass have regno >= max_gcse_regno.
406 This is named with "gcse" to not collide with global of same name. */
407 static unsigned int max_gcse_regno;
/* Maximum number of cse-able expressions found.  */
static int n_exprs;
/* Maximum number of assignments for copy propagation found.  */
static int n_sets;
415 /* Table of registers that are modified.
   For each register, each element is a list of places where the pseudo-reg
   is set.
420 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
421 requires knowledge of which blocks kill which regs [and thus could use
422 a bitmap instead of the lists `reg_set_table' uses].
   `reg_set_table' could also be turned into an array of bitmaps (num-bbs x
   num-regs) [however perhaps it may be useful to keep the data as is].  One
426 advantage of recording things this way is that `reg_set_table' is fairly
427 sparse with respect to pseudo regs but for hard regs could be fairly dense
428 [relatively speaking]. And recording sets of pseudo-regs in lists speeds
429 up functions like compute_transp since in the case of pseudo-regs we only
430 need to iterate over the number of times a pseudo-reg is set, not over the
431 number of basic blocks [clearly there is a bit of a slow down in the cases
432 where a pseudo is set more than once in a block, however it is believed
433 that the net effect is to speed things up]. This isn't done for hard-regs
434 because recording call-clobbered hard-regs in `reg_set_table' at each
435 function call can consume a fair bit of memory, and iterating over
436 hard-regs stored this way in compute_transp will be more expensive. */
typedef struct reg_set
{
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;

static reg_set **reg_set_table;
448 /* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
451 static int reg_set_table_size;
453 /* Amount to grow `reg_set_table' by when it's full. */
454 #define REG_SET_TABLE_SLOP 100
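/* A minimal usage sketch (hypothetical; it mirrors how compute_transp
   walks the table): to visit every place pseudo REGNO is set,

     struct reg_set *r;

     for (r = reg_set_table[regno]; r != NULL; r = r->next)
       ... BLOCK_NUM (r->insn) is a block that sets REGNO ...
*/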
/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by
   anything except itself, i.e., loads and stores to a single location.
   We can then allow movement of these MEM refs with a little special
   allowance.  (All stores copy the same value to the reaching reg used
   for the loads.)  This means all values used to store into memory must have
   no side effects so we can re-issue the setter value.
464 Store Motion uses this structure as an expression table to track stores
465 which look interesting, and might be moveable towards the exit block. */
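/* For example (a hypothetical fragment): if every reference to a given
   memory location

     r1 = mem;  ...  mem = r2;  ...  r3 = mem;

   uses the same address and nothing else aliases it, the loads and
   stores can all be rewritten through one reaching register and moved.  */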
struct ls_expr
{
  struct expr * expr;		/* Gcse expression reference for LM.  */
470 rtx pattern; /* Pattern of this mem. */
471 rtx loads; /* INSN list of loads seen. */
472 rtx stores; /* INSN list of stores seen. */
473 struct ls_expr * next; /* Next in the list. */
474 int invalid; /* Invalid for some reason. */
475 int index; /* If it maps to a bitmap index. */
476 int hash_index; /* Index when in a hash table. */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};
480 /* Head of the list of load/store memory refs. */
481 static struct ls_expr * pre_ldst_mems = NULL;
483 /* Bitmap containing one bit for each register in the program.
484 Used when performing GCSE to track which registers have been set since
485 the start of the basic block. */
486 static regset reg_set_bitmap;
488 /* For each block, a bitmap of registers set in the block.
489 This is used by expr_killed_p and compute_transp.
490 It is computed during hash table computation and not by compute_sets
491 as it includes registers added since the last pass (or between cprop and
492 gcse) and it's currently not easy to realloc sbitmap vectors. */
493 static sbitmap *reg_set_in_block;
495 /* Array, indexed by basic block number for a list of insns which modify
496 memory within that block. */
497 static rtx * modify_mem_list;
498 bitmap modify_mem_list_set;
500 /* This array parallels modify_mem_list, but is kept canonicalized. */
501 static rtx * canon_modify_mem_list;
502 bitmap canon_modify_mem_list_set;
503 /* Various variables for statistics gathering. */
505 /* Memory used in a pass.
506 This isn't intended to be absolutely precise. Its intent is only
507 to keep an eye on memory usage. */
508 static int bytes_used;
510 /* GCSE substitutions made. */
511 static int gcse_subst_count;
512 /* Number of copy instructions created. */
513 static int gcse_create_count;
514 /* Number of constants propagated. */
515 static int const_prop_count;
/* Number of copies propagated.  */
517 static int copy_prop_count;
519 /* These variables are used by classic GCSE.
520 Normally they'd be defined a bit later, but `rd_gen' needs to
521 be declared sooner. */
523 /* Each block has a bitmap of each type.
   The length of each block's bitmap is:
526 max_cuid - for reaching definitions
527 n_exprs - for available expressions
529 Thus we view the bitmaps as 2 dimensional arrays. i.e.
530 rd_kill[block_num][cuid_num]
531 ae_kill[block_num][expr_num] */
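/* E.g., a usage sketch (assuming the vectors below have been allocated
   and computed): to ask whether expression E is killed in block BB,

     TEST_BIT (ae_kill[bb->index], e->bitmap_index)

   treats ae_kill as a two dimensional bitmap exactly as described.  */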
533 /* For reaching defs */
534 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
536 /* for available exprs */
537 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
/* Objects of this type are passed around by the null-pointer check
   removal routines.  */
struct null_pointer_info
{
  /* The basic block being processed.  */
  basic_block current_block;
545 /* The first register to be handled in this pass. */
546 unsigned int min_reg;
547 /* One greater than the last register to be handled in this pass. */
548 unsigned int max_reg;
549 sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};
553 static void compute_can_copy PARAMS ((void));
554 static char *gmalloc PARAMS ((unsigned int));
555 static char *grealloc PARAMS ((char *, unsigned int));
556 static char *gcse_alloc PARAMS ((unsigned long));
557 static void alloc_gcse_mem PARAMS ((rtx));
558 static void free_gcse_mem PARAMS ((void));
559 static void alloc_reg_set_mem PARAMS ((int));
560 static void free_reg_set_mem PARAMS ((void));
561 static int get_bitmap_width PARAMS ((int, int, int));
562 static void record_one_set PARAMS ((int, rtx));
563 static void record_set_info PARAMS ((rtx, rtx, void *));
564 static void compute_sets PARAMS ((rtx));
565 static void hash_scan_insn PARAMS ((rtx, int, int));
566 static void hash_scan_set PARAMS ((rtx, rtx, int));
567 static void hash_scan_clobber PARAMS ((rtx, rtx));
568 static void hash_scan_call PARAMS ((rtx, rtx));
569 static int want_to_gcse_p PARAMS ((rtx));
570 static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
571 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
572 static int oprs_available_p PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
					  int, int));
575 static void insert_set_in_table PARAMS ((rtx, rtx));
576 static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
577 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
578 static unsigned int hash_string_1 PARAMS ((const char *));
579 static unsigned int hash_set PARAMS ((int, int));
580 static int expr_equiv_p PARAMS ((rtx, rtx));
581 static void record_last_reg_set_info PARAMS ((rtx, int));
582 static void record_last_mem_set_info PARAMS ((rtx));
583 static void record_last_set_info PARAMS ((rtx, rtx, void *));
584 static void compute_hash_table PARAMS ((int));
585 static void alloc_set_hash_table PARAMS ((int));
586 static void free_set_hash_table PARAMS ((void));
587 static void compute_set_hash_table PARAMS ((void));
588 static void alloc_expr_hash_table PARAMS ((unsigned int));
589 static void free_expr_hash_table PARAMS ((void));
590 static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table PARAMS ((FILE *, const char *, struct expr **,
				     int, int));
593 static struct expr *lookup_expr PARAMS ((rtx));
594 static struct expr *lookup_set PARAMS ((unsigned int, rtx));
595 static struct expr *next_set PARAMS ((unsigned int, struct expr *));
596 static void reset_opr_set_tables PARAMS ((void));
597 static int oprs_not_set_p PARAMS ((rtx, rtx));
598 static void mark_call PARAMS ((rtx));
599 static void mark_set PARAMS ((rtx, rtx));
600 static void mark_clobber PARAMS ((rtx, rtx));
601 static void mark_oprs_set PARAMS ((rtx));
602 static void alloc_cprop_mem PARAMS ((int, int));
603 static void free_cprop_mem PARAMS ((void));
604 static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
605 static void compute_transpout PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
					      int));
608 static void compute_cprop_data PARAMS ((void));
609 static void find_used_regs PARAMS ((rtx *, void *));
610 static int try_replace_reg PARAMS ((rtx, rtx, rtx));
611 static struct expr *find_avail_set PARAMS ((int, rtx));
612 static int cprop_jump PARAMS ((basic_block, rtx, rtx, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump PARAMS ((basic_block, rtx, struct reg_use *, rtx));
#endif
616 static void mems_conflict_for_gcse_p PARAMS ((rtx, rtx, void *));
617 static int load_killed_in_block_p PARAMS ((basic_block, int, rtx, int));
618 static void canon_list_insert PARAMS ((rtx, rtx, void *));
619 static int cprop_insn PARAMS ((basic_block, rtx, int));
620 static int cprop PARAMS ((int));
621 static int one_cprop_pass PARAMS ((int, int));
622 static void alloc_pre_mem PARAMS ((int, int));
623 static void free_pre_mem PARAMS ((void));
624 static void compute_pre_data PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((basic_block, struct expr *,
					    basic_block));
627 static void insert_insn_end_bb PARAMS ((struct expr *, basic_block, int));
628 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
629 static void pre_insert_copies PARAMS ((void));
630 static int pre_delete PARAMS ((void));
631 static int pre_gcse PARAMS ((void));
632 static int one_pre_gcse_pass PARAMS ((int));
633 static void add_label_notes PARAMS ((rtx, rtx));
634 static void alloc_code_hoist_mem PARAMS ((int, int));
635 static void free_code_hoist_mem PARAMS ((void));
636 static void compute_code_hoist_vbeinout PARAMS ((void));
637 static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((basic_block, int, basic_block,
					      char *));
640 static void hoist_code PARAMS ((void));
641 static int one_code_hoisting_pass PARAMS ((void));
642 static void alloc_rd_mem PARAMS ((int, int));
643 static void free_rd_mem PARAMS ((void));
644 static void handle_rd_kill_set PARAMS ((rtx, int, basic_block));
645 static void compute_kill_rd PARAMS ((void));
646 static void compute_rd PARAMS ((void));
647 static void alloc_avail_expr_mem PARAMS ((int, int));
648 static void free_avail_expr_mem PARAMS ((void));
649 static void compute_ae_gen PARAMS ((void));
650 static int expr_killed_p PARAMS ((rtx, basic_block));
651 static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
					basic_block, int));
654 static rtx computing_insn PARAMS ((struct expr *, rtx));
655 static int def_reaches_here_p PARAMS ((rtx, rtx));
656 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
657 static int handle_avail_expr PARAMS ((rtx, struct expr *));
658 static int classic_gcse PARAMS ((void));
659 static int one_classic_gcse_pass PARAMS ((int));
660 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
661 static void delete_null_pointer_checks_1 PARAMS ((unsigned int *,
662 sbitmap *, sbitmap *,
663 struct null_pointer_info *));
664 static rtx process_insert_insn PARAMS ((struct expr *));
665 static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
666 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
667 basic_block, int, char *));
668 static int pre_expr_reaches_here_p_work PARAMS ((basic_block, struct expr *,
669 basic_block, char *));
670 static struct ls_expr * ldst_entry PARAMS ((rtx));
671 static void free_ldst_entry PARAMS ((struct ls_expr *));
672 static void free_ldst_mems PARAMS ((void));
673 static void print_ldst_list PARAMS ((FILE *));
674 static struct ls_expr * find_rtx_in_ldst PARAMS ((rtx));
675 static int enumerate_ldsts PARAMS ((void));
676 static inline struct ls_expr * first_ls_expr PARAMS ((void));
677 static inline struct ls_expr * next_ls_expr PARAMS ((struct ls_expr *));
678 static int simple_mem PARAMS ((rtx));
679 static void invalidate_any_buried_refs PARAMS ((rtx));
680 static void compute_ld_motion_mems PARAMS ((void));
681 static void trim_ld_motion_mems PARAMS ((void));
682 static void update_ld_motion_stores PARAMS ((struct expr *));
683 static void reg_set_info PARAMS ((rtx, rtx, void *));
684 static int store_ops_ok PARAMS ((rtx, basic_block));
685 static void find_moveable_store PARAMS ((rtx));
686 static int compute_store_table PARAMS ((void));
687 static int load_kills_store PARAMS ((rtx, rtx));
688 static int find_loads PARAMS ((rtx, rtx));
689 static int store_killed_in_insn PARAMS ((rtx, rtx));
690 static int store_killed_after PARAMS ((rtx, rtx, basic_block));
691 static int store_killed_before PARAMS ((rtx, rtx, basic_block));
692 static void build_store_vectors PARAMS ((void));
693 static void insert_insn_start_bb PARAMS ((rtx, basic_block));
694 static int insert_store PARAMS ((struct ls_expr *, edge));
695 static void replace_store_insn PARAMS ((rtx, rtx, basic_block));
static void delete_store PARAMS ((struct ls_expr *,
				  basic_block));
698 static void free_store_memory PARAMS ((void));
699 static void store_motion PARAMS ((void));
700 static void free_insn_expr_list_list PARAMS ((rtx *));
701 static void clear_modify_mem_tables PARAMS ((void));
702 static void free_modify_mem_tables PARAMS ((void));
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;
720 /* Insertion of instructions on edges can create new basic blocks; we
721 need the original basic block count so that we can properly deallocate
     arrays sized on the number of basic blocks originally in the cfg.  */
  int orig_bb_count;
724 /* We do not construct an accurate cfg in functions which call
725 setjmp, so just punt to be safe. */
  if (current_function_calls_setjmp)
    return 0;
729 /* Assume that we do not need to run jump optimizations after gcse. */
730 run_jump_opt_after_gcse = 0;
732 /* For calling dump_foo fns from gdb. */
733 debug_stderr = stderr;
736 /* Identify the basic block information for this function, including
737 successors and predecessors. */
738 max_gcse_regno = max_reg_num ();
  if (file)
    dump_flow_info (file);

743 orig_bb_count = n_basic_blocks;
744 /* Return if there's nothing to do. */
  if (n_basic_blocks <= 1)
    return 0;
748 /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.
752 In normal circumstances a cfg should have about twice as many edges
753 as blocks. But we do not want to punish small functions which have
754 a couple switch statements. So we require a relatively large number
755 of basic blocks and the ratio of edges to blocks to be high. */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
		 n_basic_blocks, n_edges / n_basic_blocks);

      return 0;
    }
764 /* If allocating memory for the cprop bitmap would take up too much
765 storage it's better just to disable the optimization. */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_gcse_regno)
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d basic blocks and %d registers",
		 n_basic_blocks, max_gcse_regno);

      return 0;
    }
777 /* See what modes support reg/reg copy operations. */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* We need alias.  */
  init_alias_analysis ();
789 /* Record where pseudo-registers are set. This data is kept accurate
790 during each pass. ??? We could also record hard-reg information here
791 [since it's unchanging], however it is currently done during hash table
794 It may be tempting to compute MEM set information here too, but MEM sets
795 will be subject to code motion one day and thus we need to compute
796 information about memory sets when we build the hash tables. */
  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_GCSE_PASSES)
    {
      changed = 0;
      if (file)
	fprintf (file, "GCSE pass %d\n\n", pass + 1);
812 /* Initialize bytes_used to the space for the pred/succ lists,
813 and the reg_set_table data. */
814 bytes_used = initial_bytes_used;
816 /* Each pass may create new registers, so recalculate each time. */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
	 during this pass.  */
823 changed = one_cprop_pass (pass + 1, 0);
      if (optimize_size)
	changed |= one_classic_gcse_pass (pass + 1);
      else
	{
	  changed |= one_pre_gcse_pass (pass + 1);
830 /* We may have just created new basic blocks. Release and
	     recompute various things which are sized on the number of
	     basic blocks.  */
	  if (changed)
	    {
	      free_modify_mem_tables ();
	      modify_mem_list
837 = (rtx *) gmalloc (n_basic_blocks * sizeof (rtx));
838 canon_modify_mem_list
839 = (rtx *) gmalloc (n_basic_blocks * sizeof (rtx));
840 memset ((char *) modify_mem_list, 0, n_basic_blocks * sizeof (rtx));
841 memset ((char *) canon_modify_mem_list, 0, n_basic_blocks * sizeof (rtx));
	      orig_bb_count = n_basic_blocks;
	    }

	  free_reg_set_mem ();
	  alloc_reg_set_mem (max_reg_num ());
	  compute_sets (f);
	  run_jump_opt_after_gcse = 1;
	}
850 if (max_pass_bytes < bytes_used)
851 max_pass_bytes = bytes_used;
853 /* Free up memory, then reallocate for code hoisting. We can
854 not re-use the existing allocated memory because the tables
855 will not have info for the insns or registers created by
	 partial redundancy elimination.  */
      free_gcse_mem ();

      /* It does not make sense to run code hoisting unless we are optimizing
860 for code size -- it rarely makes programs faster, and can make
861 them bigger if we did partial redundancy elimination (when optimizing
862 for space, we use a classic gcse algorithm instead of partial
863 redundancy algorithms). */
      if (optimize_size)
	{
	  max_gcse_regno = max_reg_num ();
	  alloc_gcse_mem (f);
	  changed |= one_code_hoisting_pass ();
	  free_gcse_mem ();

	  if (max_pass_bytes < bytes_used)
	    max_pass_bytes = bytes_used;
	}
      if (file)
	{
	  fprintf (file, "\n");
	  fflush (file);
	}

      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }
885 /* Do one last pass of copy propagation, including cprop into
886 conditional jumps. */
  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);

890 /* This time, go ahead and allow cprop to alter jumps. */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
897 current_function_name, n_basic_blocks);
898 fprintf (file, "%d pass%s, %d bytes\n\n",
	       pass, pass > 1 ? "es" : "", max_pass_bytes);
    }
  obstack_free (&gcse_obstack, NULL);
  free_reg_set_mem ();
904 /* We are finished with alias. */
905 end_alias_analysis ();
906 allocate_reg_info (max_reg_num (), FALSE, FALSE);
908 /* Store motion disabled until it is fixed. */
  if (0 && !optimize_size && flag_gcse_sm)
    store_motion ();

  return run_jump_opt_after_gcse;
}
915 /* Misc. utilities. */
/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  memset (can_copy_p, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy_p[i] = 0;
#else
	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
	if (recog (PATTERN (insn), insn, NULL) >= 0)
	  can_copy_p[i] = 1;
#endif
      }
    else
      can_copy_p[i] = 1;

  end_sequence ();
}
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}
957 /* Cover function to xrealloc.
958 We don't record the additional size since we don't know it.
959 It won't affect memory usage stats much anyway. */
static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}
969 /* Cover function to obstack_alloc.
970 We don't need to record the bytes allocated here since
971 obstack_chunk_alloc is set to gmalloc. */
static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}
980 /* Allocate memory for the cuid mapping array,
981 and reg/memory set tracking tables.
   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

992 /* Find the largest UID and create a mapping from UIDs to CUIDs.
993 CUIDs are like UIDs except they increase monotonically, have no gaps,
994 and only apply to real insns. */
996 max_uid = get_max_uid ();
997 n = (max_uid + 1) * sizeof (int);
998 uid_cuid = (int *) gmalloc (n);
999 memset ((char *) uid_cuid, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	uid_cuid[INSN_UID (insn)] = i++;
      else
	uid_cuid[INSN_UID (insn)] = i;
    }
  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
1011 n = (max_cuid + 1) * sizeof (rtx);
1012 cuid_insn = (rtx *) gmalloc (n);
1013 memset ((char *) cuid_insn, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      CUID_INSN (i++) = insn;
1018 /* Allocate vars to track sets of regs. */
1019 reg_set_bitmap = BITMAP_XMALLOC ();
1021 /* Allocate vars to track sets of regs, memory per block. */
  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
						       max_gcse_regno);
  /* Allocate array to keep a list of insns which modify memory in each
     block.  */
1026 modify_mem_list = (rtx *) gmalloc (n_basic_blocks * sizeof (rtx));
1027 canon_modify_mem_list = (rtx *) gmalloc (n_basic_blocks * sizeof (rtx));
1028 memset ((char *) modify_mem_list, 0, n_basic_blocks * sizeof (rtx));
1029 memset ((char *) canon_modify_mem_list, 0, n_basic_blocks * sizeof (rtx));
1030 modify_mem_list_set = BITMAP_XMALLOC ();
  canon_modify_mem_list_set = BITMAP_XMALLOC ();
}
/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

1042 BITMAP_XFREE (reg_set_bitmap);
1044 sbitmap_vector_free (reg_set_in_block);
1045 free_modify_mem_tables ();
1046 BITMAP_XFREE (modify_mem_list_set);
  BITMAP_XFREE (canon_modify_mem_list_set);
}
1050 /* Many of the global optimization algorithms work by solving dataflow
1051 equations for various expressions. Initially, some local value is
1052 computed for each expression in each block. Then, the values across the
1053 various blocks are combined (by following flow graph edges) to arrive at
1054 global values. Conceptually, each set of equations is independent. We
1055 may therefore solve all the equations in parallel, solve them one at a
1056 time, or pick any intermediate approach.
1058 When you're going to need N two-dimensional bitmaps, each X (say, the
1059 number of blocks) by Y (say, the number of expressions), call this
1060 function. It's not important what X and Y represent; only that Y
1061 correspond to the things that can be done in parallel. This function will
1062 return an appropriate chunking factor C; you should solve C sets of
1063 equations in parallel. By going through this function, we can easily
   trade space against time; by solving fewer equations in parallel we use
   less memory.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
1073 /* It's not really worth figuring out *exactly* how much memory will
1074 be used by a particular choice. The important thing is to get
1075 something approximately right. */
1076 size_t max_bitmap_memory = 10 * 1024 * 1024;
  /* The number of bytes we'd use for a single column of minimum
     width.  */
1080 size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;
  /* Otherwise, pick the largest width we can, without going over the
     memory limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
			     / column_size);
}
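/* Usage sketch with hypothetical numbers: for n = 2 bitmaps over
   x = 5000 blocks and y = 200000 expressions, the full problem blows
   past the 10MB budget above, so the function returns a chunk width
   C < y and the caller solves the equations C expressions at a time.  */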
1093 /* Compute the local properties of each recorded expression.
   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.
1109 We call this routine for cprop, pre and code hoisting. They all compute
1110 basically the same information and thus can easily share this code.
1112 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
1113 properties. If NULL, then it is not necessary to compute or record that
1114 particular property.
1116 SETP controls which hash table to look at. If zero, this routine looks at
1117 the expr hash table; if nonzero this routine looks at the set hash table.
   Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */
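/* For example, in a hypothetical block B containing

     x = a + b;  a = ...;  y = a + b;

   `a + b' is locally anticipatable in B (the first computation precedes
   any change to its operands), locally available in B (the last
   computation follows the change), but not transparent in B, since `a'
   is set within the block.  */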
static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  unsigned int i, hash_table_size;
  struct expr **hash_table;
  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
	sbitmap_vector_zero (transp, n_basic_blocks);
      else
	sbitmap_vector_ones (transp, n_basic_blocks);
    }

  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);
1145 /* We use the same code for cprop, pre and hoisting. For cprop
1146 we care about the set hash table, for pre and hoisting we
1147 care about the expr hash table. */
1148 hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1149 hash_table = setp ? set_hash_table : expr_hash_table;
  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct occr *occr;
1160 /* The expression is transparent in this block if it is not killed.
1161 We start by assuming all are transparent [none are killed], and
1162 then reset the bits for those that are. */
	  if (transp)
	    compute_transp (expr->expr, indx, transp, setp);
1166 /* The occurrences recorded in antic_occr are exactly those that
1167 we want to set to non-zero in ANTLOC. */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }
1178 /* The occurrences recorded in avail_occr are exactly those that
1179 we want to set to non-zero in COMP. */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}
1197 /* Register set information.
   `reg_set_table' records where each register is set or otherwise
   modified.  */
1202 static struct obstack reg_set_obstack;
static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  int n;

1210 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1211 n = reg_set_table_size * sizeof (struct reg_set *);
1212 reg_set_table = (struct reg_set **) gmalloc (n);
1213 memset ((char *) reg_set_table, 0, n);
  gcc_obstack_init (&reg_set_obstack);
}
static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL);
}
1225 /* Record REGNO in the reg_set table. */
static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
1232 /* Allocate a new reg_set element and link it onto the list. */
1233 struct reg_set *new_reg_info;
1235 /* If the table isn't big enough, enlarge it. */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;

      reg_set_table
1241 = (struct reg_set **) grealloc ((char *) reg_set_table,
1242 new_size * sizeof (struct reg_set *));
1243 memset ((char *) (reg_set_table + reg_set_table_size), 0,
1244 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }
  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1249 sizeof (struct reg_set));
1250 bytes_used += sizeof (struct reg_set);
1251 new_reg_info->insn = insn;
1252 new_reg_info->next = reg_set_table[regno];
  reg_set_table[regno] = new_reg_info;
}
1256 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
   an insn.  The DATA is really the instruction in which the SET is
   occurring.  */

static void
record_set_info (dest, setter, data)
1262 rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;
1267 if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
    record_one_set (REGNO (dest), record_set_insn);
}
1271 /* Scan the function and record each set of each pseudo-register.
1273 This is called once, at the start of the gcse pass. See the comments for
   `reg_set_table' for further documentation.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn;

1282 for (insn = f; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      note_stores (PATTERN (insn), record_set_info, insn);
}
1287 /* Hash table support. */
1289 /* For each register, the cuid of the first/last insn in the block
1290 that set it, or -1 if not set. */
1291 #define NEVER_SET -1
struct reg_avail_info
{
  int last_bb;
  int first_set;
  int last_set;
};

1300 static struct reg_avail_info *reg_avail_info;
1301 static int current_bb;
/* See whether X, the source of a set, is something we want to consider for
   GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
1311 static rtx test_insn = 0;
  int num_clobbers = 0;
  int icode;

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
    case CALL:
      return 0;

    default:
      break;
    }
1329 /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
  if (general_operand (x, GET_MODE (x)))
    return 1;
  else if (GET_MODE (x) == VOIDmode)
    return 0;
1335 /* Otherwise, check if we can make a valid insn from it. First initialize
1336 our test insn if we haven't already. */
  if (test_insn == 0)
    {
      test_insn
	= make_insn_raw (gen_rtx_SET (VOIDmode,
1341 gen_rtx_REG (word_mode,
						   FIRST_PSEUDO_REGISTER * 2),
				      const0_rtx));
1344 NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
      ggc_add_rtx_root (&test_insn, 1);
    }
  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  */
1350 PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
1351 SET_SRC (PATTERN (test_insn)) = x;
1352 return ((icode = recog (PATTERN (test_insn), test_insn, &num_clobbers)) >= 0
	  && (num_clobbers == 0 || ! added_clobbers_hard_reg_p (icode)));
}
1356 /* Return non-zero if the operands of expression X are unchanged from the
1357 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1358 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
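/* E.g., for a hypothetical block `a = ...; x = a + b; ...': the operands
   of `a + b' are not unchanged from block start (the AVAIL_P == 0 query
   fails, since `a' is set before the insn), but they are unchanged to
   block end (AVAIL_P != 0 holds) provided no later insn sets `a' or `b'.  */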
static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
	if (info->last_bb != current_bb)
	  return 1;
	if (avail_p)
	  return info->last_set < INSN_CUID (insn);
	else
	  return info->first_set >= INSN_CUID (insn);
      }

    case MEM:
      if (load_killed_in_block_p (BASIC_BLOCK (current_bb), INSN_CUID (insn),
				  x, avail_p))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return 0;
    }

  return 1;
}
1440 /* Used for communication between mems_conflict_for_gcse_p and
1441 load_killed_in_block_p. Nonzero if mems_conflict_for_gcse_p finds a
1442 conflict between two memory references. */
1443 static int gcse_mems_conflict_p;
1445 /* Used for communication between mems_conflict_for_gcse_p and
1446 load_killed_in_block_p. A memory reference for a load instruction,
1447 mems_conflict_for_gcse_p will see if a memory store conflicts with
1448 this memory load. */
1449 static rtx gcse_mem_operand;
1451 /* DEST is the output of an instruction. If it is a memory reference, and
1452 possibly conflicts with the load found in gcse_mem_operand, then set
1453 gcse_mems_conflict_p to a nonzero value. */
static void
mems_conflict_for_gcse_p (dest, setter, data)
1457 rtx dest, setter ATTRIBUTE_UNUSED;
     void *data ATTRIBUTE_UNUSED;
{
1460 while (GET_CODE (dest) == SUBREG
1461 || GET_CODE (dest) == ZERO_EXTRACT
1462 || GET_CODE (dest) == SIGN_EXTRACT
1463 || GET_CODE (dest) == STRICT_LOW_PART)
1464 dest = XEXP (dest, 0);
1466 /* If DEST is not a MEM, then it will not conflict with the load. Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (GET_CODE (dest) != MEM)
    return;
1472 /* If we are setting a MEM in our list of specially recognized MEMs,
1473 don't mark as killed this time. */
  if (dest == gcse_mem_operand && pre_ldst_mems != NULL)
    {
      if (!find_rtx_in_ldst (dest))
	gcse_mems_conflict_p = 1;
      return;
    }
  if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
		       rtx_addr_varies_p))
    gcse_mems_conflict_p = 1;
}
1487 /* Return nonzero if the expression in X (a memory reference) is killed
1488 in block BB before or after the insn with the CUID in UID_LIMIT.
   AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
   before UID_LIMIT.

   To check the entire block, set UID_LIMIT to max_uid + 1 and
   AVAIL_P to 0.  */
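/* Usage sketch, per the comment above: a hypothetical call

     load_killed_in_block_p (bb, max_uid + 1, x, 0)

   asks whether the load X is killed by any store anywhere in BB.  */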
static int
load_killed_in_block_p (bb, uid_limit, x, avail_p)
     basic_block bb;
     int uid_limit;
     rtx x;
     int avail_p;
{
  rtx list_entry = modify_mem_list[bb->index];

  while (list_entry)
    {
      rtx setter;
      /* Ignore entries in the list that do not apply.  */
      if ((avail_p
	   && INSN_CUID (XEXP (list_entry, 0)) < uid_limit)
	  || (! avail_p
	      && INSN_CUID (XEXP (list_entry, 0)) > uid_limit))
	{
	  list_entry = XEXP (list_entry, 1);
	  continue;
	}
1516 setter = XEXP (list_entry, 0);
1518 /* If SETTER is a call everything is clobbered. Note that calls
1519 to pure functions are never put on the list, so we need not
1520 worry about them. */
      if (GET_CODE (setter) == CALL_INSN)
	return 1;
1524 /* SETTER must be an INSN of some kind that sets memory. Call
1525 note_stores to examine each hunk of memory that is modified.
1527 The note_stores interface is pretty limited, so we have to
1528 communicate via global variables. Yuk. */
1529 gcse_mem_operand = x;
1530 gcse_mems_conflict_p = 0;
1531 note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
      if (gcse_mems_conflict_p)
	return 1;

      list_entry = XEXP (list_entry, 1);
    }

  return 0;
}
1539 /* Return non-zero if the operands of expression X are unchanged from
1540 the start of INSN's basic block up to but not including INSN. */
static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}
1549 /* Return non-zero if the operands of expression X are unchanged from
1550 INSN to the end of INSN's basic block. */
static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}
1559 /* Hash expression X.
1561 MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1562 indicating if a volatile operand is found or if the expression contains
1563 something we don't want to insert in the table.
1565 ??? One might want to merge this with canon_hash. Later. */
static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
1570 enum machine_mode mode;
1571 int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

1576 *do_not_record_p = 0;
1578 hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}
1582 /* Hash a string. Just add its bytes up. */
static inline unsigned
hash_string_1 (ps)
     const char *ps;
{
  unsigned hash = 0;
  const unsigned char *p = (const unsigned char *) ps;

  if (p)
    while (*p)
      hash += *p++;

  return hash;
}
1598 /* Subroutine of hash_expr to do the actual work. */
static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  const char *fmt;
  /* Used to turn recursion into iteration.  We can't rely on GCC's
     tail-recursion elimination since we need to keep accumulating values
     in HASH.  */
 repeat:

  if (x == 0)
    return hash;
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      hash += ((unsigned int) REG << 7) + REGNO (x);
      return hash;

    case CONST_INT:
      hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
	       + (unsigned int) INTVAL (x));
      return hash;

    case CONST_DOUBLE:
1632 /* This is like the general case, except that it only counts
1633 the integers representing the constant. */
1634 hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1635 if (GET_MODE (x) != VOIDmode)
1636 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1637 hash += (unsigned int) XWINT (x, i);
      else
	hash += ((unsigned int) CONST_DOUBLE_LOW (x)
		 + (unsigned int) CONST_DOUBLE_HIGH (x));
      return hash;

    case CONST_VECTOR:
      {
	int units;
	rtx elt;

	units = CONST_VECTOR_NUNITS (x);

	for (i = 0; i < units; ++i)
	  {
	    elt = CONST_VECTOR_ELT (x, i);
	    hash += hash_expr_1 (elt, GET_MODE (elt), do_not_record_p);
	  }

	return hash;
      }
      /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
1661 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1662 differences and differences between each stage's debugging dumps. */
1663 hash += (((unsigned int) LABEL_REF << 7)
	       + CODE_LABEL_NUMBER (XEXP (x, 0)));
      return hash;

    case SYMBOL_REF:
      {
1669 /* Don't hash on the symbol's address to avoid bootstrap differences.
1670 Different hash values may cause expressions to be recorded in
1671 different orders and thus different registers to be used in the
1672 final assembler. This also avoids differences in the dump files
1673 between various stages. */
	unsigned int h = 0;
	const unsigned char *p = (const unsigned char *) XSTR (x, 0);

	while (*p)
	  h += (h << 7) + *p++; /* ??? revisit */

	hash += ((unsigned int) SYMBOL_REF << 7) + h;
	return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}

      hash += (unsigned int) MEM;
1692 /* We used alias set for hashing, but this is not good, since the alias
1693 set may differ in -fprofile-arcs and -fbranch-probabilities compilation
	 causing the profiles to fail to match.  */
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
    case PC:
    case CC0:
    case CALL:
1705 case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
1710 if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}
      else
	{
1717 /* We don't want to take the filename and line into account. */
1718 hash += (unsigned) code + (unsigned) GET_MODE (x)
1719 + hash_string_1 (ASM_OPERANDS_TEMPLATE (x))
1720 + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x))
1721 + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x);
1723 if (ASM_OPERANDS_INPUT_LENGTH (x))
	    {
	      for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++)
1727 hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i),
1728 GET_MODE (ASM_OPERANDS_INPUT (x, i)),
				      do_not_record_p)
			 + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT
					  (x, i)));

1734 hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0));
1735 x = ASM_OPERANDS_INPUT (x, 0);
	      mode = GET_MODE (x);
	      goto repeat;
	    }

	  return hash;
	}

    default:
      break;
    }
1746 hash += (unsigned) code + (unsigned) GET_MODE (x);
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
1751 /* If we are about to do the last recursive call
1752 needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, i);
	      goto repeat;
	    }

1760 hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
	  if (*do_not_record_p)
	    return 0;
	}
1765 else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  {
	    hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
	    if (*do_not_record_p)
	      return 0;
	  }
1773 else if (fmt[i] == 's')
1774 hash += hash_string_1 (XSTR (x, i));
1775 else if (fmt[i] == 'i')
	hash += (unsigned int) XINT (x, i);
    }

  return hash;
}
1784 /* Hash a set of register REGNO.
1786 Sets are hashed on the register that is set. This simplifies the PRE copy
1789 ??? May need to make things more elaborate. Later, as necessary. */
static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}
1802 /* Return non-zero if exp1 is equivalent to exp2.
1803 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
static int
expr_equiv_p (x, y)
     rtx x, y;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == y)
    return 1;

  if (x == 0 || y == 0)
    return x == y;
1819 code = GET_CODE (x);
  if (code != GET_CODE (y))
    return 0;
1823 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1824 if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    case PC:
    case CC0:
      return x == y;

    case CONST_INT:
      return INTVAL (x) == INTVAL (y);

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    case REG:
      return REGNO (x) == REGNO (y);

    case MEM:
1846 /* Can't merge two expressions in different alias sets, since we can
1847 decide that the expression is transparent in a block when it isn't,
1848 due to it being set with the different alias set. */
      if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
	return 0;
      break;
    /* For commutative operations, check both orders.  */
    case PLUS:
    case MULT:
    case AND:
    case IOR:
    case XOR:
    case NE:
    case EQ:
1861 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1862 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1863 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1864 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
    case ASM_OPERANDS:
      /* We don't use the generic code below because we want to
1868 disregard filename and line numbers. */
1870 /* A volatile asm isn't equivalent to any other. */
      if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y))
	return 0;
1874 if (GET_MODE (x) != GET_MODE (y)
1875 || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y))
1876 || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x),
1877 ASM_OPERANDS_OUTPUT_CONSTRAINT (y))
1878 || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y)
	  || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y))
	return 0;
1882 if (ASM_OPERANDS_INPUT_LENGTH (x))
	{
	  for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
1885 if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i),
1886 ASM_OPERANDS_INPUT (y, i))
1887 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i),
			   ASM_OPERANDS_INPUT_CONSTRAINT (y, i)))
	      return 0;
	}

      return 1;

    default:
      break;
    }
1898 /* Compare the elements. If any pair of corresponding elements
1899 fail to match, return 0 for the whole thing. */
1901 fmt = GET_RTX_FORMAT (code);
1902 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
	{
	case 'e':
	  if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
	    return 0;
	  break;

	case 'E':
	  if (XVECLEN (x, i) != XVECLEN (y, i))
	    return 0;
1914 for (j = 0; j < XVECLEN (x, i); j++)
	    if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
	      return 0;
	  break;

	case 's':
	  if (strcmp (XSTR (x, i), XSTR (y, i)))
	    return 0;
	  break;

	case 'i':
	  if (XINT (x, i) != XINT (y, i))
	    return 0;
	  break;

	case 'w':
	  if (XWINT (x, i) != XWINT (y, i))
	    return 0;
	  break;

	case '0':
	  break;

	default:
	  abort ();
	}
    }

  return 1;
}
1945 /* Insert expression X in INSN in the hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.
1949 MODE is the mode of the value X is being stored into.
1950 It is only used if X is a CONST_INT.
1952 ANTIC_P is non-zero if X is an anticipatable expression.
1953 AVAIL_P is non-zero if X is an available expression. */
static void
insert_expr_in_table (x, mode, insn, antic_p, avail_p)
     rtx x;
1958 enum machine_mode mode;
     rtx insn;
     int antic_p, avail_p;
{
  int found, do_not_record_p;
  unsigned int hash;
1964 struct expr *cur_expr, *last_expr = NULL;
1965 struct occr *antic_occr, *avail_occr;
1966 struct occr *last_occr = NULL;
1968 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1970 /* Do not insert expression in table if it contains volatile operands,
1971 or if hash_expr determines the expression is something we don't want
1972 to or can't handle. */
  if (do_not_record_p)
    return;
  cur_expr = expr_hash_table[hash];
  found = 0;
1979 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
1983 last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
1989 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1990 bytes_used += sizeof (struct expr);
1991 if (expr_hash_table[hash] == NULL)
1992 /* This is the first pattern that hashed to this index. */
1993 expr_hash_table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;
      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
2000 cur_expr->bitmap_index = n_exprs++;
2001 cur_expr->next_same_hash = NULL;
2002 cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }
2006 /* Now record the occurrence(s). */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;
2011 /* Search for another occurrence in the same basic block. */
2012 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
	{
	  /* If an occurrence isn't found, save a pointer to the end of
	     the list.  */
2016 last_occr = antic_occr;
2017 antic_occr = antic_occr->next;
2021 /* Found another instance of the expression in the same basic block.
2022 Prefer the currently recorded one. We want the first one in the
2023 block and the block is scanned from start to end. */
2024 ; /* nothing to do */
2027 /* First occurrence of this expression in this basic block. */
2028 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2029 bytes_used += sizeof (struct occr);
2030 /* First occurrence of this expression in any block? */
2031 if (cur_expr->antic_occr == NULL)
2032 cur_expr->antic_occr = antic_occr;
2033 else
2034 last_occr->next = antic_occr;
2036 antic_occr->insn = insn;
2037 antic_occr->next = NULL;
2043 avail_occr = cur_expr->avail_occr;
2045 /* Search for another occurrence in the same basic block. */
2046 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
2048 /* If an occurrence isn't found, save a pointer to the end of
2049 the list. */
2050 last_occr = avail_occr;
2051 avail_occr = avail_occr->next;
2055 /* Found another instance of the expression in the same basic block.
2056 Prefer this occurrence to the currently recorded one. We want
2057 the last one in the block and the block is scanned from start
2058 to end. */
2059 avail_occr->insn = insn;
2062 /* First occurrence of this expression in this basic block. */
2063 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2064 bytes_used += sizeof (struct occr);
2066 /* First occurrence of this expression in any block? */
2067 if (cur_expr->avail_occr == NULL)
2068 cur_expr->avail_occr = avail_occr;
2069 else
2070 last_occr->next = avail_occr;
2072 avail_occr->insn = insn;
2073 avail_occr->next = NULL;
2078 /* Insert pattern X in INSN in the hash table.
2079 X is a SET of a reg to either another reg or a constant.
2080 If it is already present, record it as the last occurrence in INSN's
2081 basic block.
2084 insert_set_in_table (x, insn)
2090 struct expr *cur_expr, *last_expr = NULL;
2091 struct occr *cur_occr, *last_occr = NULL;
2093 if (GET_CODE (x) != SET
2094 || GET_CODE (SET_DEST (x)) != REG)
2095 abort ();
2097 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
2099 cur_expr = set_hash_table[hash];
2102 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
2104 /* If the expression isn't found, save a pointer to the end of
2105 the list. */
2106 last_expr = cur_expr;
2107 cur_expr = cur_expr->next_same_hash;
2112 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
2113 bytes_used += sizeof (struct expr);
2114 if (set_hash_table[hash] == NULL)
2115 /* This is the first pattern that hashed to this index. */
2116 set_hash_table[hash] = cur_expr;
2117 else
2118 /* Add EXPR to end of this hash chain. */
2119 last_expr->next_same_hash = cur_expr;
2121 /* Set the fields of the expr element.
2122 We must copy X because it can be modified when copy propagation is
2123 performed on its operands. */
2124 cur_expr->expr = copy_rtx (x);
2125 cur_expr->bitmap_index = n_sets++;
2126 cur_expr->next_same_hash = NULL;
2127 cur_expr->antic_occr = NULL;
2128 cur_expr->avail_occr = NULL;
2131 /* Now record the occurrence. */
2132 cur_occr = cur_expr->avail_occr;
2134 /* Search for another occurrence in the same basic block. */
2135 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
2137 /* If an occurrence isn't found, save a pointer to the end of
2138 the list. */
2139 last_occr = cur_occr;
2140 cur_occr = cur_occr->next;
2144 /* Found another instance of the expression in the same basic block.
2145 Prefer this occurrence to the currently recorded one. We want the
2146 last one in the block and the block is scanned from start to end. */
2147 cur_occr->insn = insn;
2150 /* First occurrence of this expression in this basic block. */
2151 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
2152 bytes_used += sizeof (struct occr);
2154 /* First occurrence of this expression in any block? */
2155 if (cur_expr->avail_occr == NULL)
2156 cur_expr->avail_occr = cur_occr;
2157 else
2158 last_occr->next = cur_occr;
2160 cur_occr->insn = insn;
2161 cur_occr->next = NULL;
2165 /* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is
2166 non-zero, this is for the assignment hash table, otherwise it is for the
2167 expression hash table. */
2170 hash_scan_set (pat, insn, set_p)
2174 rtx src = SET_SRC (pat);
2175 rtx dest = SET_DEST (pat);
2178 if (GET_CODE (src) == CALL)
2179 hash_scan_call (src, insn);
2181 else if (GET_CODE (dest) == REG)
2183 unsigned int regno = REGNO (dest);
2186 /* If this is a single set and we are doing constant propagation,
2187 see if a REG_NOTE shows this equivalent to a constant. */
2188 if (set_p && (note = find_reg_equal_equiv_note (insn)) != 0
2189 && CONSTANT_P (XEXP (note, 0)))
2190 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
2192 /* Only record sets of pseudo-regs in the hash table. */
2193 if (! set_p
2194 && regno >= FIRST_PSEUDO_REGISTER
2195 /* Don't GCSE something if we can't do a reg/reg copy. */
2196 && can_copy_p [GET_MODE (dest)]
2197 /* GCSE commonly inserts instructions after the insn.  We can't
2198 do that easily for EH_REGION notes, so disable GCSE on these
2199 for now. */
2200 && !find_reg_note (insn, REG_EH_REGION, NULL_RTX)
2201 /* Is SET_SRC something we want to gcse? */
2202 && want_to_gcse_p (src)
2203 /* Don't CSE a nop. */
2204 && ! set_noop_p (pat)
2205 /* Don't GCSE if it has an attached REG_EQUIV note.
2206 At this point only function parameters should have
2207 REG_EQUIV notes, and if the argument slot is used somewhere
2208 explicitly, it means the address of the parameter has been taken,
2209 so we should not extend the lifetime of the pseudo. */
2210 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
2211 || GET_CODE (XEXP (note, 0)) != MEM))
2213 /* An expression is not anticipatable if its operands are
2214 modified before this insn or if this is not the only SET in
2215 this insn. */
2216 int antic_p = oprs_anticipatable_p (src, insn) && single_set (insn);
2217 /* An expression is not available if its operands are
2218 subsequently modified, including this insn. It's also not
2219 available if this is a branch, because we can't insert
2220 a set after the branch. */
2221 int avail_p = (oprs_available_p (src, insn)
2222 && ! JUMP_P (insn));
2224 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
2227 /* Record sets for constant/copy propagation. */
2228 else if (set_p
2229 && regno >= FIRST_PSEUDO_REGISTER
2230 && ((GET_CODE (src) == REG
2231 && REGNO (src) >= FIRST_PSEUDO_REGISTER
2232 && can_copy_p [GET_MODE (dest)]
2233 && REGNO (src) != regno)
2234 || CONSTANT_P (src))
2235 /* A copy is not available if its src or dest is subsequently
2236 modified. Here we want to search from INSN+1 on, but
2237 oprs_available_p searches from INSN on. */
2238 && (insn == BLOCK_END (BLOCK_NUM (insn))
2239 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
2240 && oprs_available_p (pat, tmp))))
2241 insert_set_in_table (pat, insn);
2246 hash_scan_clobber (x, insn)
2247 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2249 /* Currently nothing to do. */
2253 hash_scan_call (x, insn)
2254 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
2256 /* Currently nothing to do. */
2259 /* Process INSN and add hash table entries as appropriate.
2261 Only available expressions that set a single pseudo-reg are recorded.
2263 Single sets in a PARALLEL could be handled, but it's an extra complication
2264 that isn't dealt with right now. The trick is handling the CLOBBERs that
2265 are also in the PARALLEL. Later.
2267 If SET_P is non-zero, this is for the assignment hash table,
2268 otherwise it is for the expression hash table.
2269 If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block and should
2270 not record any expressions. */
2273 hash_scan_insn (insn, set_p, in_libcall_block)
2276 int in_libcall_block;
2278 rtx pat = PATTERN (insn);
2281 if (in_libcall_block)
2282 return;
2284 /* Pick out the sets of INSN and for other forms of instructions record
2285 what's been modified. */
2287 if (GET_CODE (pat) == SET)
2288 hash_scan_set (pat, insn, set_p);
2289 else if (GET_CODE (pat) == PARALLEL)
2290 for (i = 0; i < XVECLEN (pat, 0); i++)
2292 rtx x = XVECEXP (pat, 0, i);
2294 if (GET_CODE (x) == SET)
2295 hash_scan_set (x, insn, set_p);
2296 else if (GET_CODE (x) == CLOBBER)
2297 hash_scan_clobber (x, insn);
2298 else if (GET_CODE (x) == CALL)
2299 hash_scan_call (x, insn);
2302 else if (GET_CODE (pat) == CLOBBER)
2303 hash_scan_clobber (pat, insn);
2304 else if (GET_CODE (pat) == CALL)
2305 hash_scan_call (pat, insn);
2309 dump_hash_table (file, name, table, table_size, total_size)
2312 struct expr **table;
2313 int table_size, total_size;
2316 /* Flattened out table, so it's printed in proper order. */
2317 struct expr **flat_table;
2318 unsigned int *hash_val;
2322 = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2323 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
2325 for (i = 0; i < table_size; i++)
2326 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2328 flat_table[expr->bitmap_index] = expr;
2329 hash_val[expr->bitmap_index] = i;
2332 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2333 name, table_size, total_size);
2335 for (i = 0; i < total_size; i++)
2336 if (flat_table[i] != 0)
2338 expr = flat_table[i];
2339 fprintf (file, "Index %d (hash value %d)\n ",
2340 expr->bitmap_index, hash_val[i]);
2341 print_rtl (file, expr->expr);
2342 fprintf (file, "\n");
2345 fprintf (file, "\n");
2351 /* Record register first/last/block set information for REGNO in INSN.
2353 first_set records the first place in the block where the register
2354 is set and is used to compute "anticipatability".
2356 last_set records the last place in the block where the register
2357 is set and is used to compute "availability".
2359 last_bb records the block for which first_set and last_set are
2360 valid, as a quick test to invalidate them.
2362 reg_set_in_block records whether the register is set in the block
2363 and is used to compute "transparency". */
2366 record_last_reg_set_info (insn, regno)
2370 struct reg_avail_info *info = &reg_avail_info[regno];
2371 int cuid = INSN_CUID (insn);
2373 info->last_set = cuid;
2374 if (info->last_bb != current_bb)
2376 info->last_bb = current_bb;
2377 info->first_set = cuid;
2378 SET_BIT (reg_set_in_block[current_bb], regno);
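/* A sketch of the per-register record these fields live in; the real
   declaration appears earlier in the file, so the layout shown here is
   illustrative only.  */
#if 0
struct reg_avail_info
{
  int last_bb;		/* Block for which first_set/last_set are valid.  */
  int first_set;	/* CUID of the first set of the reg in last_bb.  */
  int last_set;		/* CUID of the last set of the reg in last_bb.  */
};
#endif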
2383 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
2384 Note we store a pair of elements in the list, so they have to be
2385 taken off pairwise. */
2388 canon_list_insert (dest, unused1, v_insn)
2389 rtx dest ATTRIBUTE_UNUSED;
2390 rtx unused1 ATTRIBUTE_UNUSED;
2393 rtx dest_addr, insn;
2396 while (GET_CODE (dest) == SUBREG
2397 || GET_CODE (dest) == ZERO_EXTRACT
2398 || GET_CODE (dest) == SIGN_EXTRACT
2399 || GET_CODE (dest) == STRICT_LOW_PART)
2400 dest = XEXP (dest, 0);
2402 /* If DEST is not a MEM, then it will not conflict with a load. Note
2403 that function calls are assumed to clobber memory, but are handled
2404 elsewhere. */
2406 if (GET_CODE (dest) != MEM)
2409 dest_addr = get_addr (XEXP (dest, 0));
2410 dest_addr = canon_rtx (dest_addr);
2411 insn = (rtx) v_insn;
2412 bb = BLOCK_NUM (insn);
2414 canon_modify_mem_list[bb] =
2415 alloc_EXPR_LIST (VOIDmode, dest_addr, canon_modify_mem_list[bb]);
2416 canon_modify_mem_list[bb] =
2417 alloc_EXPR_LIST (VOIDmode, dest, canon_modify_mem_list[bb]);
2418 bitmap_set_bit (canon_modify_mem_list_set, bb);
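/* A sketch (not part of the original file) of how a consumer takes the
   pairs back off the list; compute_transp further below does exactly
   this.  Each non-call entry contributes two nodes, the MEM followed by
   its canonical address, so the walk advances twice per recorded
   store.  */
#if 0
rtx list_entry, dest, dest_addr;

for (list_entry = canon_modify_mem_list[bb]; list_entry != NULL; )
  {
    dest = XEXP (list_entry, 0);	/* The MEM itself.  */
    list_entry = XEXP (list_entry, 1);
    dest_addr = XEXP (list_entry, 0);	/* Its canonicalized address.  */
    /* ... test DEST / DEST_ADDR against the access of interest ...  */
    list_entry = XEXP (list_entry, 1);
  }
#endif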
2421 /* Record memory modification information for INSN. We do not actually care
2422 about the memory location(s) that are set, or even how they are set (consider
2423 a CALL_INSN). We merely need to record which insns modify memory. */
2426 record_last_mem_set_info (insn)
2429 int bb = BLOCK_NUM (insn);
2431 /* load_killed_in_block_p will handle the case of calls clobbering
2432 everything. */
2433 modify_mem_list[bb] = alloc_INSN_LIST (insn, modify_mem_list[bb]);
2434 bitmap_set_bit (modify_mem_list_set, bb);
2436 if (GET_CODE (insn) == CALL_INSN)
2438 /* Note that traversals of this loop (other than for freeing)
2439 will break after encountering a CALL_INSN. So, there's no
2440 need to insert a pair of items, as canon_list_insert does. */
2441 canon_modify_mem_list[bb] =
2442 alloc_INSN_LIST (insn, canon_modify_mem_list[bb]);
2443 bitmap_set_bit (canon_modify_mem_list_set, bb);
2446 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
2449 /* Called from compute_hash_table via note_stores to handle one
2450 SET or CLOBBER in an insn. DATA is really the instruction in which
2451 the SET is taking place. */
2454 record_last_set_info (dest, setter, data)
2455 rtx dest, setter ATTRIBUTE_UNUSED;
2458 rtx last_set_insn = (rtx) data;
2460 if (GET_CODE (dest) == SUBREG)
2461 dest = SUBREG_REG (dest);
2463 if (GET_CODE (dest) == REG)
2464 record_last_reg_set_info (last_set_insn, REGNO (dest));
2465 else if (GET_CODE (dest) == MEM
2466 /* Ignore pushes, they clobber nothing. */
2467 && ! push_operand (dest, GET_MODE (dest)))
2468 record_last_mem_set_info (last_set_insn);
2471 /* Top level function to create an expression or assignment hash table.
2473 Expression entries are placed in the hash table if
2474 - they are of the form (set (pseudo-reg) src),
2475 - src is something we want to perform GCSE on,
2476 - none of the operands are subsequently modified in the block
2478 Assignment entries are placed in the hash table if
2479 - they are of the form (set (pseudo-reg) src),
2480 - src is something we want to perform const/copy propagation on,
2481 - none of the operands or target are subsequently modified in the block
2483 Currently src must be a pseudo-reg or a const_int.
2485 F is the first insn.
2486 SET_P is non-zero for computing the assignment hash table. */
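/* For example (illustrative RTL, not taken from the source): an insn
   such as

     (set (reg 117) (plus:SI (reg 115) (const_int 4)))

   can yield an expression-table entry for the PLUS, while

     (set (reg 118) (reg 117))   or   (set (reg 118) (const_int 0))

   yield assignment-table entries for const/copy propagation.  */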
2489 compute_hash_table (set_p)
2494 /* While we compute the hash table we also compute a bit array of which
2495 registers are set in which blocks.
2496 ??? This isn't needed during const/copy propagation, but it's cheap to
2497 compute.  Later. */
2498 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2500 /* Re-cache any INSN_LIST nodes we have allocated. */
2501 clear_modify_mem_tables ();
2502 /* Some working arrays used to track first and last set in each block. */
2503 reg_avail_info = (struct reg_avail_info*)
2504 gmalloc (max_gcse_regno * sizeof (struct reg_avail_info));
2506 for (i = 0; i < max_gcse_regno; ++i)
2507 reg_avail_info[i].last_bb = NEVER_SET;
2509 for (current_bb = 0; current_bb < n_basic_blocks; current_bb++)
2513 int in_libcall_block;
2515 /* First pass over the instructions records information used to
2516 determine when registers and memory are first and last set.
2517 ??? hard-reg reg_set_in_block computation
2518 could be moved to compute_sets since they currently don't change. */
2520 for (insn = BLOCK_HEAD (current_bb);
2521 insn && insn != NEXT_INSN (BLOCK_END (current_bb));
2522 insn = NEXT_INSN (insn))
2524 if (! INSN_P (insn))
2527 if (GET_CODE (insn) == CALL_INSN)
2529 bool clobbers_all = false;
2530 #ifdef NON_SAVING_SETJMP
2531 if (NON_SAVING_SETJMP
2532 && find_reg_note (insn, REG_SETJMP, NULL_RTX))
2533 clobbers_all = true;
2534 #endif
2536 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2537 if (clobbers_all
2538 || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
2539 record_last_reg_set_info (insn, regno);
2544 note_stores (PATTERN (insn), record_last_set_info, insn);
2547 /* The next pass builds the hash table. */
2549 for (insn = BLOCK_HEAD (current_bb), in_libcall_block = 0;
2550 insn && insn != NEXT_INSN (BLOCK_END (current_bb));
2551 insn = NEXT_INSN (insn))
2554 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2555 in_libcall_block = 1;
2556 else if (set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX))
2557 in_libcall_block = 0;
2558 hash_scan_insn (insn, set_p, in_libcall_block);
2559 if (!set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX))
2560 in_libcall_block = 0;
2564 free (reg_avail_info);
2565 reg_avail_info = NULL;
2568 /* Allocate space for the set hash table.
2569 N_INSNS is the number of instructions in the function.
2570 It is used to determine the number of buckets to use. */
2573 alloc_set_hash_table (n_insns)
2578 set_hash_table_size = n_insns / 4;
2579 if (set_hash_table_size < 11)
2580 set_hash_table_size = 11;
2582 /* Attempt to maintain efficient use of hash table.
2583 Making it an odd number is simplest for now.
2584 ??? Later take some measurements. */
2585 set_hash_table_size |= 1;
2586 n = set_hash_table_size * sizeof (struct expr *);
2587 set_hash_table = (struct expr **) gmalloc (n);
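/* Worked example (illustrative): a 1000-insn function gets 1000 / 4
   = 250 buckets, bumped to the odd value 251 by the |= 1 above; a
   20-insn function gets the 11-bucket floor.  */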
2590 /* Free things allocated by alloc_set_hash_table. */
2593 free_set_hash_table ()
2595 free (set_hash_table);
2598 /* Compute the hash table for doing copy/const propagation. */
2601 compute_set_hash_table ()
2603 /* Initialize count of number of entries in hash table. */
2604 n_sets = 0;
2605 memset ((char *) set_hash_table, 0,
2606 set_hash_table_size * sizeof (struct expr *));
2608 compute_hash_table (1);
2611 /* Allocate space for the expression hash table.
2612 N_INSNS is the number of instructions in the function.
2613 It is used to determine the number of buckets to use. */
2616 alloc_expr_hash_table (n_insns)
2617 unsigned int n_insns;
2621 expr_hash_table_size = n_insns / 2;
2622 /* Make sure the amount is usable. */
2623 if (expr_hash_table_size < 11)
2624 expr_hash_table_size = 11;
2626 /* Attempt to maintain efficient use of hash table.
2627 Making it an odd number is simplest for now.
2628 ??? Later take some measurements. */
2629 expr_hash_table_size |= 1;
2630 n = expr_hash_table_size * sizeof (struct expr *);
2631 expr_hash_table = (struct expr **) gmalloc (n);
2634 /* Free things allocated by alloc_expr_hash_table. */
2637 free_expr_hash_table ()
2639 free (expr_hash_table);
2642 /* Compute the hash table for doing GCSE. */
2645 compute_expr_hash_table ()
2647 /* Initialize count of number of entries in hash table. */
2648 n_exprs = 0;
2649 memset ((char *) expr_hash_table, 0,
2650 expr_hash_table_size * sizeof (struct expr *));
2652 compute_hash_table (0);
2655 /* Expression tracking support. */
2657 /* Lookup pattern PAT in the expression table.
2658 The result is a pointer to the table entry, or NULL if not found. */
2660 static struct expr *
2664 int do_not_record_p;
2665 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2666 expr_hash_table_size);
2669 if (do_not_record_p)
2670 return NULL;
2672 expr = expr_hash_table[hash];
2674 while (expr && ! expr_equiv_p (expr->expr, pat))
2675 expr = expr->next_same_hash;
2680 /* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that
2681 matches it, otherwise return the first entry for REGNO. The result is a
2682 pointer to the table entry, or NULL if not found. */
2684 static struct expr *
2685 lookup_set (regno, pat)
2689 unsigned int hash = hash_set (regno, set_hash_table_size);
2692 expr = set_hash_table[hash];
2696 while (expr && ! expr_equiv_p (expr->expr, pat))
2697 expr = expr->next_same_hash;
2701 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2702 expr = expr->next_same_hash;
2708 /* Return the next entry for REGNO in list EXPR. */
2710 static struct expr *
2711 next_set (regno, expr)
2716 expr = expr->next_same_hash;
2717 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2722 /* Like free_INSN_LIST_list or free_EXPR_LIST_list, except that the node
2723 types may be mixed. */
2726 free_insn_expr_list_list (listp)
2731 for (list = *listp; list ; list = next)
2733 next = XEXP (list, 1);
2734 if (GET_CODE (list) == EXPR_LIST)
2735 free_EXPR_LIST_node (list);
2736 else
2737 free_INSN_LIST_node (list);
2743 /* Clear canon_modify_mem_list and modify_mem_list tables. */
2745 clear_modify_mem_tables ()
2749 EXECUTE_IF_SET_IN_BITMAP
2750 (modify_mem_list_set, 0, i, free_INSN_LIST_list (modify_mem_list + i));
2751 bitmap_clear (modify_mem_list_set);
2753 EXECUTE_IF_SET_IN_BITMAP
2754 (canon_modify_mem_list_set, 0, i,
2755 free_insn_expr_list_list (canon_modify_mem_list + i));
2756 bitmap_clear (canon_modify_mem_list_set);
2759 /* Release memory used by modify_mem_list_set and canon_modify_mem_list_set. */
2762 free_modify_mem_tables ()
2764 clear_modify_mem_tables ();
2765 free (modify_mem_list);
2766 free (canon_modify_mem_list);
2767 modify_mem_list = 0;
2768 canon_modify_mem_list = 0;
2771 /* Reset tables used to keep track of what's still available [since the
2772 start of the block]. */
2775 reset_opr_set_tables ()
2777 /* Maintain a bitmap of which regs have been set since the beginning of
2778 the block. */
2779 CLEAR_REG_SET (reg_set_bitmap);
2781 /* Also keep a record of the last instruction to modify memory.
2782 For now this is very trivial, we only record whether any memory
2783 location has been modified. */
2784 clear_modify_mem_tables ();
2787 /* Return non-zero if the operands of X are not set before INSN in
2788 INSN's basic block. */
2791 oprs_not_set_p (x, insn)
2801 code = GET_CODE (x);
2817 if (load_killed_in_block_p (BLOCK_FOR_INSN (insn),
2818 INSN_CUID (insn), x, 0))
2821 return oprs_not_set_p (XEXP (x, 0), insn);
2824 return ! REGNO_REG_SET_P (reg_set_bitmap, REGNO (x));
2830 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2834 /* If we are about to do the last recursive call
2835 needed at this level, change it into iteration.
2836 This function is called enough to be worth it. */
2838 return oprs_not_set_p (XEXP (x, i), insn);
2840 if (! oprs_not_set_p (XEXP (x, i), insn))
2843 else if (fmt[i] == 'E')
2844 for (j = 0; j < XVECLEN (x, i); j++)
2845 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2852 /* Mark things set by a CALL. */
2858 if (! CONST_OR_PURE_CALL_P (insn))
2859 record_last_mem_set_info (insn);
2862 /* Mark things set by a SET. */
2865 mark_set (pat, insn)
2868 rtx dest = SET_DEST (pat);
2870 while (GET_CODE (dest) == SUBREG
2871 || GET_CODE (dest) == ZERO_EXTRACT
2872 || GET_CODE (dest) == SIGN_EXTRACT
2873 || GET_CODE (dest) == STRICT_LOW_PART)
2874 dest = XEXP (dest, 0);
2876 if (GET_CODE (dest) == REG)
2877 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (dest));
2878 else if (GET_CODE (dest) == MEM)
2879 record_last_mem_set_info (insn);
2881 if (GET_CODE (SET_SRC (pat)) == CALL)
2885 /* Record things set by a CLOBBER. */
2888 mark_clobber (pat, insn)
2891 rtx clob = XEXP (pat, 0);
2893 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2894 clob = XEXP (clob, 0);
2896 if (GET_CODE (clob) == REG)
2897 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (clob));
2899 record_last_mem_set_info (insn);
2902 /* Record things set by INSN.
2903 This data is used by oprs_not_set_p. */
2906 mark_oprs_set (insn)
2909 rtx pat = PATTERN (insn);
2912 if (GET_CODE (pat) == SET)
2913 mark_set (pat, insn);
2914 else if (GET_CODE (pat) == PARALLEL)
2915 for (i = 0; i < XVECLEN (pat, 0); i++)
2917 rtx x = XVECEXP (pat, 0, i);
2919 if (GET_CODE (x) == SET)
2920 mark_set (x, insn);
2921 else if (GET_CODE (x) == CLOBBER)
2922 mark_clobber (x, insn);
2923 else if (GET_CODE (x) == CALL)
2927 else if (GET_CODE (pat) == CLOBBER)
2928 mark_clobber (pat, insn);
2929 else if (GET_CODE (pat) == CALL)
2934 /* Classic GCSE reaching definition support. */
2936 /* Allocate reaching def variables. */
2939 alloc_rd_mem (n_blocks, n_insns)
2940 int n_blocks, n_insns;
2942 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2943 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2945 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2946 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2948 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2949 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2951 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2952 sbitmap_vector_zero (rd_out, n_basic_blocks);
2955 /* Free reaching def variables. */
2960 sbitmap_vector_free (rd_kill);
2961 sbitmap_vector_free (rd_gen);
2962 sbitmap_vector_free (reaching_defs);
2963 sbitmap_vector_free (rd_out);
2966 /* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */
2969 handle_rd_kill_set (insn, regno, bb)
2974 struct reg_set *this_reg;
2976 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg->next)
2977 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2978 SET_BIT (rd_kill[bb->index], INSN_CUID (this_reg->insn));
2981 /* Compute the set of kills for reaching definitions. */
2991 For each set bit in `gen' of the block (i.e. each insn which
2992 generates a definition in the block)
2993 Call the reg set by the insn corresponding to that bit regx
2994 Look at the linked list starting at reg_set_table[regx]
2995 For each setting of regx in the linked list, which is not in
2996 this block
2997 Set the bit in `kill' corresponding to that insn. */
2998 for (bb = 0; bb < n_basic_blocks; bb++)
2999 for (cuid = 0; cuid < max_cuid; cuid++)
3000 if (TEST_BIT (rd_gen[bb], cuid))
3002 rtx insn = CUID_INSN (cuid);
3003 rtx pat = PATTERN (insn);
3005 if (GET_CODE (insn) == CALL_INSN)
3007 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3008 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
3009 handle_rd_kill_set (insn, regno, BASIC_BLOCK (bb));
3012 if (GET_CODE (pat) == PARALLEL)
3014 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3016 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
3018 if ((code == SET || code == CLOBBER)
3019 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
3020 handle_rd_kill_set (insn,
3021 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
3025 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
3026 /* Each setting of this register outside of this block
3027 must be marked in the set of kills in this block. */
3028 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), BASIC_BLOCK (bb));
3032 /* Compute the reaching definitions as in
3033 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
3034 Chapter 10. It is the same algorithm as used for computing available
3035 expressions but applied to the gens and kills of reaching definitions. */
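/* In equation form (a sketch in the textbook notation):

     IN[b]  = union over predecessors p of OUT[p]
     OUT[b] = GEN[b] union (IN[b] - KILL[b])

   iterated until no OUT[b] changes; the sbitmap_union_of_preds and
   sbitmap_union_of_diff_cg calls below implement these two lines
   directly.  */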
3040 int bb, changed, passes;
3042 for (bb = 0; bb < n_basic_blocks; bb++)
3043 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
3050 for (bb = 0; bb < n_basic_blocks; bb++)
3052 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
3053 changed |= sbitmap_union_of_diff_cg (rd_out[bb], rd_gen[bb],
3054 reaching_defs[bb], rd_kill[bb]);
3060 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
3063 /* Classic GCSE available expression support. */
3065 /* Allocate memory for available expression computation. */
3068 alloc_avail_expr_mem (n_blocks, n_exprs)
3069 int n_blocks, n_exprs;
3071 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3072 sbitmap_vector_zero (ae_kill, n_basic_blocks);
3074 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3075 sbitmap_vector_zero (ae_gen, n_basic_blocks);
3077 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3078 sbitmap_vector_zero (ae_in, n_basic_blocks);
3080 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
3081 sbitmap_vector_zero (ae_out, n_basic_blocks);
3085 free_avail_expr_mem ()
3087 sbitmap_vector_free (ae_kill);
3088 sbitmap_vector_free (ae_gen);
3089 sbitmap_vector_free (ae_in);
3090 sbitmap_vector_free (ae_out);
3093 /* Compute the set of available expressions generated in each basic block. */
3102 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
3103 This is all we have to do because an expression is not recorded if it
3104 is not available, and the only expressions we want to work with are the
3105 ones that are recorded. */
3106 for (i = 0; i < expr_hash_table_size; i++)
3107 for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
3108 for (occr = expr->avail_occr; occr != 0; occr = occr->next)
3109 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
3112 /* Return non-zero if expression X is killed in BB. */
3115 expr_killed_p (x, bb)
3126 code = GET_CODE (x);
3130 return TEST_BIT (reg_set_in_block[bb->index], REGNO (x));
3133 if (load_killed_in_block_p (bb, get_max_uid () + 1, x, 0))
3136 return expr_killed_p (XEXP (x, 0), bb);
3154 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3158 /* If we are about to do the last recursive call
3159 needed at this level, change it into iteration.
3160 This function is called enough to be worth it. */
3162 return expr_killed_p (XEXP (x, i), bb);
3163 else if (expr_killed_p (XEXP (x, i), bb))
3166 else if (fmt[i] == 'E')
3167 for (j = 0; j < XVECLEN (x, i); j++)
3168 if (expr_killed_p (XVECEXP (x, i, j), bb))
3175 /* Compute the set of available expressions killed in each basic block. */
3178 compute_ae_kill (ae_gen, ae_kill)
3179 sbitmap *ae_gen, *ae_kill;
3185 for (bb = 0; bb < n_basic_blocks; bb++)
3186 for (i = 0; i < expr_hash_table_size; i++)
3187 for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
3189 /* Skip EXPR if generated in this block. */
3190 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
3193 if (expr_killed_p (expr->expr, BASIC_BLOCK (bb)))
3194 SET_BIT (ae_kill[bb], expr->bitmap_index);
3198 /* Actually perform the Classic GCSE optimizations. */
3200 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
3202 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
3203 as a positive reach. We want to do this when there are two computations
3204 of the expression in the block.
3206 VISITED is a pointer to a working buffer for tracking which BB's have
3207 been visited. It is NULL for the top-level call.
3209 We treat reaching expressions that go through blocks containing the same
3210 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
3211 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3212 2 as not reaching. The intent is to improve the probability of finding
3213 only one reaching expression and to reduce register lifetimes by picking
3214 the closest such expression. */
3217 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
3221 int check_self_loop;
3226 for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
3228 basic_block pred_bb = pred->src;
3230 if (visited[pred_bb->index])
3231 /* This predecessor has already been visited. Nothing to do. */
3233 else if (pred_bb == bb)
3235 /* BB loops on itself. */
3236 if (check_self_loop
3237 && TEST_BIT (ae_gen[pred_bb->index], expr->bitmap_index)
3238 && BLOCK_NUM (occr->insn) == pred_bb->index)
3241 visited[pred_bb->index] = 1;
3244 /* Ignore this predecessor if it kills the expression. */
3245 else if (TEST_BIT (ae_kill[pred_bb->index], expr->bitmap_index))
3246 visited[pred_bb->index] = 1;
3248 /* Does this predecessor generate this expression? */
3249 else if (TEST_BIT (ae_gen[pred_bb->index], expr->bitmap_index))
3251 /* Is this the occurrence we're looking for?
3252 Note that there's only one generating occurrence per block
3253 so we just need to check the block number. */
3254 if (BLOCK_NUM (occr->insn) == pred_bb->index)
3257 visited[pred_bb->index] = 1;
3260 /* Neither gen nor kill. */
3263 visited[pred_bb->index] = 1;
3264 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
3271 /* All paths have been checked. */
3275 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
3276 memory allocated for that function is returned. */
3279 expr_reaches_here_p (occr, expr, bb, check_self_loop)
3283 int check_self_loop;
3286 char *visited = (char *) xcalloc (n_basic_blocks, 1);
3288 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
3294 /* Return the instruction that computes EXPR that reaches INSN's basic block.
3295 If there is more than one such instruction, return NULL.
3297 Called only by handle_avail_expr. */
3300 computing_insn (expr, insn)
3304 basic_block bb = BLOCK_FOR_INSN (insn);
3306 if (expr->avail_occr->next == NULL)
3308 if (BLOCK_FOR_INSN (expr->avail_occr->insn) == bb)
3309 /* The available expression is actually itself
3310 (i.e. a loop in the flow graph) so do nothing. */
3313 /* (FIXME) Case where we found a pattern that was created by
3314 a substitution that took place. */
3315 return expr->avail_occr->insn;
3319 /* Pattern is computed more than once.
3320 Search backwards from this insn to see how many of these
3321 computations actually reach this insn. */
3323 rtx insn_computes_expr = NULL;
3326 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3328 if (BLOCK_FOR_INSN (occr->insn) == bb)
3330 /* The expression is generated in this block.
3331 The only time we care about this is when the expression
3332 is generated later in the block [and thus there's a loop].
3333 We let the normal cse pass handle the other cases. */
3334 if (INSN_CUID (insn) < INSN_CUID (occr->insn)
3335 && expr_reaches_here_p (occr, expr, bb, 1))
3341 insn_computes_expr = occr->insn;
3344 else if (expr_reaches_here_p (occr, expr, bb, 0))
3350 insn_computes_expr = occr->insn;
3354 if (insn_computes_expr == NULL)
3357 return insn_computes_expr;
3361 /* Return non-zero if the definition in DEF_INSN can reach INSN.
3362 Only called by can_disregard_other_sets. */
3365 def_reaches_here_p (insn, def_insn)
3370 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3373 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3375 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3377 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3378 return 0;
3379 else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3380 reg = XEXP (PATTERN (def_insn), 0);
3381 else if (GET_CODE (PATTERN (def_insn)) == SET)
3382 reg = SET_DEST (PATTERN (def_insn));
3386 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3395 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. The
3396 value returned is the number of definitions that reach INSN. Returning a
3397 value of zero means that [maybe] more than one definition reaches INSN and
3398 the caller can't perform whatever optimization it is trying.  I.e., it is
3399 always safe to return zero. */
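/* Typical use, as in handle_avail_expr further below (a sketch):

     this_reg = reg_set_table[regnum_for_replacing];
     if (this_reg->next == NULL
	 || can_disregard_other_sets (&this_reg, insn, 0))
       ... exactly one reaching definition, safe to substitute ...  */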
3402 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3403 struct reg_set **addr_this_reg;
3407 int number_of_reaching_defs = 0;
3408 struct reg_set *this_reg;
3410 for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
3411 if (def_reaches_here_p (insn, this_reg->insn))
3413 number_of_reaching_defs++;
3414 /* Ignore parallels for now. */
3415 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3416 return 0;
3418 if (!for_combine
3419 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3420 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3421 SET_SRC (PATTERN (insn)))))
3422 /* A setting of the reg to a different value reaches INSN. */
3425 if (number_of_reaching_defs > 1)
3427 /* If in this setting the value the register is being set to is
3428 equal to the previous value the register was set to and this
3429 setting reaches the insn we are trying to do the substitution
3430 on then we are ok. */
3431 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3433 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3434 SET_SRC (PATTERN (insn))))
3438 *addr_this_reg = this_reg;
3441 return number_of_reaching_defs;
3444 /* Expression computed by insn is available and the substitution is legal,
3445 so try to perform the substitution.
3447 The result is non-zero if any changes were made. */
3450 handle_avail_expr (insn, expr)
3454 rtx pat, insn_computes_expr, expr_set;
3456 struct reg_set *this_reg;
3457 int found_setting, use_src;
3460 /* We only handle the case where one computation of the expression
3461 reaches this instruction. */
3462 insn_computes_expr = computing_insn (expr, insn);
3463 if (insn_computes_expr == NULL)
3465 expr_set = single_set (insn_computes_expr);
3472 /* At this point we know only one computation of EXPR outside of this
3473 block reaches this insn. Now try to find a register that the
3474 expression is computed into. */
3475 if (GET_CODE (SET_SRC (expr_set)) == REG)
3477 /* This is the case when the available expression that reaches
3478 here has already been handled as an available expression. */
3479 unsigned int regnum_for_replacing
3480 = REGNO (SET_SRC (expr_set));
3482 /* If the register was created by GCSE we can't use `reg_set_table',
3483 however we know it's set only once. */
3484 if (regnum_for_replacing >= max_gcse_regno
3485 /* If the register the expression is computed into is set only once,
3486 or only one set reaches this insn, we can use it. */
3487 || (((this_reg = reg_set_table[regnum_for_replacing]),
3488 this_reg->next == NULL)
3489 || can_disregard_other_sets (&this_reg, insn, 0)))
3498 unsigned int regnum_for_replacing
3499 = REGNO (SET_DEST (expr_set));
3501 /* This shouldn't happen. */
3502 if (regnum_for_replacing >= max_gcse_regno)
3505 this_reg = reg_set_table[regnum_for_replacing];
3507 /* If the register the expression is computed into is set only once,
3508 or only one set reaches this insn, use it. */
3509 if (this_reg->next == NULL
3510 || can_disregard_other_sets (&this_reg, insn, 0))
3516 pat = PATTERN (insn);
3518 to = SET_SRC (expr_set);
3520 to = SET_DEST (expr_set);
3521 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3523 /* We should be able to ignore the return code from validate_change but
3524 to play it safe we check. */
3528 if (gcse_file != NULL)
3530 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3532 fprintf (gcse_file, " reg %d %s insn %d\n",
3533 REGNO (to), use_src ? "from" : "set in",
3534 INSN_UID (insn_computes_expr));
3539 /* The register that the expr is computed into is set more than once. */
3540 else if (1 /*expensive_op(this_pattern->op) && do_expensive_gcse)*/)
3542 /* Insert an insn after insnx that copies the reg set in insnx
3543 into a new pseudo register; call this new register REGN.
3544 From insnb until the end of the basic block, or until REGB is
3545 set, replace all uses of REGB with REGN. */
3548 to = gen_reg_rtx (GET_MODE (SET_DEST (expr_set)));
3550 /* Generate the new insn. */
3551 /* ??? If the change fails, we return 0, even though we created
3552 an insn. I think this is ok. */
3554 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3555 SET_DEST (expr_set)),
3556 insn_computes_expr);
3558 /* Keep register set table up to date. */
3559 record_one_set (REGNO (to), new_insn);
3561 gcse_create_count++;
3562 if (gcse_file != NULL)
3564 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3565 INSN_UID (NEXT_INSN (insn_computes_expr)),
3566 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3567 fprintf (gcse_file, ", computed in insn %d,\n",
3568 INSN_UID (insn_computes_expr));
3569 fprintf (gcse_file, " into newly allocated reg %d\n",
3573 pat = PATTERN (insn);
3575 /* Do register replacement for INSN. */
3576 changed = validate_change (insn, &SET_SRC (pat),
3577 SET_DEST (PATTERN
3578 (NEXT_INSN (insn_computes_expr))),
3579 0);
3581 /* We should be able to ignore the return code from validate_change but
3582 to play it safe we check. */
3586 if (gcse_file != NULL)
3589 "GCSE: Replacing the source in insn %d with reg %d ",
3591 REGNO (SET_DEST (PATTERN (NEXT_INSN
3592 (insn_computes_expr)))));
3593 fprintf (gcse_file, "set in insn %d\n",
3594 INSN_UID (insn_computes_expr));
3602 /* Perform classic GCSE. This is called by one_classic_gcse_pass after all
3603 the dataflow analysis has been done.
3605 The result is non-zero if a change was made. */
3613 /* Note we start at block 1. */
3616 for (bb = 1; bb < n_basic_blocks; bb++)
3618 /* Reset tables used to keep track of what's still valid [since the
3619 start of the block]. */
3620 reset_opr_set_tables ();
3622 for (insn = BLOCK_HEAD (bb);
3623 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3624 insn = NEXT_INSN (insn))
3626 /* Is insn of form (set (pseudo-reg) ...)? */
3627 if (GET_CODE (insn) == INSN
3628 && GET_CODE (PATTERN (insn)) == SET
3629 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3630 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3632 rtx pat = PATTERN (insn);
3633 rtx src = SET_SRC (pat);
3636 if (want_to_gcse_p (src)
3637 /* Is the expression recorded? */
3638 && ((expr = lookup_expr (src)) != NULL)
3639 /* Is the expression available [at the start of the
3640 block]? */
3641 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3642 /* Are the operands unchanged since the start of the
3643 block? */
3644 && oprs_not_set_p (src, insn))
3645 changed |= handle_avail_expr (insn, expr);
3648 /* Keep track of everything modified by this insn. */
3649 /* ??? Need to be careful w.r.t. mods done to INSN. */
3651 mark_oprs_set (insn);
3658 /* Top level routine to perform one classic GCSE pass.
3660 Return non-zero if a change was made. */
3663 one_classic_gcse_pass (pass)
3668 gcse_subst_count = 0;
3669 gcse_create_count = 0;
3671 alloc_expr_hash_table (max_cuid);
3672 alloc_rd_mem (n_basic_blocks, max_cuid);
3673 compute_expr_hash_table ();
3675 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3676 expr_hash_table_size, n_exprs);
3682 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3684 compute_ae_kill (ae_gen, ae_kill);
3685 compute_available (ae_gen, ae_kill, ae_out, ae_in);
3686 changed = classic_gcse ();
3687 free_avail_expr_mem ();
3691 free_expr_hash_table ();
3695 fprintf (gcse_file, "\n");
3696 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3697 current_function_name, pass, bytes_used, gcse_subst_count);
3698 fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3704 /* Compute copy/constant propagation working variables. */
3706 /* Local properties of assignments. */
3707 static sbitmap *cprop_pavloc;
3708 static sbitmap *cprop_absaltered;
3710 /* Global properties of assignments (computed from the local properties). */
3711 static sbitmap *cprop_avin;
3712 static sbitmap *cprop_avout;
3714 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
3715 basic blocks. N_SETS is the number of sets. */
3718 alloc_cprop_mem (n_blocks, n_sets)
3719 int n_blocks, n_sets;
3721 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3722 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3724 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3725 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3728 /* Free vars used by copy/const propagation. */
3733 sbitmap_vector_free (cprop_pavloc);
3734 sbitmap_vector_free (cprop_absaltered);
3735 sbitmap_vector_free (cprop_avin);
3736 sbitmap_vector_free (cprop_avout);
3739 /* For each block, compute whether X is transparent. X is either an
3740 expression or an assignment [though we don't care which, for this context
3741 an assignment is treated as an expression]. For each block where an
3742 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3743 bit in BMAP. */
3746 compute_transp (x, indx, bmap, set_p)
3757 /* repeat is used to turn tail-recursion into iteration since GCC
3758 can't do it when there's no return value. */
3764 code = GET_CODE (x);
3770 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3772 for (bb = 0; bb < n_basic_blocks; bb++)
3773 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3774 SET_BIT (bmap[bb], indx);
3778 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3779 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3784 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3786 for (bb = 0; bb < n_basic_blocks; bb++)
3787 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3788 RESET_BIT (bmap[bb], indx);
3792 for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3793 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3800 for (bb = 0; bb < n_basic_blocks; bb++)
3802 rtx list_entry = canon_modify_mem_list[bb];
3806 rtx dest, dest_addr;
3808 if (GET_CODE (XEXP (list_entry, 0)) == CALL_INSN)
3811 SET_BIT (bmap[bb], indx);
3813 RESET_BIT (bmap[bb], indx);
3816 /* LIST_ENTRY must be an INSN of some kind that sets memory.
3817 Examine each hunk of memory that is modified. */
3819 dest = XEXP (list_entry, 0);
3820 list_entry = XEXP (list_entry, 1);
3821 dest_addr = XEXP (list_entry, 0);
3823 if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
3824 x, rtx_addr_varies_p))
3827 SET_BIT (bmap[bb], indx);
3829 RESET_BIT (bmap[bb], indx);
3832 list_entry = XEXP (list_entry, 1);
3855 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3859 /* If we are about to do the last recursive call
3860 needed at this level, change it into iteration.
3861 This function is called enough to be worth it. */
3868 compute_transp (XEXP (x, i), indx, bmap, set_p);
3870 else if (fmt[i] == 'E')
3871 for (j = 0; j < XVECLEN (x, i); j++)
3872 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3876 /* Top level routine to do the dataflow analysis needed by copy/const
3880 compute_cprop_data ()
3882 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3883 compute_available (cprop_pavloc, cprop_absaltered,
3884 cprop_avout, cprop_avin);
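/* The computation just invoked solves the standard availability
   equations (a sketch):

     AVOUT[b] = PAVLOC[b] union (AVIN[b] - ABSALTERED[b])
     AVIN[b]  = intersection over predecessors p of AVOUT[p]

   with AVIN empty at the entry block, so an assignment is available
   at a point only if every path to it computes the value and no
   intervening block alters the operands.  */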
3887 /* Copy/constant propagation. */
3889 /* Maximum number of register uses in an insn that we handle. */
3892 /* Table of uses found in an insn.
3893 Allocated statically to avoid alloc/free complexity and overhead. */
3894 static struct reg_use reg_use_table[MAX_USES];
3896 /* Index into `reg_use_table' while building it. */
3897 static int reg_use_count;
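/* A sketch of the assumed declarations; the real ones appear earlier in
   the file, and the bound shown here is illustrative only.  */
#if 0
#define MAX_USES 8

struct reg_use { rtx reg_rtx; };
#endif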
3899 /* Set up a list of register numbers used in INSN. The found uses are stored
3900 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
3901 and contains the number of uses in the table upon exit.
3903 ??? If a register appears multiple times we will record it multiple times.
3904 This doesn't hurt anything but it will slow things down. */
3907 find_used_regs (xptr, data)
3909 void *data ATTRIBUTE_UNUSED;
3916 /* repeat is used to turn tail-recursion into iteration since GCC
3917 can't do it when there's no return value. */
3922 code = GET_CODE (x);
3925 if (reg_use_count == MAX_USES)
3928 reg_use_table[reg_use_count].reg_rtx = x;
3932 /* Recursively scan the operands of this expression. */
3934 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3938 /* If we are about to do the last recursive call
3939 needed at this level, change it into iteration.
3940 This function is called enough to be worth it. */
3947 find_used_regs (&XEXP (x, i), data);
3949 else if (fmt[i] == 'E')
3950 for (j = 0; j < XVECLEN (x, i); j++)
3951 find_used_regs (&XVECEXP (x, i, j), data);
3955 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3956 Returns non-zero if successful. */
3959 try_replace_reg (from, to, insn)
3962 rtx note = find_reg_equal_equiv_note (insn);
3965 rtx set = single_set (insn);
3967 success = validate_replace_src (from, to, insn);
3969 /* If above failed and this is a single set, try to simplify the source of
3970 the set given our substitution. We could perhaps try this for multiple
3971 SETs, but it probably won't buy us anything. */
3972 if (!success && set != 0)
3974 src = simplify_replace_rtx (SET_SRC (set), from, to);
3976 if (!rtx_equal_p (src, SET_SRC (set))
3977 && validate_change (insn, &SET_SRC (set), src, 0))
3981 /* If we've failed to do replacement, have a single SET, and don't already
3982 have a note, add a REG_EQUAL note to not lose information. */
3983 if (!success && note == 0 && set != 0)
3984 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
3986 /* If there is already a NOTE, update the expression in it with our
3987 replacement. */
3989 XEXP (note, 0) = simplify_replace_rtx (XEXP (note, 0), from, to);
3991 /* REG_EQUAL may get simplified into a register.
3992 We don't allow that, so remove the note in that case.  This code
3993 ought not to happen, because previous code ought to synthesize a
3994 reg-reg move, but be on the safe side. */
3995 if (note && REG_P (XEXP (note, 0)))
3996 remove_note (insn, note);
4001 /* Find a set of REGNOs that are available on entry to INSN's block. Returns
4002 NULL if no such set is found. */
4004 static struct expr *
4005 find_avail_set (regno, insn)
4009 /* SET1 contains the last set found that can be returned to the caller for
4010 use in a substitution. */
4011 struct expr *set1 = 0;
4013 /* Loops are not possible here. To get a loop we would need two sets
4014 available at the start of the block containing INSN, i.e. we would
4015 need two sets like this available at the start of the block:
4017 (set (reg X) (reg Y))
4018 (set (reg Y) (reg X))
4020 This cannot happen since the set of (reg Y) would have killed the
4021 set of (reg X) making it unavailable at the start of this block. */
4025 struct expr *set = lookup_set (regno, NULL_RTX);
4027 /* Find a set that is available at the start of the block
4028 which contains INSN. */
4031 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
4033 set = next_set (regno, set);
4036 /* If no available set was found we've reached the end of the
4037 (possibly empty) copy chain. */
4041 if (GET_CODE (set->expr) != SET)
4044 src = SET_SRC (set->expr);
4046 /* We know the set is available.
4047 Now check that SRC is ANTLOC (i.e. none of the source operands
4048 have changed since the start of the block).
4050 If the source operand changed, we may still use it for the next
4051 iteration of this loop, but we may not use it for substitutions. */
4053 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
4056 /* If the source of the set is anything except a register, then
4057 we have reached the end of the copy chain. */
4058 if (GET_CODE (src) != REG)
4061 /* Follow the copy chain, i.e. start another iteration of the loop
4062 and see if we have an available copy into SRC. */
4063 regno = REGNO (src);
4066 /* SET1 holds the last set that was available and anticipatable at
4067 INSN. */
4071 /* Subroutine of cprop_insn that tries to propagate constants into
4072 JUMP_INSNS. INSN must be a conditional jump. FROM is what we will try to
4073 replace, SRC is the constant we will try to substitute for it. Returns
4074 nonzero if a change was made. We know INSN has just a SET. */
4077 cprop_jump (bb, insn, from, src)
4083 rtx set = PATTERN (insn);
4084 rtx new = simplify_replace_rtx (SET_SRC (set), from, src);
4086 /* If no simplification can be made, then try the next
4087 register. */
4088 if (rtx_equal_p (new, SET_SRC (set)))
4091 /* If this is now a no-op, delete it; otherwise this must be a valid insn. */
4096 if (! validate_change (insn, &SET_SRC (set), new, 0))
4099 /* If this has turned into an unconditional jump,
4100 then put a barrier after it so that the unreachable
4101 code will be deleted. */
4102 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
4103 emit_barrier_after (insn);
4106 run_jump_opt_after_gcse = 1;
4109 if (gcse_file != NULL)
4112 "CONST-PROP: Replacing reg %d in insn %d with constant ",
4113 REGNO (from), INSN_UID (insn));
4114 print_rtl (gcse_file, src);
4115 fprintf (gcse_file, "\n");
4117 purge_dead_edges (bb);
4124 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
4125 for machines that have CC0. INSN is a single set that stores into CC0;
4126 the insn following it is a conditional jump. REG_USED is the use we will
4127 try to replace, SRC is the constant we will try to substitute for it.
4128 Returns nonzero if a change was made. */
4131 cprop_cc0_jump (bb, insn, reg_used, src)
4134 struct reg_use *reg_used;
4137 /* First substitute in the SET_SRC of INSN, then substitute that for
4138 CC0 in JUMP. */
4139 rtx jump = NEXT_INSN (insn);
4140 rtx new_src = simplify_replace_rtx (SET_SRC (PATTERN (insn)),
4141 reg_used->reg_rtx, src);
4143 if (! cprop_jump (bb, jump, cc0_rtx, new_src))
4146 /* If we succeeded, delete the cc0 setter. */
4153 /* Perform constant and copy propagation on INSN.
4154 The result is non-zero if a change was made. */
4157 cprop_insn (bb, insn, alter_jumps)
4162 struct reg_use *reg_used;
4170 note_uses (&PATTERN (insn), find_used_regs, NULL);
4172 note = find_reg_equal_equiv_note (insn);
4174 /* We may win even when propagating constants into notes. */
4176 find_used_regs (&XEXP (note, 0), NULL);
4178 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
4179 reg_used++, reg_use_count--)
4181 unsigned int regno = REGNO (reg_used->reg_rtx);
4185 /* Ignore registers created by GCSE.
4186 We do this because ... */
4187 if (regno >= max_gcse_regno)
4190 /* If the register has already been set in this block, there's
4191 nothing we can do. */
4192 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
4195 /* Find an assignment that sets reg_used and is available
4196 at the start of the block. */
4197 set = find_avail_set (regno, insn);
4202 /* ??? We might be able to handle PARALLELs. Later. */
4203 if (GET_CODE (pat) != SET)
4206 src = SET_SRC (pat);
4208 /* Constant propagation. */
4209 if (CONSTANT_P (src))
4211 /* Handle normal insns first. */
4212 if (GET_CODE (insn) == INSN
4213 && try_replace_reg (reg_used->reg_rtx, src, insn))
4217 if (gcse_file != NULL)
4219 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
4221 fprintf (gcse_file, "insn %d with constant ",
4223 print_rtl (gcse_file, src);
4224 fprintf (gcse_file, "\n");
4227 /* The original insn setting reg_used may or may not now be
4228 deletable. We leave the deletion to flow. */
4231 /* Try to propagate a CONST_INT into a conditional jump.
4232 We're pretty specific about what we will handle in this
4233 code; we can extend it as necessary over time.
4235 Right now the insn in question must look like
4236 (set (pc) (if_then_else ...)) */
4237 else if (alter_jumps
4238 && GET_CODE (insn) == JUMP_INSN
4239 && condjump_p (insn)
4240 && ! simplejump_p (insn))
4241 changed |= cprop_jump (bb, insn, reg_used->reg_rtx, src);
4244 /* Similar code for machines that use a pair of CC0 setter and
4245 conditional jump insn. */
4246 else if (alter_jumps
4247 && GET_CODE (PATTERN (insn)) == SET
4248 && SET_DEST (PATTERN (insn)) == cc0_rtx
4249 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
4250 && condjump_p (NEXT_INSN (insn))
4251 && ! simplejump_p (NEXT_INSN (insn))
4252 && cprop_cc0_jump (bb, insn, reg_used, src))
4259 else if (GET_CODE (src) == REG
4260 && REGNO (src) >= FIRST_PSEUDO_REGISTER
4261 && REGNO (src) != regno)
4263 if (try_replace_reg (reg_used->reg_rtx, src, insn))
4267 if (gcse_file != NULL)
4269 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
4270 regno, INSN_UID (insn));
4271 fprintf (gcse_file, " with reg %d\n", REGNO (src));
4274 /* The original insn setting reg_used may or may not now be
4275 deletable. We leave the deletion to flow. */
4276 /* FIXME: If it turns out that the insn isn't deletable,
4277 then we may have unnecessarily extended register lifetimes
4278 and made things worse. */
4286 /* Forward propagate copies. This includes copies and constants. Return
4287 non-zero if a change was made. */
4296 /* Note we start at block 1. */
4299 for (bb = 1; bb < n_basic_blocks; bb++)
4301 /* Reset tables used to keep track of what's still valid [since the
4302 start of the block]. */
4303 reset_opr_set_tables ();
4305 for (insn = BLOCK_HEAD (bb);
4306 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4307 insn = NEXT_INSN (insn))
4310 changed |= cprop_insn (BASIC_BLOCK (bb), insn, alter_jumps);
4312 /* Keep track of everything modified by this insn. */
4313 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4314 call mark_oprs_set if we turned the insn into a NOTE. */
4315 if (GET_CODE (insn) != NOTE)
4316 mark_oprs_set (insn);
4320 if (gcse_file != NULL)
4321 fprintf (gcse_file, "\n");
4326 /* Perform one copy/constant propagation pass.
4327 F is the first insn in the function.
4328 PASS is the pass count. */
4331 one_cprop_pass (pass, alter_jumps)
4337 const_prop_count = 0;
4338 copy_prop_count = 0;
4340 alloc_set_hash_table (max_cuid);
4341 compute_set_hash_table ();
4343 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4347 alloc_cprop_mem (n_basic_blocks, n_sets);
4348 compute_cprop_data ();
4349 changed = cprop (alter_jumps);
4353 free_set_hash_table ();
4357 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
4358 current_function_name, pass, bytes_used);
4359 fprintf (gcse_file, "%d const props, %d copy props\n\n",
4360 const_prop_count, copy_prop_count);
4366 /* Compute PRE+LCM working variables. */
4368 /* Local properties of expressions. */
4369 /* Nonzero for expressions that are transparent in the block. */
4370 static sbitmap *transp;
4372 /* Nonzero for expressions that are transparent at the end of the block.
4373 This is only zero for expressions killed by an abnormal critical edge
4374 created by a call. */
4375 static sbitmap *transpout;
4377 /* Nonzero for expressions that are computed (available) in the block. */
4378 static sbitmap *comp;
4380 /* Nonzero for expressions that are locally anticipatable in the block. */
4381 static sbitmap *antloc;
4383 /* Nonzero for expressions where this block is an optimal computation
4384 point. */
4385 static sbitmap *pre_optimal;
4387 /* Nonzero for expressions which are redundant in a particular block. */
4388 static sbitmap *pre_redundant;
4390 /* Nonzero for expressions which should be inserted on a specific edge. */
4391 static sbitmap *pre_insert_map;
4393 /* Nonzero for expressions which should be deleted in a specific block. */
4394 static sbitmap *pre_delete_map;
4396 /* Contains the edge_list returned by pre_edge_lcm. */
4397 static struct edge_list *edge_list;
4399 /* Redundant insns. */
4400 static sbitmap pre_redundant_insns;
4402 /* Allocate vars used for PRE analysis. */
4405 alloc_pre_mem (n_blocks, n_exprs)
4406 int n_blocks, n_exprs;
4408 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4409 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4410 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4413 pre_redundant = NULL;
4414 pre_insert_map = NULL;
4415 pre_delete_map = NULL;
4418 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4420 /* pre_insert and pre_delete are allocated later. */
4423 /* Free vars used for PRE analysis. */
4428 sbitmap_vector_free (transp);
4429 sbitmap_vector_free (comp);
4431 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
4434 sbitmap_vector_free (pre_optimal);
4436 sbitmap_vector_free (pre_redundant);
4438 sbitmap_vector_free (pre_insert_map);
4440 sbitmap_vector_free (pre_delete_map);
4442 sbitmap_vector_free (ae_in);
4444 sbitmap_vector_free (ae_out);
4446 transp = comp = NULL;
4447 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4448 ae_in = ae_out = NULL;
4451 /* Top level routine to do the dataflow analysis needed by PRE. */
4456 sbitmap trapping_expr;
4460 compute_local_properties (transp, comp, antloc, 0);
4461 sbitmap_vector_zero (ae_kill, n_basic_blocks);
4463 /* Collect expressions which might trap. */
4464 trapping_expr = sbitmap_alloc (n_exprs);
4465 sbitmap_zero (trapping_expr);
4466 for (ui = 0; ui < expr_hash_table_size; ui++)
4469 for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
4470 if (may_trap_p (e->expr))
4471 SET_BIT (trapping_expr, e->bitmap_index);
4474 /* Compute ae_kill for each basic block using:
4476 ~(TRANSP | COMP)
4478 This is significantly faster than compute_ae_kill.  */
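/* Editor's note (added for exposition): an expression survives to the
   end of a block iff the block either leaves it alone (TRANSP) or
   recomputes it (COMP).  Hence AE_KILL = ~(TRANSP | COMP): a bit is set
   exactly when the expression is neither transparent nor computed in
   the block, which is what compute_ae_kill would derive insn by insn.  */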
4480 for (i = 0; i < n_basic_blocks; i++)
4484 /* If the current block is the destination of an abnormal edge, we
4485 kill all trapping expressions because we won't be able to properly
4486 place the instruction on the edge. So make them neither
4487 anticipatable nor transparent. This is fairly conservative. */
4488 for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
4489 if (e->flags & EDGE_ABNORMAL)
4491 sbitmap_difference (antloc[i], antloc[i], trapping_expr);
4492 sbitmap_difference (transp[i], transp[i], trapping_expr);
4496 sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4497 sbitmap_not (ae_kill[i], ae_kill[i]);
4500 edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4501 ae_kill, &pre_insert_map, &pre_delete_map);
4502 sbitmap_vector_free (antloc);
4504 sbitmap_vector_free (ae_kill);
4506 sbitmap_free (trapping_expr);
4511 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4512 block BB.
4514 VISITED is a pointer to a working buffer for tracking which BB's have
4515 been visited. It is NULL for the top-level call.
4517 We treat reaching expressions that go through blocks containing the same
4518 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4519 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4520 2 as not reaching. The intent is to improve the probability of finding
4521 only one reaching expression and to reduce register lifetimes by picking
4522 the closest such expression. */
4525 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4526 basic_block occr_bb;
4533 for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
4535 basic_block pred_bb = pred->src;
4537 if (pred->src == ENTRY_BLOCK_PTR
4538 /* Has this predecessor already been visited?  */
4539 || visited[pred_bb->index])
4540 ;/* Nothing to do. */
4542 /* Does this predecessor generate this expression? */
4543 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
4545 /* Is this the occurrence we're looking for?
4546 Note that there's only one generating occurrence per block
4547 so we just need to check the block number. */
4548 if (occr_bb == pred_bb)
4551 visited[pred_bb->index] = 1;
4553 /* Ignore this predecessor if it kills the expression. */
4554 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
4555 visited[pred_bb->index] = 1;
4557 /* Neither gen nor kill. */
4560 visited[pred_bb->index] = 1;
4561 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4566 /* All paths have been checked. */
4570 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
4571 memory allocated for that function is returned. */
4574 pre_expr_reaches_here_p (occr_bb, expr, bb)
4575 basic_block occr_bb;
4580 char *visited = (char *) xcalloc (n_basic_blocks, 1);
4582 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
4589 /* Given an expr, generate RTL which we can insert at the end of a BB,
4590 or on an edge.  Set the block number of any insns generated to
4591 the value of BB.  */
4594 process_insert_insn (expr)
4597 rtx reg = expr->reaching_reg;
4598 rtx exp = copy_rtx (expr->expr);
4603 /* If the expression is something that's an operand, like a constant,
4604 just copy it to a register. */
4605 if (general_operand (exp, GET_MODE (reg)))
4606 emit_move_insn (reg, exp);
4608 /* Otherwise, make a new insn to compute this expression and make sure the
4609 insn will be recognized (this also adds any needed CLOBBERs). Copy the
4610 expression to make sure we don't have any sharing issues. */
4611 else if (insn_invalid_p (emit_insn (gen_rtx_SET (VOIDmode, reg, exp))))
4614 pat = gen_sequence ();
4620 /* Add EXPR to the end of basic block BB.
4622 This is used by both PRE and code hoisting.
4624 For PRE, we want to verify that the expr is either transparent
4625 or locally anticipatable in the target block. This check makes
4626 no sense for code hoisting. */
4629 insert_insn_end_bb (expr, bb, pre)
4636 rtx reg = expr->reaching_reg;
4637 int regno = REGNO (reg);
4641 pat = process_insert_insn (expr);
4643 /* If the last insn is a jump, insert EXPR in front [taking care to
4644 handle cc0, etc. properly].  Similarly we need to take care of trapping
4645 instructions in the presence of non-call exceptions.  */
4647 if (GET_CODE (insn) == JUMP_INSN
4648 || (GET_CODE (insn) == INSN
4649 && (bb->succ->succ_next || (bb->succ->flags & EDGE_ABNORMAL))))
4654 /* It should always be the case that we can put these instructions
4655 anywhere in the basic block when performing PRE optimizations.
4656 Check this.  */
4657 if (GET_CODE (insn) == INSN && pre
4658 && !TEST_BIT (antloc[bb->index], expr->bitmap_index)
4659 && !TEST_BIT (transp[bb->index], expr->bitmap_index))
4662 /* If this is a jump table, then we can't insert stuff here. Since
4663 we know the previous real insn must be the tablejump, we insert
4664 the new instruction just before the tablejump. */
4665 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4666 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4667 insn = prev_real_insn (insn);
4670 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4671 if cc0 isn't set. */
4672 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4674 insn = XEXP (note, 0);
4677 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4678 if (maybe_cc0_setter
4679 && INSN_P (maybe_cc0_setter)
4680 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4681 insn = maybe_cc0_setter;
4684 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4685 new_insn = emit_insn_before (pat, insn);
4688 /* Likewise if the last insn is a call, as will happen in the presence
4689 of exception handling. */
4690 else if (GET_CODE (insn) == CALL_INSN
4691 && (bb->succ->succ_next || (bb->succ->flags & EDGE_ABNORMAL)))
4693 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4694 we search backward and place the instructions before the first
4695 parameter is loaded. Do this for everyone for consistency and a
4696 presumption that we'll get better code elsewhere as well.
4698 It should always be the case that we can put these instructions
4699 anywhere in the basic block when performing PRE optimizations.
4700 Check this.  */
4702 if (pre
4703 && !TEST_BIT (antloc[bb->index], expr->bitmap_index)
4704 && !TEST_BIT (transp[bb->index], expr->bitmap_index))
4707 /* Since different machines initialize their parameter registers
4708 in different orders, assume nothing. Collect the set of all
4709 parameter registers. */
4710 insn = find_first_parameter_load (insn, bb->head);
4712 /* If we found all the parameter loads, then we want to insert
4713 before the first parameter load.
4715 If we did not find all the parameter loads, then we might have
4716 stopped on the head of the block, which could be a CODE_LABEL.
4717 If we inserted before the CODE_LABEL, then we would be putting
4718 the insn in the wrong basic block. In that case, put the insn
4719 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
4720 while (GET_CODE (insn) == CODE_LABEL
4721 || NOTE_INSN_BASIC_BLOCK_P (insn))
4722 insn = NEXT_INSN (insn);
4724 new_insn = emit_insn_before (pat, insn);
4727 new_insn = emit_insn_after (pat, insn);
4729 /* Keep block number table up to date.
4730 Note, PAT could be a multiple insn sequence, we have to make
4731 sure that each insn in the sequence is handled. */
4732 if (GET_CODE (pat) == SEQUENCE)
4734 for (i = 0; i < XVECLEN (pat, 0); i++)
4736 rtx insn = XVECEXP (pat, 0, i);
4738 add_label_notes (PATTERN (insn), new_insn);
4740 note_stores (PATTERN (insn), record_set_info, insn);
4745 add_label_notes (pat, new_insn);
4747 /* Keep register set table up to date. */
4748 record_one_set (regno, new_insn);
4751 gcse_create_count++;
4755 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4756 bb->index, INSN_UID (new_insn));
4757 fprintf (gcse_file, "copying expression %d to reg %d\n",
4758 expr->bitmap_index, regno);
4762 /* Insert partially redundant expressions on edges in the CFG to make
4763 the expressions fully redundant. */
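/* Editor's illustration (an added sketch): in the diamond

		1
	       / \
	      2   3
	       \ /
		4

   where `a + b' is computed in block 2 and again in block 4, the
   occurrence in block 4 is only partially redundant.  PRE_INSERT_MAP
   will request an insertion of `a + b' on the edge 3->4; after that the
   computation in block 4 is fully redundant and pre_delete can replace
   it with a copy from the reaching register.  */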
4766 pre_edge_insert (edge_list, index_map)
4767 struct edge_list *edge_list;
4768 struct expr **index_map;
4770 int e, i, j, num_edges, set_size, did_insert = 0;
4773 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4774 if it reaches any of the deleted expressions. */
4776 set_size = pre_insert_map[0]->size;
4777 num_edges = NUM_EDGES (edge_list);
4778 inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4779 sbitmap_vector_zero (inserted, num_edges);
4781 for (e = 0; e < num_edges; e++)
4784 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
4786 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4788 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4790 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4791 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4793 struct expr *expr = index_map[j];
4796 /* Now look at each deleted occurrence of this expression. */
4797 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4799 if (! occr->deleted_p)
4802 /* Insert this expression on this edge if it would
4803 reach the deleted occurrence in BB. */
4804 if (!TEST_BIT (inserted[e], j))
4807 edge eg = INDEX_EDGE (edge_list, e);
4809 /* We can't insert anything on an abnormal and
4810 critical edge, so we insert the insn at the end of
4811 the previous block. There are several alternatives
4812 detailed in Morgan's book P277 (sec 10.5) for
4813 handling this situation.  This one is easiest for
4814 now.  */
4816 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4817 insert_insn_end_bb (index_map[j], bb, 0);
4820 insn = process_insert_insn (index_map[j]);
4821 insert_insn_on_edge (insn, eg);
4826 fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4828 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4829 fprintf (gcse_file, "copy expression %d\n",
4830 expr->bitmap_index);
4833 update_ld_motion_stores (expr);
4834 SET_BIT (inserted[e], j);
4836 gcse_create_count++;
4843 sbitmap_vector_free (inserted);
4847 /* Copy the result of INSN to REG. INDX is the expression number. */
4850 pre_insert_copy_insn (expr, insn)
4854 rtx reg = expr->reaching_reg;
4855 int regno = REGNO (reg);
4856 int indx = expr->bitmap_index;
4857 rtx set = single_set (insn);
4863 new_insn = emit_insn_after (gen_move_insn (reg, SET_DEST (set)), insn);
4865 /* Keep register set table up to date. */
4866 record_one_set (regno, new_insn);
4868 gcse_create_count++;
4872 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4873 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4874 INSN_UID (insn), regno);
4875 update_ld_motion_stores (expr);
4878 /* Copy available expressions that reach the redundant expression
4879 to `reaching_reg'. */
4882 pre_insert_copies ()
4889 /* For each available expression in the table, copy the result to
4890 `reaching_reg' if the expression reaches a deleted one.
4892 ??? The current algorithm is rather brute force.
4893 Need to do some profiling. */
4895 for (i = 0; i < expr_hash_table_size; i++)
4896 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4898 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
4899 we don't want to insert a copy here because the expression may not
4900 really be redundant. So only insert an insn if the expression was
4901 deleted. This test also avoids further processing if the
4902 expression wasn't deleted anywhere. */
4903 if (expr->reaching_reg == NULL)
4906 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4908 if (! occr->deleted_p)
4911 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4913 rtx insn = avail->insn;
4915 /* No need to handle this one if handled already. */
4916 if (avail->copied_p)
4919 /* Don't handle this one if it's a redundant one. */
4920 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4923 /* Or if the expression doesn't reach the deleted one. */
4924 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
4926 BLOCK_FOR_INSN (occr->insn)))
4929 /* Copy the result of avail to reaching_reg. */
4930 pre_insert_copy_insn (expr, insn);
4931 avail->copied_p = 1;
4937 /* Delete redundant computations.
4938 Deletion is done by changing the insn to copy the `reaching_reg' of
4939 the expression into the result of the SET. It is left to later passes
4940 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4942 Returns non-zero if a change is made. */
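/* Editor's illustration (an added sketch; register numbers are
   arbitrary): a redundant computation

	(set (reg 70) (plus (reg 60) (reg 61)))

   is rewritten via validate_change into the copy

	(set (reg 70) (reg 75))

   where (reg 75) is the `reaching_reg' pseudo that holds the value of
   the expression computed earlier on every path to this insn.  */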
4953 for (i = 0; i < expr_hash_table_size; i++)
4954 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4956 int indx = expr->bitmap_index;
4958 /* We only need to search antic_occr since we require
4959 ANTLOC != 0.  */
4961 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4963 rtx insn = occr->insn;
4965 basic_block bb = BLOCK_FOR_INSN (insn);
4967 if (TEST_BIT (pre_delete_map[bb->index], indx))
4969 set = single_set (insn);
4973 /* Create a pseudo-reg to store the result of reaching
4974 expressions into. Get the mode for the new pseudo from
4975 the mode of the original destination pseudo. */
4976 if (expr->reaching_reg == NULL)
4978 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4980 /* In theory this should never fail since we're creating
4981 a reg->reg copy.
4983 However, on the x86 some of the movXX patterns actually
4984 contain clobbers of scratch regs. This may cause the
4985 insn created by validate_change to not match any pattern
4986 and thus cause validate_change to fail. */
4987 if (validate_change (insn, &SET_SRC (set),
4988 expr->reaching_reg, 0))
4990 occr->deleted_p = 1;
4991 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4999 "PRE: redundant insn %d (expression %d) in ",
5000 INSN_UID (insn), indx);
5001 fprintf (gcse_file, "bb %d, reaching reg is %d\n",
5002 bb->index, REGNO (expr->reaching_reg));
5011 /* Perform GCSE optimizations using PRE.
5012 This is called by one_pre_gcse_pass after all the dataflow analysis
5015 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
5016 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
5017 Compiler Design and Implementation.
5019 ??? A new pseudo reg is created to hold the reaching expression. The nice
5020 thing about the classical approach is that it would try to use an existing
5021 reg. If the register can't be adequately optimized [i.e. we introduce
5022 reload problems], one could add a pass here to propagate the new register
5023 through the block.
5025 ??? We don't handle single sets in PARALLELs because we're [currently] not
5026 able to copy the rest of the parallel when we insert copies to create full
5027 redundancies from partial redundancies. However, there's no reason why we
5028 can't handle PARALLELs in the cases where there are no partial
5029 redundancies.  */
5035 int did_insert, changed;
5036 struct expr **index_map;
5039 /* Compute a mapping from expression number (`bitmap_index') to
5040 hash table entry. */
5042 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5043 for (i = 0; i < expr_hash_table_size; i++)
5044 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5045 index_map[expr->bitmap_index] = expr;
5047 /* Reset bitmap used to track which insns are redundant. */
5048 pre_redundant_insns = sbitmap_alloc (max_cuid);
5049 sbitmap_zero (pre_redundant_insns);
5051 /* Delete the redundant insns first so that
5052 - we know what register to use for the new insns and for the other
5053 ones with reaching expressions
5054 - we know which insns are redundant when we go to create copies */
5056 changed = pre_delete ();
5058 did_insert = pre_edge_insert (edge_list, index_map);
5060 /* In other places with reaching expressions, copy the expression to the
5061 specially allocated pseudo-reg that reaches the redundant expr. */
5062 pre_insert_copies ();
5065 commit_edge_insertions ();
5070 sbitmap_free (pre_redundant_insns);
5074 /* Top level routine to perform one PRE GCSE pass.
5076 Return non-zero if a change was made. */
5079 one_pre_gcse_pass (pass)
5084 gcse_subst_count = 0;
5085 gcse_create_count = 0;
5087 alloc_expr_hash_table (max_cuid);
5088 add_noreturn_fake_exit_edges ();
5090 compute_ld_motion_mems ();
5092 compute_expr_hash_table ();
5093 trim_ld_motion_mems ();
5095 dump_hash_table (gcse_file, "Expression", expr_hash_table,
5096 expr_hash_table_size, n_exprs);
5100 alloc_pre_mem (n_basic_blocks, n_exprs);
5101 compute_pre_data ();
5102 changed |= pre_gcse ();
5103 free_edge_list (edge_list);
5108 remove_fake_edges ();
5109 free_expr_hash_table ();
5113 fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
5114 current_function_name, pass, bytes_used);
5115 fprintf (gcse_file, "%d substs, %d insns created\n",
5116 gcse_subst_count, gcse_create_count);
5122 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
5123 If notes are added to an insn which references a CODE_LABEL, the
5124 LABEL_NUSES count is incremented. We have to add REG_LABEL notes,
5125 because the following loop optimization pass requires them. */
5127 /* ??? This is very similar to the loop.c add_label_notes function. We
5128 could probably share code here. */
5130 /* ??? If there was a jump optimization pass after gcse and before loop,
5131 then we would not need to do this here, because jump would add the
5132 necessary REG_LABEL notes. */
5135 add_label_notes (x, insn)
5139 enum rtx_code code = GET_CODE (x);
5143 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
5145 /* This code used to ignore labels that referred to dispatch tables to
5146 avoid flow generating (slightly) worse code.
5148 We no longer ignore such label references (see LABEL_REF handling in
5149 mark_jump_label for additional information). */
5151 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
5153 if (LABEL_P (XEXP (x, 0)))
5154 LABEL_NUSES (XEXP (x, 0))++;
5158 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
5161 add_label_notes (XEXP (x, i), insn);
5162 else if (fmt[i] == 'E')
5163 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5164 add_label_notes (XVECEXP (x, i, j), insn);
5168 /* Compute transparent outgoing information for each block.
5170 An expression is transparent to an edge unless it is killed by
5171 the edge itself. This can only happen with abnormal control flow,
5172 when the edge is traversed through a call. This happens with
5173 non-local labels and exceptions.
5175 This would not be necessary if we split the edge. While this is
5176 normally impossible for abnormal critical edges, with some effort
5177 it should be possible with exception handling, since we still have
5178 control over which handler should be invoked. But due to increased
5179 EH table sizes, this may not be worthwhile. */
5182 compute_transpout ()
5188 sbitmap_vector_ones (transpout, n_basic_blocks);
5190 for (bb = 0; bb < n_basic_blocks; ++bb)
5192 /* Note that flow inserted a nop at the end of basic blocks that
5193 end in call instructions for reasons other than abnormal
5194 control flow.  */
5195 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
5198 for (i = 0; i < expr_hash_table_size; i++)
5199 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
5200 if (GET_CODE (expr->expr) == MEM)
5202 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
5203 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
5206 /* ??? Optimally, we would use interprocedural alias
5207 analysis to determine if this mem is actually killed
5208 by this call.  */
5209 RESET_BIT (transpout[bb], expr->bitmap_index);
5214 /* Removal of useless null pointer checks */
5216 /* Called via note_stores. X is set by SETTER. If X is a register we must
5217 invalidate nonnull_local and set nonnull_killed. DATA is really a
5218 `null_pointer_info *'.
5220 We ignore hard registers. */
5223 invalidate_nonnull_info (x, setter, data)
5225 rtx setter ATTRIBUTE_UNUSED;
5229 struct null_pointer_info *npi = (struct null_pointer_info *) data;
5231 while (GET_CODE (x) == SUBREG)
5234 /* Ignore anything that is not a register or is a hard register. */
5235 if (GET_CODE (x) != REG
5236 || REGNO (x) < npi->min_reg
5237 || REGNO (x) >= npi->max_reg)
5240 regno = REGNO (x) - npi->min_reg;
5242 RESET_BIT (npi->nonnull_local[npi->current_block], regno);
5243 SET_BIT (npi->nonnull_killed[npi->current_block], regno);
5246 /* Do null-pointer check elimination for the registers indicated in
5247 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
5248 they are not our responsibility to free. */
5251 delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5253 unsigned int *block_reg;
5254 sbitmap *nonnull_avin;
5255 sbitmap *nonnull_avout;
5256 struct null_pointer_info *npi;
5260 sbitmap *nonnull_local = npi->nonnull_local;
5261 sbitmap *nonnull_killed = npi->nonnull_killed;
5263 /* Compute local properties, nonnull and killed. A register will have
5264 the nonnull property if at the end of the current block its value is
5265 known to be nonnull. The killed property indicates that somewhere in
5266 the block any information we had about the register is killed.
5268 Note that a register can have both properties in a single block. That
5269 indicates that it's killed, then later in the block a new value is
5270 computed.  */
5271 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5272 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5274 for (current_block = 0; current_block < n_basic_blocks; current_block++)
5276 rtx insn, stop_insn;
5278 /* Set the current block for invalidate_nonnull_info. */
5279 npi->current_block = current_block;
5281 /* Scan each insn in the basic block looking for memory references and
5282 null pointer checks.  */
5283 stop_insn = NEXT_INSN (BLOCK_END (current_block));
5284 for (insn = BLOCK_HEAD (current_block);
5286 insn = NEXT_INSN (insn))
5291 /* Ignore anything that is not a normal insn. */
5292 if (! INSN_P (insn))
5295 /* Basically ignore anything that is not a simple SET. We do have
5296 to make sure to invalidate nonnull_local and set nonnull_killed
5297 for such insns though. */
5298 set = single_set (insn);
5301 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5305 /* See if we've got a usable memory load. We handle it first
5306 in case it uses its address register as a dest (which kills
5307 the nonnull property). */
5308 if (GET_CODE (SET_SRC (set)) == MEM
5309 && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5310 && REGNO (reg) >= npi->min_reg
5311 && REGNO (reg) < npi->max_reg)
5312 SET_BIT (nonnull_local[current_block],
5313 REGNO (reg) - npi->min_reg);
5315 /* Now invalidate stuff clobbered by this insn. */
5316 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5318 /* And handle stores; we do these last since any sets in INSN can
5319 not kill the nonnull property if it is derived from a MEM
5320 appearing in a SET_DEST. */
5321 if (GET_CODE (SET_DEST (set)) == MEM
5322 && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5323 && REGNO (reg) >= npi->min_reg
5324 && REGNO (reg) < npi->max_reg)
5325 SET_BIT (nonnull_local[current_block],
5326 REGNO (reg) - npi->min_reg);
5330 /* Now compute global properties based on the local properties. This
5331 is a classic global availability algorithm.  */
5332 compute_available (nonnull_local, nonnull_killed,
5333 nonnull_avout, nonnull_avin);
5335 /* Now look at each bb and see if it ends with a compare of a value
5336 against zero.  */
5337 for (bb = 0; bb < n_basic_blocks; bb++)
5339 rtx last_insn = BLOCK_END (bb);
5340 rtx condition, earliest;
5341 int compare_and_branch;
5343 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5344 since BLOCK_REG[BB] is zero if this block did not end with a
5345 comparison against zero, this condition works. */
5346 if (block_reg[bb] < npi->min_reg
5347 || block_reg[bb] >= npi->max_reg)
5350 /* LAST_INSN is a conditional jump. Get its condition. */
5351 condition = get_condition (last_insn, &earliest);
5353 /* If we can't determine the condition then skip. */
5357 /* Is the register known to have a nonzero value? */
5358 if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5361 /* Try to compute whether the compare/branch at the loop end is one or
5362 two instructions. */
5363 if (earliest == last_insn)
5364 compare_and_branch = 1;
5365 else if (earliest == prev_nonnote_insn (last_insn))
5366 compare_and_branch = 2;
5370 /* We know the register in this comparison is nonnull at exit from
5371 this block. We can optimize this comparison. */
5372 if (GET_CODE (condition) == NE)
5376 new_jump = emit_jump_insn_after (gen_jump (JUMP_LABEL (last_insn)),
5378 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5379 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5380 emit_barrier_after (new_jump);
5383 delete_insn (last_insn);
5384 if (compare_and_branch == 2)
5385 delete_insn (earliest);
5386 purge_dead_edges (BASIC_BLOCK (bb));
5388 /* Don't check this block again. (Note that BLOCK_END is
5389 invalid here; we deleted the last instruction in the
5390 block.)  */
5391 block_reg[bb] = 0;
5395 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5396 at compile time.
5398 This is conceptually similar to global constant/copy propagation and
5399 classic global CSE (it even uses the same dataflow equations as cprop).
5401 If a register is used as a memory address with the form (mem (reg)), then we
5402 know that REG can not be zero at that point in the program. Any instruction
5403 which sets REG "kills" this property.
5405 So, if every path leading to a conditional branch has an available memory
5406 reference of that form, then we know the register can not have the value
5407 zero at the conditional branch.
5409 So we merely need to compute the local properties and propagate that data
5410 around the cfg, then optimize where possible.
5412 We run this pass two times. Once before CSE, then again after CSE. This
5413 has proven to be the most profitable approach. It is rare for new
5414 optimization opportunities of this nature to appear after the first CSE
5415 pass.
5417 This could probably be integrated with global cprop with a little work. */
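/* Editor's illustration (an added sketch):

	*p = 1;		-- (mem (reg p)) makes REG p known nonnull
	...		-- no store into p on any path to the test
	if (p == 0)	-- provably false; the compare and branch
	  return;	-- can be deleted

   The dereference establishes the nonnull property, availability
   propagates it to the conditional branch, and the EQ comparison
   against zero can then be resolved at compile time.  */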
5420 delete_null_pointer_checks (f)
5421 rtx f ATTRIBUTE_UNUSED;
5423 sbitmap *nonnull_avin, *nonnull_avout;
5424 unsigned int *block_reg;
5429 struct null_pointer_info npi;
5431 /* If we have only a single block, then there's nothing to do. */
5432 if (n_basic_blocks <= 1)
5435 /* Trying to perform global optimizations on flow graphs which have
5436 a high connectivity will take a long time and is unlikely to be
5437 particularly useful.
5439 In normal circumstances a cfg should have about twice as many edges
5440 as blocks. But we do not want to punish small functions which have
5441 a couple switch statements. So we require a relatively large number
5442 of basic blocks and the ratio of edges to blocks to be high. */
5443 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5446 /* We need four bitmaps, each with a bit for each register in each
5447 basic block.  */
5448 max_reg = max_reg_num ();
5449 regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5451 /* Allocate bitmaps to hold local and global properties. */
5452 npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5453 npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5454 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5455 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5457 /* Go through the basic blocks, seeing whether or not each block
5458 ends with a conditional branch whose condition is a comparison
5459 against zero. Record the register compared in BLOCK_REG. */
5460 block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5461 for (bb = 0; bb < n_basic_blocks; bb++)
5463 rtx last_insn = BLOCK_END (bb);
5464 rtx condition, earliest, reg;
5466 /* We only want conditional branches. */
5467 if (GET_CODE (last_insn) != JUMP_INSN
5468 || !any_condjump_p (last_insn)
5469 || !onlyjump_p (last_insn))
5472 /* LAST_INSN is a conditional jump. Get its condition. */
5473 condition = get_condition (last_insn, &earliest);
5475 /* If we were unable to get the condition, or it is not an equality
5476 comparison against zero then there's nothing we can do. */
5478 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5479 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5480 || (XEXP (condition, 1)
5481 != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5484 /* We must be checking a register against zero. */
5485 reg = XEXP (condition, 0);
5486 if (GET_CODE (reg) != REG)
5489 block_reg[bb] = REGNO (reg);
5492 /* Go through the algorithm for each block of registers. */
5493 for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5496 npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5497 delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5498 nonnull_avout, &npi);
5501 /* Free the table of registers compared at the end of every block. */
5505 sbitmap_vector_free (npi.nonnull_local);
5506 sbitmap_vector_free (npi.nonnull_killed);
5507 sbitmap_vector_free (nonnull_avin);
5508 sbitmap_vector_free (nonnull_avout);
5511 /* Code Hoisting variables and subroutines. */
5513 /* Very busy expressions. */
5514 static sbitmap *hoist_vbein;
5515 static sbitmap *hoist_vbeout;
5517 /* Hoistable expressions. */
5518 static sbitmap *hoist_exprs;
5520 /* Dominator bitmaps. */
5521 static sbitmap *dominators;
5523 /* ??? We could compute post dominators and run this algorithm in
5524 reverse to perform tail merging; doing so would probably be
5525 more effective than the tail merging code in jump.c.
5527 It's unclear if tail merging could be run in parallel with
5528 code hoisting. It would be nice. */
5530 /* Allocate vars used for code hoisting analysis. */
5533 alloc_code_hoist_mem (n_blocks, n_exprs)
5534 int n_blocks, n_exprs;
5536 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5537 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5538 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5540 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5541 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5542 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5543 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5545 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5548 /* Free vars used for code hoisting analysis. */
5551 free_code_hoist_mem ()
5553 sbitmap_vector_free (antloc);
5554 sbitmap_vector_free (transp);
5555 sbitmap_vector_free (comp);
5557 sbitmap_vector_free (hoist_vbein);
5558 sbitmap_vector_free (hoist_vbeout);
5559 sbitmap_vector_free (hoist_exprs);
5560 sbitmap_vector_free (transpout);
5562 sbitmap_vector_free (dominators);
5565 /* Compute the very busy expressions at entry/exit from each block.
5567 An expression is very busy if all paths from a given point
5568 compute the expression. */
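/* Editor's illustration (an added sketch): in

	if (c)
	  x = a + b;
	else
	  y = a + b;

   `a + b' is computed on every path leaving the branch, so it is very
   busy at the branch point and may be hoisted there.  An expression
   computed on only one arm is not very busy, since not all paths from
   that point compute it.  */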
5571 compute_code_hoist_vbeinout ()
5573 int bb, changed, passes;
5575 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5576 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5585 /* We scan the blocks in the reverse order to speed up
5586 the convergence.  */
5587 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5589 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb], antloc[bb],
5590 hoist_vbeout[bb], transp[bb]);
5591 if (BASIC_BLOCK (bb)->next_bb != EXIT_BLOCK_PTR)
5592 sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5599 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5602 /* Top level routine to do the dataflow analysis needed by code hoisting. */
5605 compute_code_hoist_data ()
5607 compute_local_properties (transp, comp, antloc, 0);
5608 compute_transpout ();
5609 compute_code_hoist_vbeinout ();
5610 calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
5612 fprintf (gcse_file, "\n");
5615 /* Determine if the expression identified by EXPR_INDEX would
5616 reach BB unimpaired if it was placed at the end of EXPR_BB.
5618 It's unclear exactly what Muchnick meant by "unimpaired".  It seems
5619 to me that the expression must either be computed or transparent in
5620 *every* block in the path(s) from EXPR_BB to BB. Any other definition
5621 would allow the expression to be hoisted out of loops, even if
5622 the expression wasn't a loop invariant.
5624 Contrast this to reachability for PRE where an expression is
5625 considered reachable if *any* path reaches instead of *all*
5626 paths.  */
5629 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5630 basic_block expr_bb;
5636 int visited_allocated_locally = 0;
5639 if (visited == NULL)
5641 visited_allocated_locally = 1;
5642 visited = xcalloc (n_basic_blocks, 1);
5645 for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
5647 basic_block pred_bb = pred->src;
5649 if (pred->src == ENTRY_BLOCK_PTR)
5651 else if (visited[pred_bb->index])
5654 /* Does this predecessor generate this expression? */
5655 else if (TEST_BIT (comp[pred_bb->index], expr_index))
5657 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
5663 visited[pred_bb->index] = 1;
5664 if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5669 if (visited_allocated_locally)
5672 return (pred == NULL);
5675 /* Actually perform code hoisting. */
5682 struct expr **index_map;
5685 sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5687 /* Compute a mapping from expression number (`bitmap_index') to
5688 hash table entry. */
5690 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5691 for (i = 0; i < expr_hash_table_size; i++)
5692 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5693 index_map[expr->bitmap_index] = expr;
5695 /* Walk over each basic block looking for potentially hoistable
5696 expressions; nothing gets hoisted from the entry block.  */
5697 for (bb = 0; bb < n_basic_blocks; bb++)
5700 int insn_inserted_p;
5702 /* Examine each expression that is very busy at the exit of this
5703 block. These are the potentially hoistable expressions. */
5704 for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5708 if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5710 /* We've found a potentially hoistable expression, now
5711 we look at every block BB dominates to see if it
5712 computes the expression. */
5713 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5715 /* Ignore self dominance. */
5717 || ! TEST_BIT (dominators[dominated], bb))
5720 /* We've found a dominated block, now see if it computes
5721 the busy expression and whether or not moving that
5722 expression to the "beginning" of that block is safe. */
5723 if (!TEST_BIT (antloc[dominated], i))
5726 /* Note if the expression would reach the dominated block
5727 unimpaired if it was placed at the end of BB.
5729 Keep track of how many times this expression is hoistable
5730 from a dominated block into BB. */
5731 if (hoist_expr_reaches_here_p (BASIC_BLOCK (bb), i,
5732 BASIC_BLOCK (dominated), NULL))
5736 /* If we found more than one hoistable occurrence of this
5737 expression, then note it in the bitmap of expressions to
5738 hoist. It makes no sense to hoist things which are computed
5739 in only one BB, and doing so tends to pessimize register
5740 allocation. One could increase this value to try harder
5741 to avoid any possible code expansion due to register
5742 allocation issues; however experiments have shown that
5743 the vast majority of hoistable expressions are only movable
5744 from two successors, so raising this threshold is likely
5745 to nullify any benefit we get from code hoisting. */
5748 SET_BIT (hoist_exprs[bb], i);
5754 /* If we found nothing to hoist, then quit now. */
5758 /* Loop over all the hoistable expressions. */
5759 for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5761 /* We want to insert the expression into BB only once, so
5762 note when we've inserted it. */
5763 insn_inserted_p = 0;
5765 /* These tests should be the same as the tests above. */
5766 if (TEST_BIT (hoist_vbeout[bb], i))
5768 /* We've found a potentially hoistable expression, now
5769 we look at every block BB dominates to see if it
5770 computes the expression. */
5771 for (dominated = 0; dominated < n_basic_blocks; dominated++)
5773 /* Ignore self dominance. */
5775 || ! TEST_BIT (dominators[dominated], bb))
5778 /* We've found a dominated block, now see if it computes
5779 the busy expression and whether or not moving that
5780 expression to the "beginning" of that block is safe. */
5781 if (!TEST_BIT (antloc[dominated], i))
5784 /* The expression is computed in the dominated block and
5785 it would be safe to compute it at the start of the
5786 dominated block. Now we have to determine if the
5787 expression would reach the dominated block if it was
5788 placed at the end of BB. */
5789 if (hoist_expr_reaches_here_p (BASIC_BLOCK (bb), i,
5790 BASIC_BLOCK (dominated), NULL))
5792 struct expr *expr = index_map[i];
5793 struct occr *occr = expr->antic_occr;
5797 /* Find the right occurrence of this expression. */
5798 while (occr && BLOCK_NUM (occr->insn) != dominated)
5799 occr = occr->next;
5801 /* Should never happen. */
5807 set = single_set (insn);
5811 /* Create a pseudo-reg to store the result of reaching
5812 expressions into. Get the mode for the new pseudo
5813 from the mode of the original destination pseudo. */
5814 if (expr->reaching_reg == NULL)
5816 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5818 /* In theory this should never fail since we're creating
5819 a reg->reg copy.
5821 However, on the x86 some of the movXX patterns
5822 actually contain clobbers of scratch regs. This may
5823 cause the insn created by validate_change to not
5824 match any pattern and thus cause validate_change to
5826 if (validate_change (insn, &SET_SRC (set),
5827 expr->reaching_reg, 0))
5829 occr->deleted_p = 1;
5830 if (!insn_inserted_p)
5832 insert_insn_end_bb (index_map[i],
5833 BASIC_BLOCK (bb), 0);
5834 insn_inserted_p = 1;
5846 /* Top level routine to perform one code hoisting (aka unification) pass.
5848 Return non-zero if a change was made. */
5851 one_code_hoisting_pass ()
5855 alloc_expr_hash_table (max_cuid);
5856 compute_expr_hash_table ();
5858 dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
5859 expr_hash_table_size, n_exprs);
5863 alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5864 compute_code_hoist_data ();
5866 free_code_hoist_mem ();
5869 free_expr_hash_table ();
5874 /* Here we provide the things required to do store motion towards
5875 the exit. In order for this to be effective, gcse also needed to
5876 be taught how to move a load when it is killed only by a store to itself.
5878 int i;
5879 float a[10];
5881 void foo(float scale)
5882 {
5883 for (i=0; i<10; i++)
5884 a[i] *= scale;
5885 }
5887 'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
5888 the load out since it's live around the loop, and stored at the bottom
5889 of the loop.
5891 The 'Load Motion' referred to and implemented in this file is
5892 an enhancement to gcse which when using edge based lcm, recognizes
5893 this situation and allows gcse to move the load out of the loop.
5895 Once gcse has hoisted the load, store motion can then push this
5896 load towards the exit, and we end up with no loads or stores of 'i'
5897 in the loop.  */
5899 /* This will search the ldst list for a matching expression. If it
5900 doesn't find one, we create one and initialize it. */
5902 static struct ls_expr *
5906 struct ls_expr * ptr;
5908 for (ptr = first_ls_expr(); ptr != NULL; ptr = next_ls_expr (ptr))
5909 if (expr_equiv_p (ptr->pattern, x))
5914 ptr = (struct ls_expr *) xmalloc (sizeof (struct ls_expr));
5916 ptr->next = pre_ldst_mems;
5919 ptr->loads = NULL_RTX;
5920 ptr->stores = NULL_RTX;
5921 ptr->reaching_reg = NULL_RTX;
5924 ptr->hash_index = 0;
5925 pre_ldst_mems = ptr;
5931 /* Free up an individual ldst entry. */
5934 free_ldst_entry (ptr)
5935 struct ls_expr * ptr;
5937 free_INSN_LIST_list (& ptr->loads);
5938 free_INSN_LIST_list (& ptr->stores);
5943 /* Free up all memory associated with the ldst list. */
5948 while (pre_ldst_mems)
5950 struct ls_expr * tmp = pre_ldst_mems;
5952 pre_ldst_mems = pre_ldst_mems->next;
5954 free_ldst_entry (tmp);
5957 pre_ldst_mems = NULL;
5960 /* Dump debugging info about the ldst list. */
5963 print_ldst_list (file)
5966 struct ls_expr * ptr;
5968 fprintf (file, "LDST list: \n");
5970 for (ptr = first_ls_expr(); ptr != NULL; ptr = next_ls_expr (ptr))
5972 fprintf (file, " Pattern (%3d): ", ptr->index);
5974 print_rtl (file, ptr->pattern);
5976 fprintf (file, "\n Loads : ");
5979 print_rtl (file, ptr->loads);
5981 fprintf (file, "(nil)");
5983 fprintf (file, "\n Stores : ");
5986 print_rtl (file, ptr->stores);
5988 fprintf (file, "(nil)");
5990 fprintf (file, "\n\n");
5993 fprintf (file, "\n");
5996 /* Returns 1 if X is in the list of ldst only expressions. */
5998 static struct ls_expr *
5999 find_rtx_in_ldst (x)
6002 struct ls_expr * ptr;
6004 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
6005 if (expr_equiv_p (ptr->pattern, x) && ! ptr->invalid)
6011 /* Assign each element of the list of mems a monotonically increasing value. */
6016 struct ls_expr * ptr;
6019 for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
6025 /* Return first item in the list. */
6027 static inline struct ls_expr *
6030 return pre_ldst_mems;
6033 /* Return the next item in the list after the specified one.  */
6035 static inline struct ls_expr *
6037 struct ls_expr * ptr;
6042 /* Load Motion for loads which only kill themselves. */
6044 /* Return true if x is a simple MEM operation, with no registers or
6045 side effects. These are the types of loads we consider for the
6046 ld_motion list, otherwise we let the usual aliasing take care of it. */
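/* Editor's illustration (an added sketch): a load such as

	(mem:SI (symbol_ref ("x")))

   qualifies -- the address is constant, so rtx_varies_p is false --
   while a volatile MEM, a BLKmode MEM, or a load through a register
   such as (mem:SI (reg 60)) does not, and is left to the normal
   aliasing machinery.  */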
6052 if (GET_CODE (x) != MEM)
6055 if (MEM_VOLATILE_P (x))
6058 if (GET_MODE (x) == BLKmode)
6061 if (!rtx_varies_p (XEXP (x, 0), 0))
6067 /* Make sure there isn't a buried reference in this pattern anywhere.
6068 If there is, invalidate the entry for it since we're not capable
6069 of fixing it up just yet.  We have to be sure we know about ALL
6070 loads since the aliasing code will allow all entries in the
6071 ld_motion list to not-alias itself. If we miss a load, we will get
6072 the wrong value since gcse might common it and we won't know to
6073 fix it up.  */
6076 invalidate_any_buried_refs (x)
6081 struct ls_expr * ptr;
6083 /* Invalidate it in the list. */
6084 if (GET_CODE (x) == MEM && simple_mem (x))
6086 ptr = ldst_entry (x);
6090 /* Recursively process the insn. */
6091 fmt = GET_RTX_FORMAT (GET_CODE (x));
6093 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6096 invalidate_any_buried_refs (XEXP (x, i));
6097 else if (fmt[i] == 'E')
6098 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6099 invalidate_any_buried_refs (XVECEXP (x, i, j));
6103 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
6104 being defined as MEM loads and stores to symbols, with no
6105 side effects and no registers in the expression. If there are any
6106 uses/defs which don't match these criteria, it is invalidated and
6107 trimmed out later. */
6110 compute_ld_motion_mems ()
6112 struct ls_expr * ptr;
6116 pre_ldst_mems = NULL;
6118 for (bb = 0; bb < n_basic_blocks; bb++)
6120 for (insn = BLOCK_HEAD (bb);
6121 insn && insn != NEXT_INSN (BLOCK_END (bb));
6122 insn = NEXT_INSN (insn))
6124 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
6126 if (GET_CODE (PATTERN (insn)) == SET)
6128 rtx src = SET_SRC (PATTERN (insn));
6129 rtx dest = SET_DEST (PATTERN (insn));
6131 /* Check for a simple LOAD... */
6132 if (GET_CODE (src) == MEM && simple_mem (src))
6134 ptr = ldst_entry (src);
6135 if (GET_CODE (dest) == REG)
6136 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
6142 /* Make sure there isn't a buried load somewhere. */
6143 invalidate_any_buried_refs (src);
6146 /* Check for stores.  Don't worry about aliased ones; they
6147 will block any movement we might do later.  We only care
6148 about this exact pattern since this is the only
6149 circumstance in which we will ignore the aliasing info.  */
6150 if (GET_CODE (dest) == MEM && simple_mem (dest))
6152 ptr = ldst_entry (dest);
6154 if (GET_CODE (src) != MEM
6155 && GET_CODE (src) != ASM_OPERANDS)
6156 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
6162 invalidate_any_buried_refs (PATTERN (insn));
6168 /* Remove any references that have been either invalidated or are not in the
6169 expression list for pre gcse. */
6172 trim_ld_motion_mems ()
6174 struct ls_expr * last = NULL;
6175 struct ls_expr * ptr = first_ls_expr ();
6179 int del = ptr->invalid;
6180 struct expr * expr = NULL;
6182 /* Delete if entry has been made invalid. */
6188 /* Delete if we cannot find this mem in the expression list. */
6189 for (i = 0; i < expr_hash_table_size && del; i++)
6191 for (expr = expr_hash_table[i];
6193 expr = expr->next_same_hash)
6194 if (expr_equiv_p (expr->expr, ptr->pattern))
6206 last->next = ptr->next;
6207 free_ldst_entry (ptr);
6212 pre_ldst_mems = pre_ldst_mems->next;
6213 free_ldst_entry (ptr);
6214 ptr = pre_ldst_mems;
6219 /* Set the expression field if we are keeping it. */
6226 /* Show the world what we've found. */
6227 if (gcse_file && pre_ldst_mems != NULL)
6228 print_ldst_list (gcse_file);
6231 /* This routine will take an expression which we are replacing with
6232 a reaching register, and update any stores that are needed if
6233 that expression is in the ld_motion list. Stores are updated by
6234 copying their SRC to the reaching register, and then storing
6235 the reaching register into the store location.  This keeps the
6236 correct value in the reaching register for the loads. */
6239 update_ld_motion_stores (expr)
6242 struct ls_expr * mem_ptr;
6244 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
6246 /* We can try to find just the REACHED stores, but it shouldn't
6247 matter to set the reaching reg everywhere... some might be
6248 dead and should be eliminated later. */
6250 /* We replace  SET mem = expr  with
6251 SET reg = expr
6252 SET mem = reg , where reg is the
6253 reaching reg used in the load.  */
6254 rtx list = mem_ptr->stores;
6256 for ( ; list != NULL_RTX; list = XEXP (list, 1))
6258 rtx insn = XEXP (list, 0);
6259 rtx pat = PATTERN (insn);
6260 rtx src = SET_SRC (pat);
6261 rtx reg = expr->reaching_reg;
6264 /* If we've already copied it, continue. */
6265 if (expr->reaching_reg == src)
6270 fprintf (gcse_file, "PRE: store updated with reaching reg ");
6271 print_rtl (gcse_file, expr->reaching_reg);
6272 fprintf (gcse_file, ":\n ");
6273 print_inline_rtx (gcse_file, insn, 8);
6274 fprintf (gcse_file, "\n");
6277 copy = gen_move_insn ( reg, SET_SRC (pat));
6278 new = emit_insn_before (copy, insn);
6279 record_one_set (REGNO (reg), new);
6280 SET_SRC (pat) = reg;
6282 /* Un-recognize this pattern since it's probably different now.  */
6283 INSN_CODE (insn) = -1;
6284 gcse_create_count++;
6289 /* Store motion code. */
6291 /* This is used to communicate the target bitvector we want to use in the
6292 reg_set_info routine when called via the note_stores mechanism. */
6293 static sbitmap * regvec;
6295 /* Used in computing the reverse edge graph bit vectors. */
6296 static sbitmap * st_antloc;
6298 /* Global holding the number of store expressions we are dealing with. */
6299 static int num_stores;
6301 /* Check to see if we need to mark a register set.  Called from note_stores.  */
6304 reg_set_info (dest, setter, data)
6305 rtx dest, setter ATTRIBUTE_UNUSED;
6306 void * data ATTRIBUTE_UNUSED;
6308 if (GET_CODE (dest) == SUBREG)
6309 dest = SUBREG_REG (dest);
6311 if (GET_CODE (dest) == REG)
6312 SET_BIT (*regvec, REGNO (dest));
6315 /* Return non-zero if the register operands of expression X are killed
6316 anywhere in basic block BB. */
6319 store_ops_ok (x, bb)
6327 /* Repeat is used to turn tail-recursion into iteration. */
6333 code = GET_CODE (x);
6337 /* If a reg has changed after us in this
6338 block, the operand has been killed. */
6339 return TEST_BIT (reg_set_in_block[bb->index], REGNO (x));
6367 i = GET_RTX_LENGTH (code) - 1;
6368 fmt = GET_RTX_FORMAT (code);
6374 rtx tem = XEXP (x, i);
6376 /* If we are about to do the last recursive call
6377 needed at this level, change it into iteration.
6378 This function is called enough to be worth it. */
6385 if (! store_ops_ok (tem, bb))
6388 else if (fmt[i] == 'E')
6392 for (j = 0; j < XVECLEN (x, i); j++)
6394 if (! store_ops_ok (XVECEXP (x, i, j), bb))
6403 /* Determine whether INSN is a MEM store pattern that we will consider moving.  */
6406 find_moveable_store (insn)
6409 struct ls_expr * ptr;
6410 rtx dest = PATTERN (insn);
6412 if (GET_CODE (dest) != SET
6413 || GET_CODE (SET_SRC (dest)) == ASM_OPERANDS)
6416 dest = SET_DEST (dest);
6418 if (GET_CODE (dest) != MEM || MEM_VOLATILE_P (dest)
6419 || GET_MODE (dest) == BLKmode)
6422 if (GET_CODE (XEXP (dest, 0)) != SYMBOL_REF)
6425 if (rtx_varies_p (XEXP (dest, 0), 0))
6428 ptr = ldst_entry (dest);
6429 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
6432 /* Perform store motion. Much like gcse, except we move expressions the
6433 other way by looking at the flowgraph in reverse. */
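/* Editor's illustration (an added sketch):

	for (i = 0; i < n; i++)
	  x = a[i];		-- store to x on every iteration

   becomes

	for (i = 0; i < n; i++)
	  reg = a[i];
	x = reg;		-- one store at the loop exit

   The dataflow is the mirror image of PRE: anticipatability and
   availability of stores are computed so that edge-based LCM over the
   reversed flowgraph can sink them towards the exit.  */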
6436 compute_store_table ()
6442 max_gcse_regno = max_reg_num ();
6444 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
6446 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
6449 /* Find all the stores we care about. */
6450 for (bb = 0; bb < n_basic_blocks; bb++)
6452 regvec = & (reg_set_in_block[bb]);
6453 for (insn = BLOCK_END (bb);
6454 insn && insn != PREV_INSN (BLOCK_HEAD (bb));
6455 insn = PREV_INSN (insn))
6457 /* Ignore anything that is not a normal insn. */
6458 if (! INSN_P (insn))
6461 if (GET_CODE (insn) == CALL_INSN)
6463 bool clobbers_all = false;
6464 #ifdef NON_SAVING_SETJMP
6465 if (NON_SAVING_SETJMP
6466 && find_reg_note (insn, REG_SETJMP, NULL_RTX))
6467 clobbers_all = true;
6470 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6472 || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
6473 SET_BIT (reg_set_in_block[bb], regno);
6476 pat = PATTERN (insn);
6477 note_stores (pat, reg_set_info, NULL);
6479 /* Now that we've marked regs, look for stores. */
6480 if (GET_CODE (pat) == SET)
6481 find_moveable_store (insn);
6485 ret = enumerate_ldsts ();
6489 fprintf (gcse_file, "Store Motion Expressions.\n");
6490 print_ldst_list (gcse_file);
6496 /* Check to see if the load X is aliased with STORE_PATTERN. */
6499 load_kills_store (x, store_pattern)
6500 rtx x, store_pattern;
6502 if (true_dependence (x, GET_MODE (x), store_pattern, rtx_addr_varies_p))
6507 /* Go through the entire insn X, looking for any loads which might alias
6508 STORE_PATTERN. Return 1 if found. */
6511 find_loads (x, store_pattern)
6512 rtx x, store_pattern;
6521 if (GET_CODE (x) == SET)
6524 if (GET_CODE (x) == MEM)
6526 if (load_kills_store (x, store_pattern))
6530 /* Recursively process the insn. */
6531 fmt = GET_RTX_FORMAT (GET_CODE (x));
6533 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0 && !ret; i--)
6536 ret |= find_loads (XEXP (x, i), store_pattern);
6537 else if (fmt[i] == 'E')
6538 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6539 ret |= find_loads (XVECEXP (x, i, j), store_pattern);
6544 /* Check if INSN kills the store pattern X (is aliased with it).
6545 Return 1 if it does.  */
6548 store_killed_in_insn (x, insn)
6551 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6554 if (GET_CODE (insn) == CALL_INSN)
6556 /* A normal or pure call might read from pattern,
6557 but a const call will not. */
6558 return ! CONST_OR_PURE_CALL_P (insn) || pure_call_p (insn);
6561 if (GET_CODE (PATTERN (insn)) == SET)
6563 rtx pat = PATTERN (insn);
6564 /* Check for memory stores to aliased objects. */
6565 if (GET_CODE (SET_DEST (pat)) == MEM && !expr_equiv_p (SET_DEST (pat), x))
6566 /* Pretend it's a load and check for aliasing.  */
6567 if (find_loads (SET_DEST (pat), x))
6569 return find_loads (SET_SRC (pat), x);
6572 return find_loads (PATTERN (insn), x);
6575 /* Returns 1 if the expression X is loaded or clobbered on or after INSN
6576 within basic block BB. */
6579 store_killed_after (x, insn, bb)
6588 /* Check if the register operands of the store are OK in this block.
6589 Note that if registers are changed ANYWHERE in the block, we'll
6590 decide we can't move it, regardless of whether it changed above
6591 or below the store. This could be improved by checking the register
6592 operands while looking for aliasing in each insn.  */
6593 if (!store_ops_ok (XEXP (x, 0), bb))
6596 for ( ; insn && insn != NEXT_INSN (last); insn = NEXT_INSN (insn))
6597 if (store_killed_in_insn (x, insn))
6603 /* Returns 1 if the expression X is loaded or clobbered on or before INSN
6604 within basic block BB. */
6606 store_killed_before (x, insn, bb)
6610 rtx first = bb->head;
6613 return store_killed_in_insn (x, insn);
6615 /* Check if the register operands of the store are OK in this block.
6616 Note that if registers are changed ANYWHERE in the block, we'll
6617 decide we can't move it, regardless of whether it changed above
6618 or below the store. This could be improved by checking the register
6619 operands while looking for aliasing in each insn.  */
6620 if (!store_ops_ok (XEXP (x, 0), bb))
6623 for ( ; insn && insn != PREV_INSN (first); insn = PREV_INSN (insn))
6624 if (store_killed_in_insn (x, insn))
6630 #define ANTIC_STORE_LIST(x) ((x)->loads)
6631 #define AVAIL_STORE_LIST(x) ((x)->stores)
6633 /* Given the table of available store insns at the end of blocks,
6634 determine which ones are not killed by aliasing, and generate
6635 the appropriate vectors for gen and killed. */
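/* Editor's illustration (an added sketch): in a block containing

	ST MEMa = x	-- anticipatable: nothing kills MEMa before it
	L  y   = MEMb	-- assumed not to alias MEMa
	ST MEMa = z	-- available: nothing kills MEMa after it

   the first store is recorded in st_antloc for the block and the last
   in ae_gen; a load of MEMa between the two would kill the availability
   of the first store and the anticipatability of the second.  */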
6637 build_store_vectors ()
6642 struct ls_expr * ptr;
6644 /* Build the gen_vector. This is any store in the table which is not killed
6645 by aliasing later in its block. */
6646 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
6647 sbitmap_vector_zero (ae_gen, n_basic_blocks);
6649 st_antloc = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
6650 sbitmap_vector_zero (st_antloc, n_basic_blocks);
6652 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
6654 /* Put all the stores into either the antic list, or the avail list,
6655 or both.  */
6656 rtx store_list = ptr->stores;
6657 ptr->stores = NULL_RTX;
6659 for (st = store_list; st != NULL; st = XEXP (st, 1))
6661 insn = XEXP (st, 0);
6662 bb = BLOCK_FOR_INSN (insn);
6664 if (!store_killed_after (ptr->pattern, insn, bb))
6666 /* If we've already seen an available expression in this block,
6667 we can delete the one we saw already (it occurs earlier in
6668 the block) and replace it with this one.  We'll copy the
6669 old SRC expression to an unused register in case there
6670 are any side effects. */
6671 if (TEST_BIT (ae_gen[bb->index], ptr->index))
6673 /* Find previous store. */
6675 for (st = AVAIL_STORE_LIST (ptr); st ; st = XEXP (st, 1))
6676 if (BLOCK_FOR_INSN (XEXP (st, 0)) == bb)
6680 rtx r = gen_reg_rtx (GET_MODE (ptr->pattern));
6682 fprintf (gcse_file, "Removing redundant store:\n");
6683 replace_store_insn (r, XEXP (st, 0), bb);
6684 XEXP (st, 0) = insn;
6688 SET_BIT (ae_gen[bb->index], ptr->index);
6689 AVAIL_STORE_LIST (ptr) = alloc_INSN_LIST (insn,
6690 AVAIL_STORE_LIST (ptr));
6693 if (!store_killed_before (ptr->pattern, insn, bb))
6695 SET_BIT (st_antloc[BLOCK_NUM (insn)], ptr->index);
6696 ANTIC_STORE_LIST (ptr) = alloc_INSN_LIST (insn,
6697 ANTIC_STORE_LIST (ptr));
6701 /* Free the original list of store insns. */
6702 free_INSN_LIST_list (&store_list);
6705 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
6706 sbitmap_vector_zero (ae_kill, n_basic_blocks);
6708 transp = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
6709 sbitmap_vector_zero (transp, n_basic_blocks);
6711 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
6712 for (b = 0; b < n_basic_blocks; b++)
6714 if (store_killed_after (ptr->pattern, BLOCK_HEAD (b), BASIC_BLOCK (b)))
6716 /* The anticipatable expression is not killed if it's gen'd. */
6718 We leave this check out for now. If we have a code sequence
6719 in a block which looks like:
6723 We should flag this as having an ANTIC expression, NOT
6724 transparent, NOT killed, and AVAIL.
6725 Unfortunately, since we haven't re-written all loads to
6726 use the reaching reg, we'll end up doing an incorrect
6727 Load in the middle here if we push the store down. It happens in
6728 gcc.c-torture/execute/960311-1.c with -O3
6729 If we always kill it in this case, we'll sometimes do
6730 uneccessary work, but it shouldn't actually hurt anything.
6731 if (!TEST_BIT (ae_gen[b], ptr->index)). */
6732 SET_BIT (ae_kill[b], ptr->index);
6735 SET_BIT (transp[b], ptr->index);
6738 /* Any block with no exits calls some non-returning function, so
6739 we better mark the store killed here, or we might not store to
6740 it at all. If we knew it was abort, we wouldn't have to store,
6741 but we don't know that for sure. */
6744 fprintf (gcse_file, "ST_avail and ST_antic (shown under loads..)\n");
6745 print_ldst_list (gcse_file);
6746 dump_sbitmap_vector (gcse_file, "st_antloc", "", st_antloc, n_basic_blocks);
6747 dump_sbitmap_vector (gcse_file, "st_kill", "", ae_kill, n_basic_blocks);
6748 dump_sbitmap_vector (gcse_file, "Transpt", "", transp, n_basic_blocks);
6749 dump_sbitmap_vector (gcse_file, "st_avloc", "", ae_gen, n_basic_blocks);
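/* Illustrative sketch only (hypothetical debug helper, not used by the
   pass): each property computed above is one bit per (block, store)
   pair, so a query is a TEST_BIT on the block's row.  E.g. listing the
   blocks whose gen set contains the store expression PTR:  */

static void
dump_store_gen_blocks (file, ptr)
     FILE * file;
     struct ls_expr * ptr;
{
  int b;

  fprintf (file, "blocks generating store %d:", ptr->index);
  for (b = 0; b < n_basic_blocks; b++)
    if (TEST_BIT (ae_gen[b], ptr->index))
      fprintf (file, " %d", b);
  fprintf (file, "\n");
}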
/* Insert an instruction at the beginning of a basic block, and update
   the BLOCK_HEAD if needed.  */

static void
insert_insn_start_bb (insn, bb)
     rtx insn;
     basic_block bb;
{
  /* Insert at start of successor block, skipping the block's label
     and basic-block note, but never walking past the block's end.  */
  rtx prev = PREV_INSN (bb->head);
  rtx before = bb->head;
  while (before != 0)
    {
      if (GET_CODE (before) != CODE_LABEL
	  && (GET_CODE (before) != NOTE
	      || NOTE_LINE_NUMBER (before) != NOTE_INSN_BASIC_BLOCK))
	break;
      prev = before;
      if (prev == bb->end)
	break;
      before = NEXT_INSN (before);
    }

  insn = emit_insn_after (insn, prev);

  if (gcse_file)
    {
      fprintf (gcse_file, "STORE_MOTION insert store at start of BB %d:\n",
	       bb->index);
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }
}
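/* E.g. (illustrative): for a block beginning

     (code_label 23)
     (note NOTE_INSN_BASIC_BLOCK)
     (insn 24 ...)

   the loop above stops at insn 24, so the new store is emitted after
   the basic-block note and before the first real insn, leaving the
   label as the block head.  */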
/* This routine will insert a store on an edge.  EXPR is the ldst entry
   for the memory reference, and E is the edge to insert it on.  Returns
   non-zero if an edge insertion was performed.  */

static int
insert_store (expr, e)
     struct ls_expr * expr;
     edge e;
{
  rtx reg, insn;
  basic_block bb;
  edge tmp;

  /* We did all the deletes before this insert, so if we didn't delete a
     store, then we haven't set the reaching reg yet either.  */
  if (expr->reaching_reg == NULL_RTX)
    return 0;

  reg = expr->reaching_reg;
  insn = gen_move_insn (expr->pattern, reg);

  /* If we are inserting this expression on ALL predecessor edges of a BB,
     insert it at the start of the BB, and reset the insert bits on the
     other edges so we don't try to insert it on the other edges.  */
  bb = e->dest;
  for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
    {
      int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
      if (index == EDGE_INDEX_NO_EDGE)
	abort ();
      if (! TEST_BIT (pre_insert_map[index], expr->index))
	break;
    }

  /* If tmp is NULL, we found an insertion on every edge, blank the
     insertion vector for these edges, and insert at the start of the BB.  */
  if (!tmp && bb != EXIT_BLOCK_PTR)
    {
      for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
	{
	  int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
	  RESET_BIT (pre_insert_map[index], expr->index);
	}
      insert_insn_start_bb (insn, bb);
      return 0;
    }

  /* We can't insert on this edge, so we'll insert at the head of the
     successor's block.  See Morgan, sec 10.5.  */
  if ((e->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
    {
      insert_insn_start_bb (insn, bb);
      return 0;
    }

  insert_insn_on_edge (insn, e);

  if (gcse_file)
    {
      fprintf (gcse_file, "STORE_MOTION insert insn on edge (%d, %d):\n",
	       e->src->index, e->dest->index);
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }

  return 1;
}
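/* Illustrative: with two predecessors P1 and P2 of block B,

       P1   P2
	 \  /
	  B

   if the LCM insert bit is set for both (P1->B) and (P2->B), a single
   copy of the store goes at the start of B and both edge bits are
   cleared, rather than committing two identical edge insertions.  */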
/* This routine will replace a store with a SET to a specified register.  */

static void
replace_store_insn (reg, del, bb)
     rtx reg, del;
     basic_block bb;
{
  rtx insn = gen_move_insn (reg, SET_SRC (PATTERN (del)));
  insn = emit_insn_after (insn, del);
  if (gcse_file)
    {
      fprintf (gcse_file,
	       "STORE_MOTION delete insn in BB %d:\n ", bb->index);
      print_inline_rtx (gcse_file, del, 6);
      fprintf (gcse_file, "\nSTORE MOTION replaced with insn:\n ");
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }
  delete_insn (del);
}
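/* For example (illustrative RTL, register numbers hypothetical):

     before:  (set (mem:SI (reg:SI 100)) (reg:SI 101))
     after:   (set (reg:SI 102) (reg:SI 101))

   where (reg:SI 102) is the reaching register; the store into memory
   is re-materialized later on the edges chosen by LCM.  */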
/* Delete a store, but copy the value that would have been stored into
   the reaching_reg for later storing.  */

static void
delete_store (expr, bb)
     struct ls_expr * expr;
     basic_block bb;
{
  rtx reg, i, del;

  if (expr->reaching_reg == NULL_RTX)
    expr->reaching_reg = gen_reg_rtx (GET_MODE (expr->pattern));

  /* If there is more than 1 store, the earlier ones will be dead,
     but it doesn't hurt to replace them here.  */
  reg = expr->reaching_reg;

  for (i = AVAIL_STORE_LIST (expr); i; i = XEXP (i, 1))
    {
      del = XEXP (i, 0);
      if (BLOCK_FOR_INSN (del) == bb)
	{
	  /* We know there is only one since we deleted redundant
	     ones during the available computation.  */
	  replace_store_insn (reg, del, bb);
	  break;
	}
    }
}
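/* Illustrative: if the same store expression is deleted in several
   blocks, every deletion copies its value into the one reaching_reg
   allocated above, and each store later emitted by insert_store reads
   from that same register, so all paths store a consistent value.  */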
/* Free memory used by store motion.  */

static void
free_store_memory ()
{
  free_ldst_mems ();

  if (ae_gen)
    sbitmap_vector_free (ae_gen);
  if (ae_kill)
    sbitmap_vector_free (ae_kill);
  if (transp)
    sbitmap_vector_free (transp);
  if (st_antloc)
    sbitmap_vector_free (st_antloc);
  if (pre_insert_map)
    sbitmap_vector_free (pre_insert_map);
  if (pre_delete_map)
    sbitmap_vector_free (pre_delete_map);
  if (reg_set_in_block)
    sbitmap_vector_free (reg_set_in_block);

  ae_gen = ae_kill = transp = st_antloc = NULL;
  pre_insert_map = pre_delete_map = reg_set_in_block = NULL;
}
/* Perform store motion.  Much like gcse, except we move expressions the
   other way by looking at the flowgraph in reverse.  */

static void
store_motion ()
{
  int x;
  struct ls_expr * ptr;
  int update_flow = 0;

  if (gcse_file)
    {
      fprintf (gcse_file, "before store motion\n");
      print_rtl (gcse_file, get_insns ());
    }

  init_alias_analysis ();

  /* Find all the stores that are live to the end of their block.  */
  num_stores = compute_store_table ();
  if (num_stores == 0)
    {
      sbitmap_vector_free (reg_set_in_block);
      end_alias_analysis ();
      return;
    }

  /* Now compute what's actually available to move.  */
  add_noreturn_fake_exit_edges ();
  build_store_vectors ();

  edge_list = pre_edge_rev_lcm (gcse_file, num_stores, transp, ae_gen,
				st_antloc, ae_kill, &pre_insert_map,
				&pre_delete_map);

  /* Now we want to insert the new stores which are going to be needed.  */
  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      for (x = 0; x < n_basic_blocks; x++)
	if (TEST_BIT (pre_delete_map[x], ptr->index))
	  delete_store (ptr, BASIC_BLOCK (x));

      for (x = 0; x < NUM_EDGES (edge_list); x++)
	if (TEST_BIT (pre_insert_map[x], ptr->index))
	  update_flow |= insert_store (ptr, INDEX_EDGE (edge_list, x));
    }

  if (update_flow)
    commit_edge_insertions ();

  free_store_memory ();
  free_edge_list (edge_list);
  remove_fake_edges ();
  end_alias_analysis ();
}
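/* Worked example (illustrative C source, names hypothetical):

     for (i = 0; i < n; i++)
       *p = i;

   If the store to *p is available at the end of the loop body and is
   not killed by aliasing, the reverse LCM problem sinks it: inside the
   loop only the copy into the reaching register remains, and a single
   store is inserted on the loop's exit edge.  */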