gcc/gcse.c

   1 /* Global common subexpression elimination/Partial redundancy elimination
   2    and global constant/copy propagation for GNU compiler.
   3    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* TODO
  23    - reordering of memory allocation and freeing to be more space efficient
  24    - do rough calc of how many regs are needed in each block, and a rough
  25      calc of how many regs are available in each class and use that to
  26      throttle back the code in cases where RTX_COST is minimal.
  27    - dead store elimination
  28    - a store to the same address as a load does not kill the load if the
  29      source of the store is also the destination of the load.  Handling this
  30      allows more load motion, particularly out of loops.
  31    - ability to realloc sbitmap vectors would allow one initial computation
  32      of reg_set_in_block with only subsequent additions, rather than
  33      recomputing it for each pass
  34
  35 */
  36
  37 /* References searched while implementing this.
  38
  39    Compilers Principles, Techniques and Tools
  40    Aho, Sethi, Ullman
  41    Addison-Wesley, 1988
  42
  43    Global Optimization by Suppression of Partial Redundancies
  44    E. Morel, C. Renvoise
  45    communications of the acm, Vol. 22, Num. 2, Feb. 1979
  46
  47    A Portable Machine-Independent Global Optimizer - Design and Measurements
  48    Frederick Chow
  49    Stanford Ph.D. thesis, Dec. 1983
  50
  51    A Fast Algorithm for Code Movement Optimization
  52    D.M. Dhamdhere
  53    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
  54
  55    A Solution to a Problem with Morel and Renvoise's
  56    Global Optimization by Suppression of Partial Redundancies
  57    K-H Drechsler, M.P. Stadel
  58    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
  59
  60    Practical Adaptation of the Global Optimization
  61    Algorithm of Morel and Renvoise
  62    D.M. Dhamdhere
  63    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
  64
  65    Efficiently Computing Static Single Assignment Form and the Control
  66    Dependence Graph
  67    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
  68    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
  69
  70    Lazy Code Motion
  71    J. Knoop, O. Ruthing, B. Steffen
  72    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
  73
  74    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
  75    Time for Reducible Flow Control
  76    Thomas Ball
  77    ACM Letters on Programming Languages and Systems,
  78    Vol. 2, Num. 1-4, Mar-Dec 1993
  79
  80    An Efficient Representation for Sparse Sets
  81    Preston Briggs, Linda Torczon
  82    ACM Letters on Programming Languages and Systems,
  83    Vol. 2, Num. 1-4, Mar-Dec 1993
  84
  85    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
  86    K-H Drechsler, M.P. Stadel
  87    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
  88
  89    Partial Dead Code Elimination
  90    J. Knoop, O. Ruthing, B. Steffen
  91    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  92
  93    Effective Partial Redundancy Elimination
  94    P. Briggs, K.D. Cooper
  95    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
  96
  97    The Program Structure Tree: Computing Control Regions in Linear Time
  98    R. Johnson, D. Pearson, K. Pingali
  99    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
 100
 101    Optimal Code Motion: Theory and Practice
 102    J. Knoop, O. Ruthing, B. Steffen
 103    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
 104
 105    The power of assignment motion
 106    J. Knoop, O. Ruthing, B. Steffen
 107    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 108
 109    Global code motion / global value numbering
 110    C. Click
 111    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
 112
 113    Value Driven Redundancy Elimination
 114    L.T. Simpson
 115    Rice University Ph.D. thesis, Apr. 1996
 116
 117    Value Numbering
 118    L.T. Simpson
 119    Massively Scalar Compiler Project, Rice University, Sep. 1996
 120
 121    High Performance Compilers for Parallel Computing
 122    Michael Wolfe
 123    Addison-Wesley, 1996
 124
 125    Advanced Compiler Design and Implementation
 126    Steven Muchnick
 127    Morgan Kaufmann, 1997
 128
 129    Building an Optimizing Compiler
 130    Robert Morgan
 131    Digital Press, 1998
 132
 133    People wishing to speed up the code here should read:
 134      Elimination Algorithms for Data Flow Analysis
 135      B.G. Ryder, M.C. Paull
 136      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
 137
 138      How to Analyze Large Programs Efficiently and Informatively
 139      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
 140      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
 141
 142    People wishing to do something different can find various possibilities
 143    in the above papers and elsewhere.
 144 */
 145
 146 #include "config.h"
 147 #include "system.h"
 148 #include "toplev.h"
 149
 150 #include "rtl.h"
 151 #include "tm_p.h"
 152 #include "regs.h"
 153 #include "hard-reg-set.h"
 154 #include "flags.h"
 155 #include "real.h"
 156 #include "insn-config.h"
 157 #include "recog.h"
 158 #include "basic-block.h"
 159 #include "output.h"
 160 #include "function.h"
 161 #include "expr.h"
 162
 163 #include "obstack.h"
 164 #define obstack_chunk_alloc gmalloc
 165 #define obstack_chunk_free free
 166
 167 /* Maximum number of passes to perform.  */
 168 #define MAX_PASSES 1
 169
 170 /* Propagate flow information through back edges and thus enable PRE's
 171    moving loop invariant calculations out of loops.
 172
 173    Originally this tended to create worse overall code, but several
 174    improvements during the development of PRE seem to have made following
 175    back edges generally a win.
 176
 177    Note much of the loop invariant code motion done here would normally
 178    be done by loop.c, which has more heuristics for when to move invariants
 179    out of loops.  At some point we might need to move some of those
 180    heuristics into gcse.c.  */
 181 #define FOLLOW_BACK_EDGES 1
 182
 183 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
 184    are a superset of those done by GCSE.
 185
 186    We perform the following steps:
 187
 188    1) Compute basic block information.
 189
 190    2) Compute table of places where registers are set.
 191
 192    3) Perform copy/constant propagation.
 193
 194    4) Perform global cse.
 195
 196    5) Perform another pass of copy/constant propagation.
 197
 198    Two passes of copy/constant propagation are done because the first one
 199    enables more GCSE and the second one helps to clean up the copies that
 200    GCSE creates.  This is needed more for PRE than for Classic because Classic
 201    GCSE will try to use an existing register containing the common
 202    subexpression rather than create a new one.  This is harder to do for PRE
 203    because of the code motion (which Classic GCSE doesn't do).
 204
 205    Expressions we are interested in GCSE-ing are of the form
 206    (set (pseudo-reg) (expression)).
 207    Function want_to_gcse_p says what these are.
 208
 209    PRE handles moving invariant expressions out of loops (by treating them as
 210    partially redundant).
 211
 212    Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
 213    assignment) based GVN (global value numbering).  L. T. Simpson's paper
 214    (Rice University) on value numbering is a useful reference for this.
 215
 216    **********************
 217
 218    We used to support multiple passes but there are diminishing returns in
 219    doing so.  The first pass usually makes 90% of the changes that are doable.
 220    A second pass can make a few more changes made possible by the first pass.
 221    Experiments show any further passes don't make enough changes to justify
 222    the expense.
 223
 224    A study of spec92 using an unlimited number of passes:
 225    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
 226    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
 227    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
 228
 229    It was found doing copy propagation between each pass enables further
 230    substitutions.
 231
 232    PRE is quite expensive in complicated functions because the DFA can take
 233    awhile to converge.  Hence we only perform one pass.  Macro MAX_PASSES can
 234    be modified if one wants to experiment.
 235
 236    **********************
 237
 238    The steps for PRE are:
 239
 240    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
 241
 242    2) Perform the data flow analysis for PRE.
 243
 244    3) Delete the redundant instructions
 245
 246    4) Insert the required copies [if any] that make the partially
 247       redundant instructions fully redundant.
 248
 249    5) For other reaching expressions, insert an instruction to copy the value
 250       to a newly created pseudo that will reach the redundant instruction.
 251
 252    The deletion is done first so that when we do insertions we
 253    know which pseudo reg to use.
 254
 255    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
 256    argue it is not.  The number of iterations for the algorithm to converge
 257    is typically 2-4 so I don't view it as that expensive (relatively speaking).
 258
 259    PRE GCSE depends heavily on the second CSE pass to clean up the copies
 260    we create.  To make an expression reach the place where it's redundant,
 261    the result of the expression is copied to a new register, and the redundant
 262    expression is deleted by replacing it with this new register.  Classic GCSE
 263    doesn't have this problem as much as it computes the reaching defs of
 264    each register in each block and thus can try to use an existing register.
 265
 266    **********************
 267
 268    A fair bit of simplicity is created by creating small functions for simple
 269    tasks, even when the function is only called in one place.  This may
 270    measurably slow things down [or may not] by creating more function call
 271    overhead than is necessary.  The source is laid out so that it's trivial
 272    to make the affected functions inline so that one can measure what speed
 273    up, if any, can be achieved, and maybe later when things settle things can
 274    be rearranged.
 275
 276    Help stamp out big monolithic functions!  */
 277 \f
 278 /* GCSE global vars.  */
 279
 280 /* -dG dump file.  */
 281 static FILE *gcse_file;
 282
 283 /* Note whether or not we should run jump optimization after gcse.  We
 284    want to do this for two cases.
 285
 286     * If we changed any jumps via cprop.
 287
 288     * If we added any labels via edge splitting.  */
 289
 290 static int run_jump_opt_after_gcse;
 291
 292 /* Bitmaps are normally not included in debugging dumps.
 293    However it's useful to be able to print them from GDB.
 294    We could create special functions for this, but it's simpler to
 295    just allow passing stderr to the dump_foo fns.  Since stderr can
 296    be a macro, we store a copy here.  */
 297 static FILE *debug_stderr;
 298
 299 /* An obstack for our working variables.  */
 300 static struct obstack gcse_obstack;
 301
 302 /* Non-zero for each mode that supports (set (reg) (reg)).
 303    This is trivially true for integer and floating point values.
 304    It may or may not be true for condition codes.  */
 305 static char can_copy_p[(int) NUM_MACHINE_MODES];
 306
 307 /* Non-zero if can_copy_p has been initialized.  */
 308 static int can_copy_init_p;
 309
 310 struct reg_use {rtx reg_rtx; };
 311
 312 /* Hash table of expressions.  */
 313
 314 struct expr
 315 {
 316   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
 317   rtx expr;
 318   /* Index in the available expression bitmaps.  */
 319   int bitmap_index;
 320   /* Next entry with the same hash.  */
 321   struct expr *next_same_hash;
 322   /* List of anticipatable occurrences in basic blocks in the function.
 323      An "anticipatable occurrence" is one that is the first occurrence in the
 324      basic block, the operands are not modified in the basic block prior
 325      to the occurrence and the output is not used between the start of
 326      the block and the occurrence.  */
 327   struct occr *antic_occr;
 328   /* List of available occurrence in basic blocks in the function.
 329      An "available occurrence" is one that is the last occurrence in the
 330      basic block and the operands are not modified by following statements in
 331      the basic block [including this insn].  */
 332   struct occr *avail_occr;
 333   /* Non-null if the computation is PRE redundant.
 334      The value is the newly created pseudo-reg to record a copy of the
 335      expression in all the places that reach the redundant copy.  */
 336   rtx reaching_reg;
 337 };
 338
 339 /* Occurrence of an expression.
 340    There is one per basic block.  If a pattern appears more than once the
 341    last appearance is used [or first for anticipatable expressions].  */
 342
 343 struct occr
 344 {
 345   /* Next occurrence of this expression.  */
 346   struct occr *next;
 347   /* The insn that computes the expression.  */
 348   rtx insn;
 349   /* Non-zero if this [anticipatable] occurrence has been deleted.  */
 350   char deleted_p;
 351   /* Non-zero if this [available] occurrence has been copied to
 352      reaching_reg.  */
 353   /* ??? This is mutually exclusive with deleted_p, so they could share
 354      the same byte.  */
 355   char copied_p;
 356 };
 357
 358 /* Expression and copy propagation hash tables.
 359    Each hash table is an array of buckets.
 360    ??? It is known that if it were an array of entries, structure elements
 361    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
 362    not clear whether in the final analysis a sufficient amount of memory would
 363    be saved as the size of the available expression bitmaps would be larger
 364    [one could build a mapping table without holes afterwards though].
 365    Someday I'll perform the computation and figure it out.  */
 366
 367 /* Total size of the expression hash table, in elements.  */
 368 static unsigned int expr_hash_table_size;
 369
 370 /* The table itself.
 371    This is an array of `expr_hash_table_size' elements.  */
 372 static struct expr **expr_hash_table;
 373
 374 /* Total size of the copy propagation hash table, in elements.  */
 375 static int set_hash_table_size;
 376
 377 /* The table itself.
 378    This is an array of `set_hash_table_size' elements.  */
 379 static struct expr **set_hash_table;
 380
 381 /* Mapping of uids to cuids.
 382    Only real insns get cuids.  */
 383 static int *uid_cuid;
 384
 385 /* Highest UID in UID_CUID.  */
 386 static int max_uid;
 387
 388 /* Get the cuid of an insn.  */
 389 #ifdef ENABLE_CHECKING
 390 #define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
 391 #else
 392 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
 393 #endif
 394
 395 /* Number of cuids.  */
 396 static int max_cuid;
 397
 398 /* Mapping of cuids to insns.  */
 399 static rtx *cuid_insn;
 400
 401 /* Get insn from cuid.  */
 402 #define CUID_INSN(CUID) (cuid_insn[CUID])
 403
 404 /* Maximum register number in function prior to doing gcse + 1.
 405    Registers created during this pass have regno >= max_gcse_regno.
 406    This is named with "gcse" to not collide with global of same name.  */
 407 static unsigned int max_gcse_regno;
 408
 409 /* Maximum number of cse-able expressions found.  */
 410 static int n_exprs;
 411
 412 /* Maximum number of assignments for copy propagation found.  */
 413 static int n_sets;
 414
 415 /* Table of registers that are modified.
 416
 417    For each register, each element is a list of places where the pseudo-reg
 418    is set.
 419
 420    For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
 421    requires knowledge of which blocks kill which regs [and thus could use
 422    a bitmap instead of the lists `reg_set_table' uses].
 423
 424    `reg_set_table' and could be turned into an array of bitmaps (num-bbs x
 425    num-regs) [however perhaps it may be useful to keep the data as is].  One
 426    advantage of recording things this way is that `reg_set_table' is fairly
 427    sparse with respect to pseudo regs but for hard regs could be fairly dense
 428    [relatively speaking].  And recording sets of pseudo-regs in lists speeds
 429    up functions like compute_transp since in the case of pseudo-regs we only
 430    need to iterate over the number of times a pseudo-reg is set, not over the
 431    number of basic blocks [clearly there is a bit of a slow down in the cases
 432    where a pseudo is set more than once in a block, however it is believed
 433    that the net effect is to speed things up].  This isn't done for hard-regs
 434    because recording call-clobbered hard-regs in `reg_set_table' at each
 435    function call can consume a fair bit of memory, and iterating over
 436    hard-regs stored this way in compute_transp will be more expensive.  */
 437
 438 typedef struct reg_set
 439 {
 440   /* The next setting of this register.  */
 441   struct reg_set *next;
 442   /* The insn where it was set.  */
 443   rtx insn;
 444 } reg_set;
 445
 446 static reg_set **reg_set_table;
 447
 448 /* Size of `reg_set_table'.
 449    The table starts out at max_gcse_regno + slop, and is enlarged as
 450    necessary.  */
 451 static int reg_set_table_size;
 452
 453 /* Amount to grow `reg_set_table' by when it's full.  */
 454 #define REG_SET_TABLE_SLOP 100
 455
 456 /* Bitmap containing one bit for each register in the program.
 457    Used when performing GCSE to track which registers have been set since
 458    the start of the basic block.  */
 459 static sbitmap reg_set_bitmap;
 460
 461 /* For each block, a bitmap of registers set in the block.
 462    This is used by expr_killed_p and compute_transp.
 463    It is computed during hash table computation and not by compute_sets
 464    as it includes registers added since the last pass (or between cprop and
 465    gcse) and it's currently not easy to realloc sbitmap vectors.  */
 466 static sbitmap *reg_set_in_block;
 467
 468 /* For each block, non-zero if memory is set in that block.
 469    This is computed during hash table computation and is used by
 470    expr_killed_p and compute_transp.
 471    ??? Handling of memory is very simple, we don't make any attempt
 472    to optimize things (later).
 473    ??? This can be computed by compute_sets since the information
 474    doesn't change.  */
 475 static char *mem_set_in_block;
 476
 477 /* Various variables for statistics gathering.  */
 478
 479 /* Memory used in a pass.
 480    This isn't intended to be absolutely precise.  Its intent is only
 481    to keep an eye on memory usage.  */
 482 static int bytes_used;
 483
 484 /* GCSE substitutions made.  */
 485 static int gcse_subst_count;
 486 /* Number of copy instructions created.  */
 487 static int gcse_create_count;
 488 /* Number of constants propagated.  */
 489 static int const_prop_count;
 490 /* Number of copys propagated.  */
 491 static int copy_prop_count;
 492 \f
 493 /* These variables are used by classic GCSE.
 494    Normally they'd be defined a bit later, but `rd_gen' needs to
 495    be declared sooner.  */
 496
 497 /* Each block has a bitmap of each type.
 498    The length of each blocks bitmap is:
 499
 500        max_cuid  - for reaching definitions
 501        n_exprs - for available expressions
 502
 503    Thus we view the bitmaps as 2 dimensional arrays.  i.e.
 504    rd_kill[block_num][cuid_num]
 505    ae_kill[block_num][expr_num]                  */
 506
 507 /* For reaching defs */
 508 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
 509
 510 /* for available exprs */
 511 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
 512
 513 /* Objects of this type are passed around by the null-pointer check
 514    removal routines.  */
 515 struct null_pointer_info
 516 {
 517   /* The basic block being processed.  */
 518   int current_block;
 519   /* The first register to be handled in this pass.  */
 520   unsigned int min_reg;
 521   /* One greater than the last register to be handled in this pass.  */
 522   unsigned int max_reg;
 523   sbitmap *nonnull_local;
 524   sbitmap *nonnull_killed;
 525 };
 526 \f
 527 static void compute_can_copy    PARAMS ((void));
 528 static char *gmalloc            PARAMS ((unsigned int));
 529 static char *grealloc           PARAMS ((char *, unsigned int));
 530 static char *gcse_alloc         PARAMS ((unsigned long));
 531 static void alloc_gcse_mem      PARAMS ((rtx));
 532 static void free_gcse_mem       PARAMS ((void));
 533 static void alloc_reg_set_mem   PARAMS ((int));
 534 static void free_reg_set_mem    PARAMS ((void));
 535 static int get_bitmap_width     PARAMS ((int, int, int));
 536 static void record_one_set      PARAMS ((int, rtx));
 537 static void record_set_info     PARAMS ((rtx, rtx, void *));
 538 static void compute_sets        PARAMS ((rtx));
 539 static void hash_scan_insn      PARAMS ((rtx, int, int));
 540 static void hash_scan_set       PARAMS ((rtx, rtx, int));
 541 static void hash_scan_clobber   PARAMS ((rtx, rtx));
 542 static void hash_scan_call      PARAMS ((rtx, rtx));
 543 static int want_to_gcse_p       PARAMS ((rtx));
 544 static int oprs_unchanged_p     PARAMS ((rtx, rtx, int));
 545 static int oprs_anticipatable_p PARAMS ((rtx, rtx));
 546 static int oprs_available_p     PARAMS ((rtx, rtx));
 547 static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
 548                                           int, int));
 549 static void insert_set_in_table PARAMS ((rtx, rtx));
 550 static unsigned int hash_expr   PARAMS ((rtx, enum machine_mode, int *, int));
 551 static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
 552 static unsigned int hash_set    PARAMS ((int, int));
 553 static int expr_equiv_p         PARAMS ((rtx, rtx));
 554 static void record_last_reg_set_info PARAMS ((rtx, int));
 555 static void record_last_mem_set_info PARAMS ((rtx));
 556 static void record_last_set_info PARAMS ((rtx, rtx, void *));
 557 static void compute_hash_table  PARAMS ((int));
 558 static void alloc_set_hash_table PARAMS ((int));
 559 static void free_set_hash_table PARAMS ((void));
 560 static void compute_set_hash_table PARAMS ((void));
 561 static void alloc_expr_hash_table PARAMS ((unsigned int));
 562 static void free_expr_hash_table PARAMS ((void));
 563 static void compute_expr_hash_table PARAMS ((void));
 564 static void dump_hash_table     PARAMS ((FILE *, const char *, struct expr **,
 565                                          int, int));
 566 static struct expr *lookup_expr PARAMS ((rtx));
 567 static struct expr *lookup_set  PARAMS ((unsigned int, rtx));
 568 static struct expr *next_set    PARAMS ((unsigned int, struct expr *));
 569 static void reset_opr_set_tables PARAMS ((void));
 570 static int oprs_not_set_p       PARAMS ((rtx, rtx));
 571 static void mark_call           PARAMS ((rtx));
 572 static void mark_set            PARAMS ((rtx, rtx));
 573 static void mark_clobber        PARAMS ((rtx, rtx));
 574 static void mark_oprs_set       PARAMS ((rtx));
 575 static void alloc_cprop_mem     PARAMS ((int, int));
 576 static void free_cprop_mem      PARAMS ((void));
 577 static void compute_transp      PARAMS ((rtx, int, sbitmap *, int));
 578 static void compute_transpout   PARAMS ((void));
 579 static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
 580                                               int));
 581 static void compute_cprop_data  PARAMS ((void));
 582 static void find_used_regs      PARAMS ((rtx));
 583 static int try_replace_reg      PARAMS ((rtx, rtx, rtx));
 584 static struct expr *find_avail_set PARAMS ((int, rtx));
 585 static int cprop_jump           PARAMS ((rtx, rtx, struct reg_use *, rtx));
 586 #ifdef HAVE_cc0
 587 static int cprop_cc0_jump       PARAMS ((rtx, struct reg_use *, rtx));
 588 #endif
 589 static int cprop_insn           PARAMS ((rtx, int));
 590 static int cprop                PARAMS ((int));
 591 static int one_cprop_pass       PARAMS ((int, int));
 592 static void alloc_pre_mem       PARAMS ((int, int));
 593 static void free_pre_mem        PARAMS ((void));
 594 static void compute_pre_data    PARAMS ((void));
 595 static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
 596 static void insert_insn_end_bb  PARAMS ((struct expr *, int, int));
 597 static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
 598 static void pre_insert_copies   PARAMS ((void));
 599 static int pre_delete           PARAMS ((void));
 600 static int pre_gcse             PARAMS ((void));
 601 static int one_pre_gcse_pass    PARAMS ((int));
 602 static void add_label_notes     PARAMS ((rtx, rtx));
 603 static void alloc_code_hoist_mem PARAMS ((int, int));
 604 static void free_code_hoist_mem PARAMS ((void));
 605 static void compute_code_hoist_vbeinout PARAMS ((void));
 606 static void compute_code_hoist_data PARAMS ((void));
 607 static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
 608 static void hoist_code          PARAMS ((void));
 609 static int one_code_hoisting_pass PARAMS ((void));
 610 static void alloc_rd_mem        PARAMS ((int, int));
 611 static void free_rd_mem         PARAMS ((void));
 612 static void handle_rd_kill_set  PARAMS ((rtx, int, int));
 613 static void compute_kill_rd     PARAMS ((void));
 614 static void compute_rd          PARAMS ((void));
 615 static void alloc_avail_expr_mem PARAMS ((int, int));
 616 static void free_avail_expr_mem PARAMS ((void));
 617 static void compute_ae_gen      PARAMS ((void));
 618 static int expr_killed_p        PARAMS ((rtx, int));
 619 static void compute_ae_kill     PARAMS ((sbitmap *, sbitmap *));
 620 static int expr_reaches_here_p  PARAMS ((struct occr *, struct expr *,
 621                                          int, int));
 622 static rtx computing_insn       PARAMS ((struct expr *, rtx));
 623 static int def_reaches_here_p   PARAMS ((rtx, rtx));
 624 static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
 625 static int handle_avail_expr    PARAMS ((rtx, struct expr *));
 626 static int classic_gcse         PARAMS ((void));
 627 static int one_classic_gcse_pass PARAMS ((int));
 628 static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
 629 static void delete_null_pointer_checks_1 PARAMS ((unsigned int *, sbitmap *,
 630                                                   sbitmap *,
 631                                                   struct null_pointer_info *));
 632 static rtx process_insert_insn  PARAMS ((struct expr *));
 633 static int pre_edge_insert      PARAMS ((struct edge_list *, struct expr **));
 634 static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
 635                                              int, int, char *));
 636 static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
 637                                                  int, char *));
 638 \f
 639 /* Entry point for global common subexpression elimination.
 640    F is the first instruction in the function.  */
 641
 642 int
 643 gcse_main (f, file)
 644      rtx f;
 645      FILE *file;
 646 {
 647   int changed, pass;
 648   /* Bytes used at start of pass.  */
 649   int initial_bytes_used;
 650   /* Maximum number of bytes used by a pass.  */
 651   int max_pass_bytes;
 652   /* Point to release obstack data from for each pass.  */
 653   char *gcse_obstack_bottom;
 654
 655   /* We do not construct an accurate cfg in functions which call
 656      setjmp, so just punt to be safe.  */
 657   if (current_function_calls_setjmp)
 658     return 0;
 659
 660   /* Assume that we do not need to run jump optimizations after gcse.  */
 661   run_jump_opt_after_gcse = 0;
 662
 663   /* For calling dump_foo fns from gdb.  */
 664   debug_stderr = stderr;
 665   gcse_file = file;
 666
 667   /* Identify the basic block information for this function, including
 668      successors and predecessors.  */
 669   max_gcse_regno = max_reg_num ();
 670
 671   if (file)
 672     dump_flow_info (file);
 673
 674   /* Return if there's nothing to do.  */
 675   if (n_basic_blocks <= 1)
 676     return 0;
 677
 678   /* Trying to perform global optimizations on flow graphs which have
 679      a high connectivity will take a long time and is unlikely to be
 680      particularly useful.
 681
 682      In normal circumstances a cfg should have about twice has many edges
 683      as blocks.  But we do not want to punish small functions which have
 684      a couple switch statements.  So we require a relatively large number
 685      of basic blocks and the ratio of edges to blocks to be high.  */
 686   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
 687     return 0;
 688
 689   /* See what modes support reg/reg copy operations.  */
 690   if (! can_copy_init_p)
 691     {
 692       compute_can_copy ();
 693       can_copy_init_p = 1;
 694     }
 695
 696   gcc_obstack_init (&gcse_obstack);
 697   bytes_used = 0;
 698
 699   /* Record where pseudo-registers are set.  This data is kept accurate
 700      during each pass.  ??? We could also record hard-reg information here
 701      [since it's unchanging], however it is currently done during hash table
 702      computation.
 703
 704      It may be tempting to compute MEM set information here too, but MEM sets
 705      will be subject to code motion one day and thus we need to compute
 706      information about memory sets when we build the hash tables.  */
 707
 708   alloc_reg_set_mem (max_gcse_regno);
 709   compute_sets (f);
 710
 711   pass = 0;
 712   initial_bytes_used = bytes_used;
 713   max_pass_bytes = 0;
 714   gcse_obstack_bottom = gcse_alloc (1);
 715   changed = 1;
 716   while (changed && pass < MAX_PASSES)
 717     {
 718       changed = 0;
 719       if (file)
 720         fprintf (file, "GCSE pass %d\n\n", pass + 1);
 721
 722       /* Initialize bytes_used to the space for the pred/succ lists,
 723          and the reg_set_table data.  */
 724       bytes_used = initial_bytes_used;
 725
 726       /* Each pass may create new registers, so recalculate each time.  */
 727       max_gcse_regno = max_reg_num ();
 728
 729       alloc_gcse_mem (f);
 730
 731       /* Don't allow constant propagation to modify jumps
 732          during this pass.  */
 733       changed = one_cprop_pass (pass + 1, 0);
 734
 735       if (optimize_size)
 736         changed |= one_classic_gcse_pass (pass + 1);
 737       else
 738         {
 739           changed |= one_pre_gcse_pass (pass + 1);
 740           free_reg_set_mem ();
 741           alloc_reg_set_mem (max_reg_num ());
 742           compute_sets (f);
 743           run_jump_opt_after_gcse = 1;
 744         }
 745
 746       if (max_pass_bytes < bytes_used)
 747         max_pass_bytes = bytes_used;
 748
 749       /* Free up memory, then reallocate for code hoisting.  We can
 750          not re-use the existing allocated memory because the tables
 751          will not have info for the insns or registers created by
 752          partial redundancy elimination.  */
 753       free_gcse_mem ();
 754
 755       /* It does not make sense to run code hoisting unless we optimizing
 756          for code size -- it rarely makes programs faster, and can make
 757          them bigger if we did partial redundancy elimination (when optimizing
 758          for space, we use a classic gcse algorithm instead of partial
 759          redundancy algorithms).  */
 760       if (optimize_size)
 761         {
 762           max_gcse_regno = max_reg_num ();
 763           alloc_gcse_mem (f);
 764           changed |= one_code_hoisting_pass ();
 765           free_gcse_mem ();
 766
 767           if (max_pass_bytes < bytes_used)
 768             max_pass_bytes = bytes_used;
 769         }
 770
 771       if (file)
 772         {
 773           fprintf (file, "\n");
 774           fflush (file);
 775         }
 776
 777       obstack_free (&gcse_obstack, gcse_obstack_bottom);
 778       pass++;
 779     }
 780
 781   /* Do one last pass of copy propagation, including cprop into
 782      conditional jumps.  */
 783
 784   max_gcse_regno = max_reg_num ();
 785   alloc_gcse_mem (f);
 786   /* This time, go ahead and allow cprop to alter jumps.  */
 787   one_cprop_pass (pass + 1, 1);
 788   free_gcse_mem ();
 789
 790   if (file)
 791     {
 792       fprintf (file, "GCSE of %s: %d basic blocks, ",
 793                current_function_name, n_basic_blocks);
 794       fprintf (file, "%d pass%s, %d bytes\n\n",
 795                pass, pass > 1 ? "es" : "", max_pass_bytes);
 796     }
 797
 798   obstack_free (&gcse_obstack, NULL_PTR);
 799   free_reg_set_mem ();
 800   return run_jump_opt_after_gcse;
 801 }
 802 \f
 803 /* Misc. utilities.  */
 804
 805 /* Compute which modes support reg/reg copy operations.  */
 806
 807 static void
 808 compute_can_copy ()
 809 {
 810   int i;
 811 #ifndef AVOID_CCMODE_COPIES
 812   rtx reg,insn;
 813 #endif
 814   char *free_point = (char *) oballoc (1);
 815
 816   bzero (can_copy_p, NUM_MACHINE_MODES);
 817
 818   start_sequence ();
 819   for (i = 0; i < NUM_MACHINE_MODES; i++)
 820     if (GET_MODE_CLASS (i) == MODE_CC)
 821       {
 822 #ifdef AVOID_CCMODE_COPIES
 823         can_copy_p[i] = 0;
 824 #else
 825         reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
 826         insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
 827         if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
 828           can_copy_p[i] = 1;
 829 #endif
 830       }
 831     else
 832       can_copy_p[i] = 1;
 833
 834   end_sequence ();
 835
 836   /* Free the objects we just allocated.  */
 837   obfree (free_point);
 838 }
 839 \f
 840 /* Cover function to xmalloc to record bytes allocated.  */
 841
 842 static char *
 843 gmalloc (size)
 844      unsigned int size;
 845 {
 846   bytes_used += size;
 847   return xmalloc (size);
 848 }
 849
 850 /* Cover function to xrealloc.
 851    We don't record the additional size since we don't know it.
 852    It won't affect memory usage stats much anyway.  */
 853
 854 static char *
 855 grealloc (ptr, size)
 856      char *ptr;
 857      unsigned int size;
 858 {
 859   return xrealloc (ptr, size);
 860 }
 861
 862 /* Cover function to obstack_alloc.
 863    We don't need to record the bytes allocated here since
 864    obstack_chunk_alloc is set to gmalloc.  */
 865
 866 static char *
 867 gcse_alloc (size)
 868      unsigned long size;
 869 {
 870   return (char *) obstack_alloc (&gcse_obstack, size);
 871 }
 872
 873 /* Allocate memory for the cuid mapping array,
 874    and reg/memory set tracking tables.
 875
 876    This is called at the start of each pass.  */
 877
 878 static void
 879 alloc_gcse_mem (f)
 880      rtx f;
 881 {
 882   int i,n;
 883   rtx insn;
 884
 885   /* Find the largest UID and create a mapping from UIDs to CUIDs.
 886      CUIDs are like UIDs except they increase monotonically, have no gaps,
 887      and only apply to real insns.  */
 888
 889   max_uid = get_max_uid ();
 890   n = (max_uid + 1) * sizeof (int);
 891   uid_cuid = (int *) gmalloc (n);
 892   bzero ((char *) uid_cuid, n);
 893   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 894     {
 895       if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 896         uid_cuid[INSN_UID (insn)] = i++;
 897       else
 898         uid_cuid[INSN_UID (insn)] = i;
 899     }
 900
 901   /* Create a table mapping cuids to insns.  */
 902
 903   max_cuid = i;
 904   n = (max_cuid + 1) * sizeof (rtx);
 905   cuid_insn = (rtx *) gmalloc (n);
 906   bzero ((char *) cuid_insn, n);
 907   for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
 908     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
 909       CUID_INSN (i++) = insn;
 910
 911   /* Allocate vars to track sets of regs.  */
 912   reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
 913
 914   /* Allocate vars to track sets of regs, memory per block.  */
 915   reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
 916                                                        max_gcse_regno);
 917   mem_set_in_block = (char *) gmalloc (n_basic_blocks);
 918 }
 919
 920 /* Free memory allocated by alloc_gcse_mem.  */
 921
 922 static void
 923 free_gcse_mem ()
 924 {
 925   free (uid_cuid);
 926   free (cuid_insn);
 927
 928   free (reg_set_bitmap);
 929
 930   free (reg_set_in_block);
 931   free (mem_set_in_block);
 932 }
 933
 934 /* Many of the global optimization algorithms work by solving dataflow
 935    equations for various expressions.  Initially, some local value is
 936    computed for each expression in each block.  Then, the values across the
 937    various blocks are combined (by following flow graph edges) to arrive at
 938    global values.  Conceptually, each set of equations is independent.  We
 939    may therefore solve all the equations in parallel, solve them one at a
 940    time, or pick any intermediate approach.
 941
 942    When you're going to need N two-dimensional bitmaps, each X (say, the
 943    number of blocks) by Y (say, the number of expressions), call this
 944    function.  It's not important what X and Y represent; only that Y
 945    correspond to the things that can be done in parallel.  This function will
 946    return an appropriate chunking factor C; you should solve C sets of
 947    equations in parallel.  By going through this function, we can easily
 948    trade space against time; by solving fewer equations in parallel we use
 949    less space.  */
 950
 951 static int
 952 get_bitmap_width (n, x, y)
 953      int n;
 954      int x;
 955      int y;
 956 {
 957   /* It's not really worth figuring out *exactly* how much memory will
 958      be used by a particular choice.  The important thing is to get
 959      something approximately right.  */
 960   size_t max_bitmap_memory = 10 * 1024 * 1024;
 961
 962   /* The number of bytes we'd use for a single column of minimum
 963      width.  */
 964   size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
 965
 966   /* Often, it's reasonable just to solve all the equations in
 967      parallel.  */
 968   if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
 969     return y;
 970
 971   /* Otherwise, pick the largest width we can, without going over the
 972      limit.  */
 973   return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
 974                              / column_size);
 975 }
 976 \f
 977 /* Compute the local properties of each recorded expression.
 978
 979    Local properties are those that are defined by the block, irrespective of
 980    other blocks.
 981
 982    An expression is transparent in a block if its operands are not modified
 983    in the block.
 984
 985    An expression is computed (locally available) in a block if it is computed
 986    at least once and expression would contain the same value if the
 987    computation was moved to the end of the block.
 988
 989    An expression is locally anticipatable in a block if it is computed at
 990    least once and expression would contain the same value if the computation
 991    was moved to the beginning of the block.
 992
 993    We call this routine for cprop, pre and code hoisting.  They all compute
 994    basically the same information and thus can easily share this code.
 995
 996    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
 997    properties.  If NULL, then it is not necessary to compute or record that
 998    particular property.
 999
1000    SETP controls which hash table to look at.  If zero, this routine looks at
1001    the expr hash table; if nonzero this routine looks at the set hash table.
1002    Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
1003    ABSALTERED.  */
1004
1005 static void
1006 compute_local_properties (transp, comp, antloc, setp)
1007      sbitmap *transp;
1008      sbitmap *comp;
1009      sbitmap *antloc;
1010      int setp;
1011 {
1012   unsigned int i, hash_table_size;
1013   struct expr **hash_table;
1014
1015   /* Initialize any bitmaps that were passed in.  */
1016   if (transp)
1017     {
1018       if (setp)
1019         sbitmap_vector_zero (transp, n_basic_blocks);
1020       else
1021         sbitmap_vector_ones (transp, n_basic_blocks);
1022     }
1023
1024   if (comp)
1025     sbitmap_vector_zero (comp, n_basic_blocks);
1026   if (antloc)
1027     sbitmap_vector_zero (antloc, n_basic_blocks);
1028
1029   /* We use the same code for cprop, pre and hoisting.  For cprop
1030      we care about the set hash table, for pre and hoisting we
1031      care about the expr hash table.  */
1032   hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
1033   hash_table = setp ? set_hash_table : expr_hash_table;
1034
1035   for (i = 0; i < hash_table_size; i++)
1036     {
1037       struct expr *expr;
1038
1039       for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
1040         {
1041           int indx = expr->bitmap_index;
1042           struct occr *occr;
1043
1044           /* The expression is transparent in this block if it is not killed.
1045              We start by assuming all are transparent [none are killed], and
1046              then reset the bits for those that are.  */
1047           if (transp)
1048             compute_transp (expr->expr, indx, transp, setp);
1049
1050           /* The occurrences recorded in antic_occr are exactly those that
1051              we want to set to non-zero in ANTLOC.  */
1052           if (antloc)
1053             for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1054               {
1055                 SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);
1056
1057                 /* While we're scanning the table, this is a good place to
1058                    initialize this.  */
1059                 occr->deleted_p = 0;
1060               }
1061
1062           /* The occurrences recorded in avail_occr are exactly those that
1063              we want to set to non-zero in COMP.  */
1064           if (comp)
1065             for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1066               {
1067                 SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);
1068
1069                 /* While we're scanning the table, this is a good place to
1070                    initialize this.  */
1071                 occr->copied_p = 0;
1072               }
1073
1074           /* While we're scanning the table, this is a good place to
1075              initialize this.  */
1076           expr->reaching_reg = 0;
1077         }
1078     }
1079 }
1080 \f
1081 /* Register set information.
1082
1083    `reg_set_table' records where each register is set or otherwise
1084    modified.  */
1085
1086 static struct obstack reg_set_obstack;
1087
1088 static void
1089 alloc_reg_set_mem (n_regs)
1090      int n_regs;
1091 {
1092   unsigned int n;
1093
1094   reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
1095   n = reg_set_table_size * sizeof (struct reg_set *);
1096   reg_set_table = (struct reg_set **) gmalloc (n);
1097   bzero ((char *) reg_set_table, n);
1098
1099   gcc_obstack_init (&reg_set_obstack);
1100 }
1101
1102 static void
1103 free_reg_set_mem ()
1104 {
1105   free (reg_set_table);
1106   obstack_free (&reg_set_obstack, NULL_PTR);
1107 }
1108
1109 /* Record REGNO in the reg_set table.  */
1110
1111 static void
1112 record_one_set (regno, insn)
1113      int regno;
1114      rtx insn;
1115 {
1116   /* allocate a new reg_set element and link it onto the list */
1117   struct reg_set *new_reg_info;
1118
1119   /* If the table isn't big enough, enlarge it.  */
1120   if (regno >= reg_set_table_size)
1121     {
1122       int new_size = regno + REG_SET_TABLE_SLOP;
1123
1124       reg_set_table
1125         = (struct reg_set **) grealloc ((char *) reg_set_table,
1126                                         new_size * sizeof (struct reg_set *));
1127       bzero ((char *) (reg_set_table + reg_set_table_size),
1128              (new_size - reg_set_table_size) * sizeof (struct reg_set *));
1129       reg_set_table_size = new_size;
1130     }
1131
1132   new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
1133                                                    sizeof (struct reg_set));
1134   bytes_used += sizeof (struct reg_set);
1135   new_reg_info->insn = insn;
1136   new_reg_info->next = reg_set_table[regno];
1137   reg_set_table[regno] = new_reg_info;
1138 }
1139
1140 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
1141    an insn.  The DATA is really the instruction in which the SET is
1142    occurring.  */
1143
1144 static void
1145 record_set_info (dest, setter, data)
1146      rtx dest, setter ATTRIBUTE_UNUSED;
1147      void *data;
1148 {
1149   rtx record_set_insn = (rtx) data;
1150
1151   if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1152     record_one_set (REGNO (dest), record_set_insn);
1153 }
1154
1155 /* Scan the function and record each set of each pseudo-register.
1156
1157    This is called once, at the start of the gcse pass.  See the comments for
1158    `reg_set_table' for further documenation.  */
1159
1160 static void
1161 compute_sets (f)
1162      rtx f;
1163 {
1164   rtx insn;
1165
1166   for (insn = f; insn != 0; insn = NEXT_INSN (insn))
1167     if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1168       note_stores (PATTERN (insn), record_set_info, insn);
1169 }
1170 \f
1171 /* Hash table support.  */
1172
1173 /* For each register, the cuid of the first/last insn in the block to set it,
1174    or -1 if not set.  */
1175 #define NEVER_SET -1
1176 static int *reg_first_set;
1177 static int *reg_last_set;
1178
1179 /* While computing "first/last set" info, this is the CUID of first/last insn
1180    to set memory or -1 if not set.  `mem_last_set' is also used when
1181    performing GCSE to record whether memory has been set since the beginning
1182    of the block.
1183
1184    Note that handling of memory is very simple, we don't make any attempt
1185    to optimize things (later).  */
1186 static int mem_first_set;
1187 static int mem_last_set;
1188
1189 /* Perform a quick check whether X, the source of a set, is something
1190    we want to consider for GCSE.  */
1191
1192 static int
1193 want_to_gcse_p (x)
1194      rtx x;
1195 {
1196   switch (GET_CODE (x))
1197     {
1198     case REG:
1199     case SUBREG:
1200     case CONST_INT:
1201     case CONST_DOUBLE:
1202     case CALL:
1203       return 0;
1204
1205     default:
1206       break;
1207     }
1208
1209   return 1;
1210 }
1211
1212 /* Return non-zero if the operands of expression X are unchanged from the
1213    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1214    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
1215
1216 static int
1217 oprs_unchanged_p (x, insn, avail_p)
1218      rtx x, insn;
1219      int avail_p;
1220 {
1221   int i, j;
1222   enum rtx_code code;
1223   const char *fmt;
1224
1225   if (x == 0)
1226     return 1;
1227
1228   code = GET_CODE (x);
1229   switch (code)
1230     {
1231     case REG:
1232       if (avail_p)
1233         return (reg_last_set[REGNO (x)] == NEVER_SET
1234                 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1235       else
1236         return (reg_first_set[REGNO (x)] == NEVER_SET
1237                 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1238
1239     case MEM:
1240       if (avail_p && mem_last_set != NEVER_SET
1241           && mem_last_set >= INSN_CUID (insn))
1242         return 0;
1243       else if (! avail_p && mem_first_set != NEVER_SET
1244                && mem_first_set < INSN_CUID (insn))
1245         return 0;
1246       else
1247         return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
1248
1249     case PRE_DEC:
1250     case PRE_INC:
1251     case POST_DEC:
1252     case POST_INC:
1253       return 0;
1254
1255     case PC:
1256     case CC0: /*FIXME*/
1257     case CONST:
1258     case CONST_INT:
1259     case CONST_DOUBLE:
1260     case SYMBOL_REF:
1261     case LABEL_REF:
1262     case ADDR_VEC:
1263     case ADDR_DIFF_VEC:
1264       return 1;
1265
1266     default:
1267       break;
1268     }
1269
1270   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1271     {
1272       if (fmt[i] == 'e')
1273         {
1274           /* If we are about to do the last recursive call needed at this
1275              level, change it into iteration.  This function is called enough
1276              to be worth it.  */
1277           if (i == 0)
1278             return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
1279
1280           else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
1281             return 0;
1282         }
1283       else if (fmt[i] == 'E')
1284         for (j = 0; j < XVECLEN (x, i); j++)
1285           if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1286             return 0;
1287     }
1288
1289   return 1;
1290 }
1291
1292 /* Return non-zero if the operands of expression X are unchanged from
1293    the start of INSN's basic block up to but not including INSN.  */
1294
1295 static int
1296 oprs_anticipatable_p (x, insn)
1297      rtx x, insn;
1298 {
1299   return oprs_unchanged_p (x, insn, 0);
1300 }
1301
1302 /* Return non-zero if the operands of expression X are unchanged from
1303    INSN to the end of INSN's basic block.  */
1304
1305 static int
1306 oprs_available_p (x, insn)
1307      rtx x, insn;
1308 {
1309   return oprs_unchanged_p (x, insn, 1);
1310 }
1311
1312 /* Hash expression X.
1313
1314    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1315    indicating if a volatile operand is found or if the expression contains
1316    something we don't want to insert in the table.
1317
1318    ??? One might want to merge this with canon_hash.  Later.  */
1319
1320 static unsigned int
1321 hash_expr (x, mode, do_not_record_p, hash_table_size)
1322      rtx x;
1323      enum machine_mode mode;
1324      int *do_not_record_p;
1325      int hash_table_size;
1326 {
1327   unsigned int hash;
1328
1329   *do_not_record_p = 0;
1330
1331   hash = hash_expr_1 (x, mode, do_not_record_p);
1332   return hash % hash_table_size;
1333 }
1334
1335 /* Subroutine of hash_expr to do the actual work.  */
1336
1337 static unsigned int
1338 hash_expr_1 (x, mode, do_not_record_p)
1339      rtx x;
1340      enum machine_mode mode;
1341      int *do_not_record_p;
1342 {
1343   int i, j;
1344   unsigned hash = 0;
1345   enum rtx_code code;
1346   const char *fmt;
1347
1348   /* Used to turn recursion into iteration.  We can't rely on GCC's
1349      tail-recursion eliminatio since we need to keep accumulating values
1350      in HASH.  */
1351
1352   if (x == 0)
1353     return hash;
1354
1355  repeat:
1356   code = GET_CODE (x);
1357   switch (code)
1358     {
1359     case REG:
1360       hash += ((unsigned int) REG << 7) + REGNO (x);
1361       return hash;
1362
1363     case CONST_INT:
1364       hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
1365                + (unsigned int) INTVAL (x));
1366       return hash;
1367
1368     case CONST_DOUBLE:
1369       /* This is like the general case, except that it only counts
1370          the integers representing the constant.  */
1371       hash += (unsigned int) code + (unsigned int) GET_MODE (x);
1372       if (GET_MODE (x) != VOIDmode)
1373         for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1374           hash += (unsigned int) XWINT (x, i);
1375       else
1376         hash += ((unsigned int) CONST_DOUBLE_LOW (x)
1377                  + (unsigned int) CONST_DOUBLE_HIGH (x));
1378       return hash;
1379
1380       /* Assume there is only one rtx object for any given label.  */
1381     case LABEL_REF:
1382       /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1383          differences and differences between each stage's debugging dumps.  */
1384       hash += (((unsigned int) LABEL_REF << 7)
1385                + CODE_LABEL_NUMBER (XEXP (x, 0)));
1386       return hash;
1387
1388     case SYMBOL_REF:
1389       {
1390         /* Don't hash on the symbol's address to avoid bootstrap differences.
1391            Different hash values may cause expressions to be recorded in
1392            different orders and thus different registers to be used in the
1393            final assembler.  This also avoids differences in the dump files
1394            between various stages.  */
1395         unsigned int h = 0;
1396         const unsigned char *p = (const unsigned char *) XSTR (x, 0);
1397
1398         while (*p)
1399           h += (h << 7) + *p++; /* ??? revisit */
1400
1401         hash += ((unsigned int) SYMBOL_REF << 7) + h;
1402         return hash;
1403       }
1404
1405     case MEM:
1406       if (MEM_VOLATILE_P (x))
1407         {
1408           *do_not_record_p = 1;
1409           return 0;
1410         }
1411
1412       hash += (unsigned int) MEM;
1413       hash += MEM_ALIAS_SET (x);
1414       x = XEXP (x, 0);
1415       goto repeat;
1416
1417     case PRE_DEC:
1418     case PRE_INC:
1419     case POST_DEC:
1420     case POST_INC:
1421     case PC:
1422     case CC0:
1423     case CALL:
1424     case UNSPEC_VOLATILE:
1425       *do_not_record_p = 1;
1426       return 0;
1427
1428     case ASM_OPERANDS:
1429       if (MEM_VOLATILE_P (x))
1430         {
1431           *do_not_record_p = 1;
1432           return 0;
1433         }
1434
1435     default:
1436       break;
1437     }
1438
1439   hash += (unsigned) code + (unsigned) GET_MODE (x);
1440   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1441     {
1442       if (fmt[i] == 'e')
1443         {
1444           /* If we are about to do the last recursive call
1445              needed at this level, change it into iteration.
1446              This function is called enough to be worth it.  */
1447           if (i == 0)
1448             {
1449               x = XEXP (x, i);
1450               goto repeat;
1451             }
1452
1453           hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p);
1454           if (*do_not_record_p)
1455             return 0;
1456         }
1457
1458       else if (fmt[i] == 'E')
1459         for (j = 0; j < XVECLEN (x, i); j++)
1460           {
1461             hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1462             if (*do_not_record_p)
1463               return 0;
1464           }
1465
1466       else if (fmt[i] == 's')
1467         {
1468           register const unsigned char *p =
1469             (const unsigned char *) XSTR (x, i);
1470
1471           if (p)
1472             while (*p)
1473               hash += *p++;
1474         }
1475       else if (fmt[i] == 'i')
1476         hash += (unsigned int) XINT (x, i);
1477       else
1478         abort ();
1479     }
1480
1481   return hash;
1482 }
1483
1484 /* Hash a set of register REGNO.
1485
1486    Sets are hashed on the register that is set.  This simplifies the PRE copy
1487    propagation code.
1488
1489    ??? May need to make things more elaborate.  Later, as necessary.  */
1490
1491 static unsigned int
1492 hash_set (regno, hash_table_size)
1493      int regno;
1494      int hash_table_size;
1495 {
1496   unsigned int hash;
1497
1498   hash = regno;
1499   return hash % hash_table_size;
1500 }
1501
1502 /* Return non-zero if exp1 is equivalent to exp2.
1503    ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */
1504
1505 static int
1506 expr_equiv_p (x, y)
1507      rtx x, y;
1508 {
1509   register int i, j;
1510   register enum rtx_code code;
1511   register const char *fmt;
1512
1513   if (x == y)
1514     return 1;
1515
1516   if (x == 0 || y == 0)
1517     return x == y;
1518
1519   code = GET_CODE (x);
1520   if (code != GET_CODE (y))
1521     return 0;
1522
1523   /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
1524   if (GET_MODE (x) != GET_MODE (y))
1525     return 0;
1526
1527   switch (code)
1528     {
1529     case PC:
1530     case CC0:
1531       return x == y;
1532
1533     case CONST_INT:
1534       return INTVAL (x) == INTVAL (y);
1535
1536     case LABEL_REF:
1537       return XEXP (x, 0) == XEXP (y, 0);
1538
1539     case SYMBOL_REF:
1540       return XSTR (x, 0) == XSTR (y, 0);
1541
1542     case REG:
1543       return REGNO (x) == REGNO (y);
1544
1545     case MEM:
1546       /* Can't merge two expressions in different alias sets, since we can
1547          decide that the expression is transparent in a block when it isn't,
1548          due to it being set with the different alias set.  */
1549       if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
1550         return 0;
1551       break;
1552
1553     /*  For commutative operations, check both orders.  */
1554     case PLUS:
1555     case MULT:
1556     case AND:
1557     case IOR:
1558     case XOR:
1559     case NE:
1560     case EQ:
1561       return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1562                && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1563               || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1564                   && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1565
1566     default:
1567       break;
1568     }
1569
1570   /* Compare the elements.  If any pair of corresponding elements
1571      fail to match, return 0 for the whole thing.  */
1572
1573   fmt = GET_RTX_FORMAT (code);
1574   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1575     {
1576       switch (fmt[i])
1577         {
1578         case 'e':
1579           if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1580             return 0;
1581           break;
1582
1583         case 'E':
1584           if (XVECLEN (x, i) != XVECLEN (y, i))
1585             return 0;
1586           for (j = 0; j < XVECLEN (x, i); j++)
1587             if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1588               return 0;
1589           break;
1590
1591         case 's':
1592           if (strcmp (XSTR (x, i), XSTR (y, i)))
1593             return 0;
1594           break;
1595
1596         case 'i':
1597           if (XINT (x, i) != XINT (y, i))
1598             return 0;
1599           break;
1600
1601         case 'w':
1602           if (XWINT (x, i) != XWINT (y, i))
1603             return 0;
1604         break;
1605
1606         case '0':
1607           break;
1608
1609         default:
1610           abort ();
1611         }
1612       }
1613
1614   return 1;
1615 }
1616
1617 /* Insert expression X in INSN in the hash table.
1618    If it is already present, record it as the last occurrence in INSN's
1619    basic block.
1620
1621    MODE is the mode of the value X is being stored into.
1622    It is only used if X is a CONST_INT.
1623
1624    ANTIC_P is non-zero if X is an anticipatable expression.
1625    AVAIL_P is non-zero if X is an available expression.  */
1626
1627 static void
1628 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1629      rtx x;
1630      enum machine_mode mode;
1631      rtx insn;
1632      int antic_p, avail_p;
1633 {
1634   int found, do_not_record_p;
1635   unsigned int hash;
1636   struct expr *cur_expr, *last_expr = NULL;
1637   struct occr *antic_occr, *avail_occr;
1638   struct occr *last_occr = NULL;
1639
1640   hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1641
1642   /* Do not insert expression in table if it contains volatile operands,
1643      or if hash_expr determines the expression is something we don't want
1644      to or can't handle.  */
1645   if (do_not_record_p)
1646     return;
1647
1648   cur_expr = expr_hash_table[hash];
1649   found = 0;
1650
1651   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1652     {
1653       /* If the expression isn't found, save a pointer to the end of
1654          the list.  */
1655       last_expr = cur_expr;
1656       cur_expr = cur_expr->next_same_hash;
1657     }
1658
1659   if (! found)
1660     {
1661       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1662       bytes_used += sizeof (struct expr);
1663       if (expr_hash_table[hash] == NULL)
1664         /* This is the first pattern that hashed to this index.  */
1665         expr_hash_table[hash] = cur_expr;
1666       else
1667         /* Add EXPR to end of this hash chain.  */
1668         last_expr->next_same_hash = cur_expr;
1669
1670       /* Set the fields of the expr element.  */
1671       cur_expr->expr = x;
1672       cur_expr->bitmap_index = n_exprs++;
1673       cur_expr->next_same_hash = NULL;
1674       cur_expr->antic_occr = NULL;
1675       cur_expr->avail_occr = NULL;
1676     }
1677
1678   /* Now record the occurrence(s).  */
1679   if (antic_p)
1680     {
1681       antic_occr = cur_expr->antic_occr;
1682
1683       /* Search for another occurrence in the same basic block.  */
1684       while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1685         {
1686           /* If an occurrence isn't found, save a pointer to the end of
1687              the list.  */
1688           last_occr = antic_occr;
1689           antic_occr = antic_occr->next;
1690         }
1691
1692       if (antic_occr)
1693         /* Found another instance of the expression in the same basic block.
1694            Prefer the currently recorded one.  We want the first one in the
1695            block and the block is scanned from start to end.  */
1696         ; /* nothing to do */
1697       else
1698         {
1699           /* First occurrence of this expression in this basic block.  */
1700           antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1701           bytes_used += sizeof (struct occr);
1702           /* First occurrence of this expression in any block?  */
1703           if (cur_expr->antic_occr == NULL)
1704             cur_expr->antic_occr = antic_occr;
1705           else
1706             last_occr->next = antic_occr;
1707
1708           antic_occr->insn = insn;
1709           antic_occr->next = NULL;
1710         }
1711     }
1712
1713   if (avail_p)
1714     {
1715       avail_occr = cur_expr->avail_occr;
1716
1717       /* Search for another occurrence in the same basic block.  */
1718       while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1719         {
1720           /* If an occurrence isn't found, save a pointer to the end of
1721              the list.  */
1722           last_occr = avail_occr;
1723           avail_occr = avail_occr->next;
1724         }
1725
1726       if (avail_occr)
1727         /* Found another instance of the expression in the same basic block.
1728            Prefer this occurrence to the currently recorded one.  We want
1729            the last one in the block and the block is scanned from start
1730            to end.  */
1731         avail_occr->insn = insn;
1732       else
1733         {
1734           /* First occurrence of this expression in this basic block.  */
1735           avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1736           bytes_used += sizeof (struct occr);
1737
1738           /* First occurrence of this expression in any block?  */
1739           if (cur_expr->avail_occr == NULL)
1740             cur_expr->avail_occr = avail_occr;
1741           else
1742             last_occr->next = avail_occr;
1743
1744           avail_occr->insn = insn;
1745           avail_occr->next = NULL;
1746         }
1747     }
1748 }
1749
1750 /* Insert pattern X in INSN in the hash table.
1751    X is a SET of a reg to either another reg or a constant.
1752    If it is already present, record it as the last occurrence in INSN's
1753    basic block.  */
1754
1755 static void
1756 insert_set_in_table (x, insn)
1757      rtx x;
1758      rtx insn;
1759 {
1760   int found;
1761   unsigned int hash;
1762   struct expr *cur_expr, *last_expr = NULL;
1763   struct occr *cur_occr, *last_occr = NULL;
1764
1765   if (GET_CODE (x) != SET
1766       || GET_CODE (SET_DEST (x)) != REG)
1767     abort ();
1768
1769   hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1770
1771   cur_expr = set_hash_table[hash];
1772   found = 0;
1773
1774   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1775     {
1776       /* If the expression isn't found, save a pointer to the end of
1777          the list.  */
1778       last_expr = cur_expr;
1779       cur_expr = cur_expr->next_same_hash;
1780     }
1781
1782   if (! found)
1783     {
1784       cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1785       bytes_used += sizeof (struct expr);
1786       if (set_hash_table[hash] == NULL)
1787         /* This is the first pattern that hashed to this index.  */
1788         set_hash_table[hash] = cur_expr;
1789       else
1790         /* Add EXPR to end of this hash chain.  */
1791         last_expr->next_same_hash = cur_expr;
1792
1793       /* Set the fields of the expr element.
1794          We must copy X because it can be modified when copy propagation is
1795          performed on its operands.  */
1796       /* ??? Should this go in a different obstack?  */
1797       cur_expr->expr = copy_rtx (x);
1798       cur_expr->bitmap_index = n_sets++;
1799       cur_expr->next_same_hash = NULL;
1800       cur_expr->antic_occr = NULL;
1801       cur_expr->avail_occr = NULL;
1802     }
1803
1804   /* Now record the occurrence.  */
1805   cur_occr = cur_expr->avail_occr;
1806
1807   /* Search for another occurrence in the same basic block.  */
1808   while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1809     {
1810       /* If an occurrence isn't found, save a pointer to the end of
1811          the list.  */
1812       last_occr = cur_occr;
1813       cur_occr = cur_occr->next;
1814     }
1815
1816   if (cur_occr)
1817     /* Found another instance of the expression in the same basic block.
1818        Prefer this occurrence to the currently recorded one.  We want the
1819        last one in the block and the block is scanned from start to end.  */
1820     cur_occr->insn = insn;
1821   else
1822     {
1823       /* First occurrence of this expression in this basic block.  */
1824       cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1825       bytes_used += sizeof (struct occr);
1826
1827       /* First occurrence of this expression in any block?  */
1828       if (cur_expr->avail_occr == NULL)
1829         cur_expr->avail_occr = cur_occr;
1830       else
1831         last_occr->next = cur_occr;
1832
1833       cur_occr->insn = insn;
1834       cur_occr->next = NULL;
1835     }
1836 }
1837
1838 /* Scan pattern PAT of INSN and add an entry to the hash table.  If SET_P is
1839    non-zero, this is for the assignment hash table, otherwise it is for the
1840    expression hash table.  */
1841
1842 static void
1843 hash_scan_set (pat, insn, set_p)
1844      rtx pat, insn;
1845      int set_p;
1846 {
1847   rtx src = SET_SRC (pat);
1848   rtx dest = SET_DEST (pat);
1849
1850   if (GET_CODE (src) == CALL)
1851     hash_scan_call (src, insn);
1852
1853   if (GET_CODE (dest) == REG)
1854     {
1855       int regno = REGNO (dest);
1856       rtx tmp;
1857
1858       /* Only record sets of pseudo-regs in the hash table.  */
1859       if (! set_p
1860           && regno >= FIRST_PSEUDO_REGISTER
1861           /* Don't GCSE something if we can't do a reg/reg copy.  */
1862           && can_copy_p [GET_MODE (dest)]
1863           /* Is SET_SRC something we want to gcse?  */
1864           && want_to_gcse_p (src))
1865         {
1866           /* An expression is not anticipatable if its operands are
1867              modified before this insn.  */
1868           int antic_p = oprs_anticipatable_p (src, insn);
1869           /* An expression is not available if its operands are
1870              subsequently modified, including this insn.  */
1871           int avail_p = oprs_available_p (src, insn);
1872
1873           insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1874         }
1875
1876       /* Record sets for constant/copy propagation.  */
1877       else if (set_p
1878                && regno >= FIRST_PSEUDO_REGISTER
1879                && ((GET_CODE (src) == REG
1880                     && REGNO (src) >= FIRST_PSEUDO_REGISTER
1881                     && can_copy_p [GET_MODE (dest)])
1882                    || GET_CODE (src) == CONST_INT
1883                    || GET_CODE (src) == SYMBOL_REF
1884                    || GET_CODE (src) == CONST_DOUBLE)
1885                /* A copy is not available if its src or dest is subsequently
1886                   modified.  Here we want to search from INSN+1 on, but
1887                   oprs_available_p searches from INSN on.  */
1888                && (insn == BLOCK_END (BLOCK_NUM (insn))
1889                    || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1890                        && oprs_available_p (pat, tmp))))
1891         insert_set_in_table (pat, insn);
1892     }
1893 }
1894
1895 static void
1896 hash_scan_clobber (x, insn)
1897      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1898 {
1899   /* Currently nothing to do.  */
1900 }
1901
1902 static void
1903 hash_scan_call (x, insn)
1904      rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1905 {
1906   /* Currently nothing to do.  */
1907 }
1908
1909 /* Process INSN and add hash table entries as appropriate.
1910
1911    Only available expressions that set a single pseudo-reg are recorded.
1912
1913    Single sets in a PARALLEL could be handled, but it's an extra complication
1914    that isn't dealt with right now.  The trick is handling the CLOBBERs that
1915    are also in the PARALLEL.  Later.
1916
1917    If SET_P is non-zero, this is for the assignment hash table,
1918    otherwise it is for the expression hash table.
1919    If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1920    not record any expressions.  */
1921
1922 static void
1923 hash_scan_insn (insn, set_p, in_libcall_block)
1924      rtx insn;
1925      int set_p;
1926      int in_libcall_block;
1927 {
1928   rtx pat = PATTERN (insn);
1929   int i;
1930
1931   /* Pick out the sets of INSN and for other forms of instructions record
1932      what's been modified.  */
1933
1934   if (GET_CODE (pat) == SET && ! in_libcall_block)
1935     {
1936       /* Ignore obvious no-ops.  */
1937       if (SET_SRC (pat) != SET_DEST (pat))
1938         hash_scan_set (pat, insn, set_p);
1939     }
1940   else if (GET_CODE (pat) == PARALLEL)
1941     for (i = 0; i < XVECLEN (pat, 0); i++)
1942       {
1943         rtx x = XVECEXP (pat, 0, i);
1944
1945         if (GET_CODE (x) == SET)
1946           {
1947             if (GET_CODE (SET_SRC (x)) == CALL)
1948               hash_scan_call (SET_SRC (x), insn);
1949           }
1950         else if (GET_CODE (x) == CLOBBER)
1951           hash_scan_clobber (x, insn);
1952         else if (GET_CODE (x) == CALL)
1953           hash_scan_call (x, insn);
1954       }
1955
1956   else if (GET_CODE (pat) == CLOBBER)
1957     hash_scan_clobber (pat, insn);
1958   else if (GET_CODE (pat) == CALL)
1959     hash_scan_call (pat, insn);
1960 }
1961
1962 static void
1963 dump_hash_table (file, name, table, table_size, total_size)
1964      FILE *file;
1965      const char *name;
1966      struct expr **table;
1967      int table_size, total_size;
1968 {
1969   int i;
1970   /* Flattened out table, so it's printed in proper order.  */
1971   struct expr **flat_table;
1972   unsigned int *hash_val;
1973   struct expr *expr;
1974
1975   flat_table
1976     = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
1977   hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
1978
1979   for (i = 0; i < table_size; i++)
1980     for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1981       {
1982         flat_table[expr->bitmap_index] = expr;
1983         hash_val[expr->bitmap_index] = i;
1984       }
1985
1986   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1987            name, table_size, total_size);
1988
1989   for (i = 0; i < total_size; i++)
1990     if (flat_table[i] != 0)
1991       {
1992         expr = flat_table[i];
1993         fprintf (file, "Index %d (hash value %d)\n  ",
1994                  expr->bitmap_index, hash_val[i]);
1995         print_rtl (file, expr->expr);
1996         fprintf (file, "\n");
1997       }
1998
1999   fprintf (file, "\n");
2000
2001   free (flat_table);
2002   free (hash_val);
2003 }
2004
2005 /* Record register first/last/block set information for REGNO in INSN.
2006
2007    reg_first_set records the first place in the block where the register
2008    is set and is used to compute "anticipatability".
2009
2010    reg_last_set records the last place in the block where the register
2011    is set and is used to compute "availability".
2012
2013    reg_set_in_block records whether the register is set in the block
2014    and is used to compute "transparency".  */
2015
2016 static void
2017 record_last_reg_set_info (insn, regno)
2018      rtx insn;
2019      int regno;
2020 {
2021   if (reg_first_set[regno] == NEVER_SET)
2022     reg_first_set[regno] = INSN_CUID (insn);
2023
2024   reg_last_set[regno] = INSN_CUID (insn);
2025   SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2026 }
2027
2028 /* Record memory first/last/block set information for INSN.  */
2029
2030 static void
2031 record_last_mem_set_info (insn)
2032      rtx insn;
2033 {
2034   if (mem_first_set == NEVER_SET)
2035     mem_first_set = INSN_CUID (insn);
2036
2037   mem_last_set = INSN_CUID (insn);
2038   mem_set_in_block[BLOCK_NUM (insn)] = 1;
2039 }
2040
2041 /* Called from compute_hash_table via note_stores to handle one
2042    SET or CLOBBER in an insn.  DATA is really the instruction in which
2043    the SET is taking place.  */
2044
2045 static void
2046 record_last_set_info (dest, setter, data)
2047      rtx dest, setter ATTRIBUTE_UNUSED;
2048      void *data;
2049 {
2050   rtx last_set_insn = (rtx) data;
2051
2052   if (GET_CODE (dest) == SUBREG)
2053     dest = SUBREG_REG (dest);
2054
2055   if (GET_CODE (dest) == REG)
2056     record_last_reg_set_info (last_set_insn, REGNO (dest));
2057   else if (GET_CODE (dest) == MEM
2058            /* Ignore pushes, they clobber nothing.  */
2059            && ! push_operand (dest, GET_MODE (dest)))
2060     record_last_mem_set_info (last_set_insn);
2061 }
2062
2063 /* Top level function to create an expression or assignment hash table.
2064
2065    Expression entries are placed in the hash table if
2066    - they are of the form (set (pseudo-reg) src),
2067    - src is something we want to perform GCSE on,
2068    - none of the operands are subsequently modified in the block
2069
2070    Assignment entries are placed in the hash table if
2071    - they are of the form (set (pseudo-reg) src),
2072    - src is something we want to perform const/copy propagation on,
2073    - none of the operands or target are subsequently modified in the block
2074
2075    Currently src must be a pseudo-reg or a const_int.
2076
2077    F is the first insn.
2078    SET_P is non-zero for computing the assignment hash table.  */
2079
2080 static void
2081 compute_hash_table (set_p)
2082      int set_p;
2083 {
2084   int bb;
2085
2086   /* While we compute the hash table we also compute a bit array of which
2087      registers are set in which blocks.
2088      We also compute which blocks set memory, in the absence of aliasing
2089      support [which is TODO].
2090      ??? This isn't needed during const/copy propagation, but it's cheap to
2091      compute.  Later.  */
2092   sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2093   bzero ((char *) mem_set_in_block, n_basic_blocks);
2094
2095   /* Some working arrays used to track first and last set in each block.  */
2096   /* ??? One could use alloca here, but at some size a threshold is crossed
2097      beyond which one should use malloc.  Are we at that threshold here?  */
2098   reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2099   reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2100
2101   for (bb = 0; bb < n_basic_blocks; bb++)
2102     {
2103       rtx insn;
2104       unsigned int regno;
2105       int in_libcall_block;
2106       unsigned int i;
2107
2108       /* First pass over the instructions records information used to
2109          determine when registers and memory are first and last set.
2110          ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2111          could be moved to compute_sets since they currently don't change.  */
2112
2113       for (i = 0; i < max_gcse_regno; i++)
2114         reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2115
2116       mem_first_set = NEVER_SET;
2117       mem_last_set = NEVER_SET;
2118
2119       for (insn = BLOCK_HEAD (bb);
2120            insn && insn != NEXT_INSN (BLOCK_END (bb));
2121            insn = NEXT_INSN (insn))
2122         {
2123 #ifdef NON_SAVING_SETJMP
2124           if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2125               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2126             {
2127               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2128                 record_last_reg_set_info (insn, regno);
2129               continue;
2130             }
2131 #endif
2132
2133           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2134             continue;
2135
2136           if (GET_CODE (insn) == CALL_INSN)
2137             {
2138               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2139                 if ((call_used_regs[regno]
2140                      && regno != STACK_POINTER_REGNUM
2141 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2142                      && regno != HARD_FRAME_POINTER_REGNUM
2143 #endif
2144 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2145                      && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2146 #endif
2147 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2148                      && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2149 #endif
2150
2151                      && regno != FRAME_POINTER_REGNUM)
2152                     || global_regs[regno])
2153                   record_last_reg_set_info (insn, regno);
2154
2155               if (! CONST_CALL_P (insn))
2156                 record_last_mem_set_info (insn);
2157             }
2158
2159           note_stores (PATTERN (insn), record_last_set_info, insn);
2160         }
2161
2162       /* The next pass builds the hash table.  */
2163
2164       for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2165            insn && insn != NEXT_INSN (BLOCK_END (bb));
2166            insn = NEXT_INSN (insn))
2167         if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2168           {
2169             if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2170               in_libcall_block = 1;
2171             else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2172               in_libcall_block = 0;
2173             hash_scan_insn (insn, set_p, in_libcall_block);
2174         }
2175     }
2176
2177   free (reg_first_set);
2178   free (reg_last_set);
2179
2180   /* Catch bugs early.  */
2181   reg_first_set = reg_last_set = 0;
2182 }
2183
2184 /* Allocate space for the set hash table.
2185    N_INSNS is the number of instructions in the function.
2186    It is used to determine the number of buckets to use.  */
2187
2188 static void
2189 alloc_set_hash_table (n_insns)
2190      int n_insns;
2191 {
2192   int n;
2193
2194   set_hash_table_size = n_insns / 4;
2195   if (set_hash_table_size < 11)
2196     set_hash_table_size = 11;
2197
2198   /* Attempt to maintain efficient use of hash table.
2199      Making it an odd number is simplest for now.
2200      ??? Later take some measurements.  */
2201   set_hash_table_size |= 1;
2202   n = set_hash_table_size * sizeof (struct expr *);
2203   set_hash_table = (struct expr **) gmalloc (n);
2204 }
2205
2206 /* Free things allocated by alloc_set_hash_table.  */
2207
2208 static void
2209 free_set_hash_table ()
2210 {
2211   free (set_hash_table);
2212 }
2213
2214 /* Compute the hash table for doing copy/const propagation.  */
2215
2216 static void
2217 compute_set_hash_table ()
2218 {
2219   /* Initialize count of number of entries in hash table.  */
2220   n_sets = 0;
2221   bzero ((char *) set_hash_table,
2222          set_hash_table_size * sizeof (struct expr *));
2223
2224   compute_hash_table (1);
2225 }
2226
2227 /* Allocate space for the expression hash table.
2228    N_INSNS is the number of instructions in the function.
2229    It is used to determine the number of buckets to use.  */
2230
2231 static void
2232 alloc_expr_hash_table (n_insns)
2233      unsigned int n_insns;
2234 {
2235   int n;
2236
2237   expr_hash_table_size = n_insns / 2;
2238   /* Make sure the amount is usable.  */
2239   if (expr_hash_table_size < 11)
2240     expr_hash_table_size = 11;
2241
2242   /* Attempt to maintain efficient use of hash table.
2243      Making it an odd number is simplest for now.
2244      ??? Later take some measurements.  */
2245   expr_hash_table_size |= 1;
2246   n = expr_hash_table_size * sizeof (struct expr *);
2247   expr_hash_table = (struct expr **) gmalloc (n);
2248 }
2249
2250 /* Free things allocated by alloc_expr_hash_table.  */
2251
2252 static void
2253 free_expr_hash_table ()
2254 {
2255   free (expr_hash_table);
2256 }
2257
2258 /* Compute the hash table for doing GCSE.  */
2259
2260 static void
2261 compute_expr_hash_table ()
2262 {
2263   /* Initialize count of number of entries in hash table.  */
2264   n_exprs = 0;
2265   bzero ((char *) expr_hash_table,
2266          expr_hash_table_size * sizeof (struct expr *));
2267
2268   compute_hash_table (0);
2269 }
2270 \f
2271 /* Expression tracking support.  */
2272
2273 /* Lookup pattern PAT in the expression table.
2274    The result is a pointer to the table entry, or NULL if not found.  */
2275
2276 static struct expr *
2277 lookup_expr (pat)
2278      rtx pat;
2279 {
2280   int do_not_record_p;
2281   unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2282                                  expr_hash_table_size);
2283   struct expr *expr;
2284
2285   if (do_not_record_p)
2286     return NULL;
2287
2288   expr = expr_hash_table[hash];
2289
2290   while (expr && ! expr_equiv_p (expr->expr, pat))
2291     expr = expr->next_same_hash;
2292
2293   return expr;
2294 }
2295
2296 /* Lookup REGNO in the set table.  If PAT is non-NULL look for the entry that
2297    matches it, otherwise return the first entry for REGNO.  The result is a
2298    pointer to the table entry, or NULL if not found.  */
2299
2300 static struct expr *
2301 lookup_set (regno, pat)
2302      unsigned int regno;
2303      rtx pat;
2304 {
2305   unsigned int hash = hash_set (regno, set_hash_table_size);
2306   struct expr *expr;
2307
2308   expr = set_hash_table[hash];
2309
2310   if (pat)
2311     {
2312       while (expr && ! expr_equiv_p (expr->expr, pat))
2313         expr = expr->next_same_hash;
2314     }
2315   else
2316     {
2317       while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2318         expr = expr->next_same_hash;
2319     }
2320
2321   return expr;
2322 }
2323
2324 /* Return the next entry for REGNO in list EXPR.  */
2325
2326 static struct expr *
2327 next_set (regno, expr)
2328      unsigned int regno;
2329      struct expr *expr;
2330 {
2331   do
2332     expr = expr->next_same_hash;
2333   while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2334
2335   return expr;
2336 }
2337
2338 /* Reset tables used to keep track of what's still available [since the
2339    start of the block].  */
2340
2341 static void
2342 reset_opr_set_tables ()
2343 {
2344   /* Maintain a bitmap of which regs have been set since beginning of
2345      the block.  */
2346   sbitmap_zero (reg_set_bitmap);
2347
2348   /* Also keep a record of the last instruction to modify memory.
2349      For now this is very trivial, we only record whether any memory
2350      location has been modified.  */
2351   mem_last_set = 0;
2352 }
2353
2354 /* Return non-zero if the operands of X are not set before INSN in
2355    INSN's basic block.  */
2356
2357 static int
2358 oprs_not_set_p (x, insn)
2359      rtx x, insn;
2360 {
2361   int i, j;
2362   enum rtx_code code;
2363   const char *fmt;
2364
2365   if (x == 0)
2366     return 1;
2367
2368   code = GET_CODE (x);
2369   switch (code)
2370     {
2371     case PC:
2372     case CC0:
2373     case CONST:
2374     case CONST_INT:
2375     case CONST_DOUBLE:
2376     case SYMBOL_REF:
2377     case LABEL_REF:
2378     case ADDR_VEC:
2379     case ADDR_DIFF_VEC:
2380       return 1;
2381
2382     case MEM:
2383       if (mem_last_set != 0)
2384         return 0;
2385       else
2386         return oprs_not_set_p (XEXP (x, 0), insn);
2387
2388     case REG:
2389       return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2390
2391     default:
2392       break;
2393     }
2394
2395   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2396     {
2397       if (fmt[i] == 'e')
2398         {
2399           /* If we are about to do the last recursive call
2400              needed at this level, change it into iteration.
2401              This function is called enough to be worth it.  */
2402           if (i == 0)
2403             return oprs_not_set_p (XEXP (x, i), insn);
2404
2405           if (! oprs_not_set_p (XEXP (x, i), insn))
2406             return 0;
2407         }
2408       else if (fmt[i] == 'E')
2409         for (j = 0; j < XVECLEN (x, i); j++)
2410           if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
2411             return 0;
2412     }
2413
2414   return 1;
2415 }
2416
2417 /* Mark things set by a CALL.  */
2418
2419 static void
2420 mark_call (insn)
2421      rtx insn;
2422 {
2423   mem_last_set = INSN_CUID (insn);
2424 }
2425
2426 /* Mark things set by a SET.  */
2427
2428 static void
2429 mark_set (pat, insn)
2430      rtx pat, insn;
2431 {
2432   rtx dest = SET_DEST (pat);
2433
2434   while (GET_CODE (dest) == SUBREG
2435          || GET_CODE (dest) == ZERO_EXTRACT
2436          || GET_CODE (dest) == SIGN_EXTRACT
2437          || GET_CODE (dest) == STRICT_LOW_PART)
2438     dest = XEXP (dest, 0);
2439
2440   if (GET_CODE (dest) == REG)
2441     SET_BIT (reg_set_bitmap, REGNO (dest));
2442   else if (GET_CODE (dest) == MEM)
2443     mem_last_set = INSN_CUID (insn);
2444
2445   if (GET_CODE (SET_SRC (pat)) == CALL)
2446     mark_call (insn);
2447 }
2448
2449 /* Record things set by a CLOBBER.  */
2450
2451 static void
2452 mark_clobber (pat, insn)
2453      rtx pat, insn;
2454 {
2455   rtx clob = XEXP (pat, 0);
2456
2457   while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2458     clob = XEXP (clob, 0);
2459
2460   if (GET_CODE (clob) == REG)
2461     SET_BIT (reg_set_bitmap, REGNO (clob));
2462   else
2463     mem_last_set = INSN_CUID (insn);
2464 }
2465
2466 /* Record things set by INSN.
2467    This data is used by oprs_not_set_p.  */
2468
2469 static void
2470 mark_oprs_set (insn)
2471      rtx insn;
2472 {
2473   rtx pat = PATTERN (insn);
2474   int i;
2475
2476   if (GET_CODE (pat) == SET)
2477     mark_set (pat, insn);
2478   else if (GET_CODE (pat) == PARALLEL)
2479     for (i = 0; i < XVECLEN (pat, 0); i++)
2480       {
2481         rtx x = XVECEXP (pat, 0, i);
2482
2483         if (GET_CODE (x) == SET)
2484           mark_set (x, insn);
2485         else if (GET_CODE (x) == CLOBBER)
2486           mark_clobber (x, insn);
2487         else if (GET_CODE (x) == CALL)
2488           mark_call (insn);
2489       }
2490
2491   else if (GET_CODE (pat) == CLOBBER)
2492     mark_clobber (pat, insn);
2493   else if (GET_CODE (pat) == CALL)
2494     mark_call (insn);
2495 }
2496
2497 \f
2498 /* Classic GCSE reaching definition support.  */
2499
2500 /* Allocate reaching def variables.  */
2501
2502 static void
2503 alloc_rd_mem (n_blocks, n_insns)
2504      int n_blocks, n_insns;
2505 {
2506   rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2507   sbitmap_vector_zero (rd_kill, n_basic_blocks);
2508
2509   rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2510   sbitmap_vector_zero (rd_gen, n_basic_blocks);
2511
2512   reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2513   sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2514
2515   rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2516   sbitmap_vector_zero (rd_out, n_basic_blocks);
2517 }
2518
2519 /* Free reaching def variables.  */
2520
2521 static void
2522 free_rd_mem ()
2523 {
2524   free (rd_kill);
2525   free (rd_gen);
2526   free (reaching_defs);
2527   free (rd_out);
2528 }
2529
2530 /* Add INSN to the kills of BB.  REGNO, set in BB, is killed by INSN.  */
2531
2532 static void
2533 handle_rd_kill_set (insn, regno, bb)
2534      rtx insn;
2535      int regno, bb;
2536 {
2537   struct reg_set *this_reg;
2538
2539   for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg ->next)
2540     if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2541       SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2542 }
2543
2544 /* Compute the set of kill's for reaching definitions.  */
2545
2546 static void
2547 compute_kill_rd ()
2548 {
2549   int bb, cuid;
2550   int regno, i;
2551
2552   /* For each block
2553        For each set bit in `gen' of the block (i.e each insn which
2554            generates a definition in the block)
2555          Call the reg set by the insn corresponding to that bit regx
2556          Look at the linked list starting at reg_set_table[regx]
2557          For each setting of regx in the linked list, which is not in
2558              this block
2559            Set the bit in `kill' corresponding to that insn.   */
2560   for (bb = 0; bb < n_basic_blocks; bb++)
2561     for (cuid = 0; cuid < max_cuid; cuid++)
2562       if (TEST_BIT (rd_gen[bb], cuid))
2563         {
2564           rtx insn = CUID_INSN (cuid);
2565           rtx pat = PATTERN (insn);
2566
2567           if (GET_CODE (insn) == CALL_INSN)
2568             {
2569               for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2570                 {
2571                   if ((call_used_regs[regno]
2572                        && regno != STACK_POINTER_REGNUM
2573 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2574                        && regno != HARD_FRAME_POINTER_REGNUM
2575 #endif
2576 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2577                        && ! (regno == ARG_POINTER_REGNUM
2578                              && fixed_regs[regno])
2579 #endif
2580 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2581                        && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2582 #endif
2583                        && regno != FRAME_POINTER_REGNUM)
2584                       || global_regs[regno])
2585                     handle_rd_kill_set (insn, regno, bb);
2586                 }
2587             }
2588
2589           if (GET_CODE (pat) == PARALLEL)
2590             {
2591               for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2592                 {
2593                   enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2594
2595                   if ((code == SET || code == CLOBBER)
2596                       && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2597                     handle_rd_kill_set (insn,
2598                                         REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2599                                         bb);
2600                 }
2601             }
2602           else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG)
2603             /* Each setting of this register outside of this block
2604                must be marked in the set of kills in this block.  */
2605             handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2606         }
2607 }
2608
2609 /* Compute the reaching definitions as in
2610    Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2611    Chapter 10.  It is the same algorithm as used for computing available
2612    expressions but applied to the gens and kills of reaching definitions.  */
2613
2614 static void
2615 compute_rd ()
2616 {
2617   int bb, changed, passes;
2618
2619   for (bb = 0; bb < n_basic_blocks; bb++)
2620     sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2621
2622   passes = 0;
2623   changed = 1;
2624   while (changed)
2625     {
2626       changed = 0;
2627       for (bb = 0; bb < n_basic_blocks; bb++)
2628         {
2629           sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2630           changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2631                                             reaching_defs[bb], rd_kill[bb]);
2632         }
2633       passes++;
2634     }
2635
2636   if (gcse_file)
2637     fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2638 }
2639 \f
2640 /* Classic GCSE available expression support.  */
2641
2642 /* Allocate memory for available expression computation.  */
2643
2644 static void
2645 alloc_avail_expr_mem (n_blocks, n_exprs)
2646      int n_blocks, n_exprs;
2647 {
2648   ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2649   sbitmap_vector_zero (ae_kill, n_basic_blocks);
2650
2651   ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2652   sbitmap_vector_zero (ae_gen, n_basic_blocks);
2653
2654   ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2655   sbitmap_vector_zero (ae_in, n_basic_blocks);
2656
2657   ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2658   sbitmap_vector_zero (ae_out, n_basic_blocks);
2659 }
2660
2661 static void
2662 free_avail_expr_mem ()
2663 {
2664   free (ae_kill);
2665   free (ae_gen);
2666   free (ae_in);
2667   free (ae_out);
2668 }
2669
2670 /* Compute the set of available expressions generated in each basic block.  */
2671
2672 static void
2673 compute_ae_gen ()
2674 {
2675   unsigned int i;
2676   struct expr *expr;
2677   struct occr *occr;
2678
2679   /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2680      This is all we have to do because an expression is not recorded if it
2681      is not available, and the only expressions we want to work with are the
2682      ones that are recorded.  */
2683   for (i = 0; i < expr_hash_table_size; i++)
2684     for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash)
2685       for (occr = expr->avail_occr; occr != 0; occr = occr->next)
2686         SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2687 }
2688
2689 /* Return non-zero if expression X is killed in BB.  */
2690
2691 static int
2692 expr_killed_p (x, bb)
2693      rtx x;
2694      int bb;
2695 {
2696   int i, j;
2697   enum rtx_code code;
2698   const char *fmt;
2699
2700   if (x == 0)
2701     return 1;
2702
2703   code = GET_CODE (x);
2704   switch (code)
2705     {
2706     case REG:
2707       return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2708
2709     case MEM:
2710       if (mem_set_in_block[bb])
2711         return 1;
2712       else
2713         return expr_killed_p (XEXP (x, 0), bb);
2714
2715     case PC:
2716     case CC0: /*FIXME*/
2717     case CONST:
2718     case CONST_INT:
2719     case CONST_DOUBLE:
2720     case SYMBOL_REF:
2721     case LABEL_REF:
2722     case ADDR_VEC:
2723     case ADDR_DIFF_VEC:
2724       return 0;
2725
2726     default:
2727       break;
2728     }
2729
2730   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2731     {
2732       if (fmt[i] == 'e')
2733         {
2734           /* If we are about to do the last recursive call
2735              needed at this level, change it into iteration.
2736              This function is called enough to be worth it.  */
2737           if (i == 0)
2738             return expr_killed_p (XEXP (x, i), bb);
2739           else if (expr_killed_p (XEXP (x, i), bb))
2740             return 1;
2741         }
2742       else if (fmt[i] == 'E')
2743         for (j = 0; j < XVECLEN (x, i); j++)
2744           if (expr_killed_p (XVECEXP (x, i, j), bb))
2745             return 1;
2746     }
2747
2748   return 0;
2749 }
2750
2751 /* Compute the set of available expressions killed in each basic block.  */
2752
2753 static void
2754 compute_ae_kill (ae_gen, ae_kill)
2755      sbitmap *ae_gen, *ae_kill;
2756 {
2757   int bb;
2758   unsigned int i;
2759   struct expr *expr;
2760
2761   for (bb = 0; bb < n_basic_blocks; bb++)
2762     for (i = 0; i < expr_hash_table_size; i++)
2763       for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash)
2764         {
2765           /* Skip EXPR if generated in this block.  */
2766           if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2767             continue;
2768
2769           if (expr_killed_p (expr->expr, bb))
2770             SET_BIT (ae_kill[bb], expr->bitmap_index);
2771         }
2772 }
2773 \f
2774 /* Actually perform the Classic GCSE optimizations.  */
2775
2776 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2777
2778    CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2779    as a positive reach.  We want to do this when there are two computations
2780    of the expression in the block.
2781
2782    VISITED is a pointer to a working buffer for tracking which BB's have
2783    been visited.  It is NULL for the top-level call.
2784
2785    We treat reaching expressions that go through blocks containing the same
2786    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2787    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2788    2 as not reaching.  The intent is to improve the probability of finding
2789    only one reaching expression and to reduce register lifetimes by picking
2790    the closest such expression.  */
2791
2792 static int
2793 expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2794      struct occr *occr;
2795      struct expr *expr;
2796      int bb;
2797      int check_self_loop;
2798      char *visited;
2799 {
2800   edge pred;
2801
2802   for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
2803     {
2804       int pred_bb = pred->src->index;
2805
2806       if (visited[pred_bb])
2807         /* This predecessor has already been visited. Nothing to do.  */
2808           ;
2809       else if (pred_bb == bb)
2810         {
2811           /* BB loops on itself.  */
2812           if (check_self_loop
2813               && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2814               && BLOCK_NUM (occr->insn) == pred_bb)
2815             return 1;
2816
2817           visited[pred_bb] = 1;
2818         }
2819
2820       /* Ignore this predecessor if it kills the expression.  */
2821       else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2822         visited[pred_bb] = 1;
2823
2824       /* Does this predecessor generate this expression?  */
2825       else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2826         {
2827           /* Is this the occurrence we're looking for?
2828              Note that there's only one generating occurrence per block
2829              so we just need to check the block number.  */
2830           if (BLOCK_NUM (occr->insn) == pred_bb)
2831             return 1;
2832
2833           visited[pred_bb] = 1;
2834         }
2835
2836       /* Neither gen nor kill.  */
2837       else
2838         {
2839           visited[pred_bb] = 1;
2840           if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2841               visited))
2842
2843             return 1;
2844         }
2845     }
2846
2847   /* All paths have been checked.  */
2848   return 0;
2849 }
2850
2851 /* This wrapper for expr_reaches_here_p_work() is to ensure that any
2852    memory allocated for that function is returned. */
2853
2854 static int
2855 expr_reaches_here_p (occr, expr, bb, check_self_loop)
2856      struct occr *occr;
2857      struct expr *expr;
2858      int bb;
2859      int check_self_loop;
2860 {
2861   int rval;
2862   char *visited = (char *) xcalloc (n_basic_blocks, 1);
2863
2864   rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited);
2865
2866   free (visited);
2867   return rval;
2868 }
2869
2870 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2871    If there is more than one such instruction, return NULL.
2872
2873    Called only by handle_avail_expr.  */
2874
2875 static rtx
2876 computing_insn (expr, insn)
2877      struct expr *expr;
2878      rtx insn;
2879 {
2880   int bb = BLOCK_NUM (insn);
2881
2882   if (expr->avail_occr->next == NULL)
2883     {
2884       if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2885         /* The available expression is actually itself
2886            (i.e. a loop in the flow graph) so do nothing.  */
2887         return NULL;
2888
2889       /* (FIXME) Case that we found a pattern that was created by
2890          a substitution that took place.  */
2891       return expr->avail_occr->insn;
2892     }
2893   else
2894     {
2895       /* Pattern is computed more than once.
2896          Search backwards from this insn to see how many of these
2897          computations actually reach this insn.  */
2898       struct occr *occr;
2899       rtx insn_computes_expr = NULL;
2900       int can_reach = 0;
2901
2902       for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2903         {
2904           if (BLOCK_NUM (occr->insn) == bb)
2905             {
2906               /* The expression is generated in this block.
2907                  The only time we care about this is when the expression
2908                  is generated later in the block [and thus there's a loop].
2909                  We let the normal cse pass handle the other cases.  */
2910               if (INSN_CUID (insn) < INSN_CUID (occr->insn)
2911                   && expr_reaches_here_p (occr, expr, bb, 1))
2912                 {
2913                   can_reach++;
2914                   if (can_reach > 1)
2915                     return NULL;
2916
2917                   insn_computes_expr = occr->insn;
2918                 }
2919             }
2920           else if (expr_reaches_here_p (occr, expr, bb, 0))
2921             {
2922               can_reach++;
2923               if (can_reach > 1)
2924                 return NULL;
2925
2926               insn_computes_expr = occr->insn;
2927             }
2928         }
2929
2930       if (insn_computes_expr == NULL)
2931         abort ();
2932
2933       return insn_computes_expr;
2934     }
2935 }
2936
2937 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2938    Only called by can_disregard_other_sets.  */
2939
2940 static int
2941 def_reaches_here_p (insn, def_insn)
2942      rtx insn, def_insn;
2943 {
2944   rtx reg;
2945
2946   if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2947     return 1;
2948
2949   if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2950     {
2951       if (INSN_CUID (def_insn) < INSN_CUID (insn))
2952         {
2953           if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2954             return 1;
2955           else if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
2956             reg = XEXP (PATTERN (def_insn), 0);
2957           else if (GET_CODE (PATTERN (def_insn)) == SET)
2958             reg = SET_DEST (PATTERN (def_insn));
2959           else
2960             abort ();
2961
2962           return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2963         }
2964       else
2965         return 0;
2966     }
2967
2968   return 0;
2969 }
2970
2971 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.  The
2972    value returned is the number of definitions that reach INSN.  Returning a
2973    value of zero means that [maybe] more than one definition reaches INSN and
2974    the caller can't perform whatever optimization it is trying.  i.e. it is
2975    always safe to return zero.  */
2976
2977 static int
2978 can_disregard_other_sets (addr_this_reg, insn, for_combine)
2979      struct reg_set **addr_this_reg;
2980      rtx insn;
2981      int for_combine;
2982 {
2983   int number_of_reaching_defs = 0;
2984   struct reg_set *this_reg;
2985
2986   for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next)
2987     if (def_reaches_here_p (insn, this_reg->insn))
2988       {
2989         number_of_reaching_defs++;
2990         /* Ignore parallels for now.  */
2991         if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
2992           return 0;
2993
2994         if (!for_combine
2995             && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
2996                 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
2997                                   SET_SRC (PATTERN (insn)))))
2998           /* A setting of the reg to a different value reaches INSN.  */
2999           return 0;
3000
3001         if (number_of_reaching_defs > 1)
3002           {
3003             /* If in this setting the value the register is being set to is
3004                equal to the previous value the register was set to and this
3005                setting reaches the insn we are trying to do the substitution
3006                on then we are ok.  */
3007             if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3008               return 0;
3009             else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3010                                     SET_SRC (PATTERN (insn))))
3011               return 0;
3012           }
3013
3014         *addr_this_reg = this_reg;
3015       }
3016
3017   return number_of_reaching_defs;
3018 }
3019
3020 /* Expression computed by insn is available and the substitution is legal,
3021    so try to perform the substitution.
3022
3023    The result is non-zero if any changes were made.  */
3024
3025 static int
3026 handle_avail_expr (insn, expr)
3027      rtx insn;
3028      struct expr *expr;
3029 {
3030   rtx pat, insn_computes_expr;
3031   rtx to;
3032   struct reg_set *this_reg;
3033   int found_setting, use_src;
3034   int changed = 0;
3035
3036   /* We only handle the case where one computation of the expression
3037      reaches this instruction.  */
3038   insn_computes_expr = computing_insn (expr, insn);
3039   if (insn_computes_expr == NULL)
3040     return 0;
3041
3042   found_setting = 0;
3043   use_src = 0;
3044
3045   /* At this point we know only one computation of EXPR outside of this
3046      block reaches this insn.  Now try to find a register that the
3047      expression is computed into.  */
3048   if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3049     {
3050       /* This is the case when the available expression that reaches
3051          here has already been handled as an available expression.  */
3052       unsigned int regnum_for_replacing
3053         = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3054
3055       /* If the register was created by GCSE we can't use `reg_set_table',
3056          however we know it's set only once.  */
3057       if (regnum_for_replacing >= max_gcse_regno
3058           /* If the register the expression is computed into is set only once,
3059              or only one set reaches this insn, we can use it.  */
3060           || (((this_reg = reg_set_table[regnum_for_replacing]),
3061                this_reg->next == NULL)
3062               || can_disregard_other_sets (&this_reg, insn, 0)))
3063        {
3064          use_src = 1;
3065          found_setting = 1;
3066        }
3067     }
3068
3069   if (!found_setting)
3070     {
3071       unsigned int regnum_for_replacing
3072         = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3073
3074       /* This shouldn't happen.  */
3075       if (regnum_for_replacing >= max_gcse_regno)
3076         abort ();
3077
3078       this_reg = reg_set_table[regnum_for_replacing];
3079
3080       /* If the register the expression is computed into is set only once,
3081          or only one set reaches this insn, use it.  */
3082       if (this_reg->next == NULL
3083           || can_disregard_other_sets (&this_reg, insn, 0))
3084         found_setting = 1;
3085     }
3086
3087   if (found_setting)
3088     {
3089       pat = PATTERN (insn);
3090       if (use_src)
3091         to = SET_SRC (PATTERN (insn_computes_expr));
3092       else
3093         to = SET_DEST (PATTERN (insn_computes_expr));
3094       changed = validate_change (insn, &SET_SRC (pat), to, 0);
3095
3096       /* We should be able to ignore the return code from validate_change but
3097          to play it safe we check.  */
3098       if (changed)
3099         {
3100           gcse_subst_count++;
3101           if (gcse_file != NULL)
3102             {
3103               fprintf (gcse_file, "GCSE: Replacing the source in insn %d with",
3104                        INSN_UID (insn));
3105               fprintf (gcse_file, " reg %d %s insn %d\n",
3106                        REGNO (to), use_src ? "from" : "set in",
3107                        INSN_UID (insn_computes_expr));
3108             }
3109         }
3110     }
3111
3112   /* The register that the expr is computed into is set more than once.  */
3113   else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3114     {
3115       /* Insert an insn after insnx that copies the reg set in insnx
3116          into a new pseudo register call this new register REGN.
3117          From insnb until end of basic block or until REGB is set
3118          replace all uses of REGB with REGN.  */
3119       rtx new_insn;
3120
3121       to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3122
3123       /* Generate the new insn.  */
3124       /* ??? If the change fails, we return 0, even though we created
3125          an insn.  I think this is ok.  */
3126       new_insn
3127         = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3128                                         SET_DEST (PATTERN
3129                                                   (insn_computes_expr))),
3130                            insn_computes_expr);
3131
3132       /* Keep block number table up to date.  */
3133       set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3134
3135       /* Keep register set table up to date.  */
3136       record_one_set (REGNO (to), new_insn);
3137
3138       gcse_create_count++;
3139       if (gcse_file != NULL)
3140         {
3141           fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d",
3142                    INSN_UID (NEXT_INSN (insn_computes_expr)),
3143                    REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))));
3144           fprintf (gcse_file, ", computed in insn %d,\n",
3145                    INSN_UID (insn_computes_expr));
3146           fprintf (gcse_file, "      into newly allocated reg %d\n",
3147                    REGNO (to));
3148         }
3149
3150       pat = PATTERN (insn);
3151
3152       /* Do register replacement for INSN.  */
3153       changed = validate_change (insn, &SET_SRC (pat),
3154                                  SET_DEST (PATTERN
3155                                            (NEXT_INSN (insn_computes_expr))),
3156                                  0);
3157
3158       /* We should be able to ignore the return code from validate_change but
3159          to play it safe we check.  */
3160       if (changed)
3161         {
3162           gcse_subst_count++;
3163           if (gcse_file != NULL)
3164             {
3165               fprintf (gcse_file,
3166                        "GCSE: Replacing the source in insn %d with reg %d ",
3167                        INSN_UID (insn),
3168                        REGNO (SET_DEST (PATTERN (NEXT_INSN
3169                                                  (insn_computes_expr)))));
3170               fprintf (gcse_file, "set in insn %d\n",
3171                        INSN_UID (insn_computes_expr));
3172             }
3173         }
3174     }
3175
3176   return changed;
3177 }
3178
3179 /* Perform classic GCSE.  This is called by one_classic_gcse_pass after all
3180    the dataflow analysis has been done.
3181
3182    The result is non-zero if a change was made.  */
3183
3184 static int
3185 classic_gcse ()
3186 {
3187   int bb, changed;
3188   rtx insn;
3189
3190   /* Note we start at block 1.  */
3191
3192   changed = 0;
3193   for (bb = 1; bb < n_basic_blocks; bb++)
3194     {
3195       /* Reset tables used to keep track of what's still valid [since the
3196          start of the block].  */
3197       reset_opr_set_tables ();
3198
3199       for (insn = BLOCK_HEAD (bb);
3200            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3201            insn = NEXT_INSN (insn))
3202         {
3203           /* Is insn of form (set (pseudo-reg) ...)?  */
3204           if (GET_CODE (insn) == INSN
3205               && GET_CODE (PATTERN (insn)) == SET
3206               && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3207               && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3208             {
3209               rtx pat = PATTERN (insn);
3210               rtx src = SET_SRC (pat);
3211               struct expr *expr;
3212
3213               if (want_to_gcse_p (src)
3214                   /* Is the expression recorded?  */
3215                   && ((expr = lookup_expr (src)) != NULL)
3216                   /* Is the expression available [at the start of the
3217                      block]?  */
3218                   && TEST_BIT (ae_in[bb], expr->bitmap_index)
3219                   /* Are the operands unchanged since the start of the
3220                      block?  */
3221                   && oprs_not_set_p (src, insn))
3222                 changed |= handle_avail_expr (insn, expr);
3223             }
3224
3225           /* Keep track of everything modified by this insn.  */
3226           /* ??? Need to be careful w.r.t. mods done to INSN.  */
3227           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3228             mark_oprs_set (insn);
3229         }
3230     }
3231
3232   return changed;
3233 }
3234
3235 /* Top level routine to perform one classic GCSE pass.
3236
3237    Return non-zero if a change was made.  */
3238
3239 static int
3240 one_classic_gcse_pass (pass)
3241      int pass;
3242 {
3243   int changed = 0;
3244
3245   gcse_subst_count = 0;
3246   gcse_create_count = 0;
3247
3248   alloc_expr_hash_table (max_cuid);
3249   alloc_rd_mem (n_basic_blocks, max_cuid);
3250   compute_expr_hash_table ();
3251   if (gcse_file)
3252     dump_hash_table (gcse_file, "Expression", expr_hash_table,
3253                      expr_hash_table_size, n_exprs);
3254
3255   if (n_exprs > 0)
3256     {
3257       compute_kill_rd ();
3258       compute_rd ();
3259       alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3260       compute_ae_gen ();
3261       compute_ae_kill (ae_gen, ae_kill);
3262       compute_available (ae_gen, ae_kill, ae_out, ae_in);
3263       changed = classic_gcse ();
3264       free_avail_expr_mem ();
3265     }
3266
3267   free_rd_mem ();
3268   free_expr_hash_table ();
3269
3270   if (gcse_file)
3271     {
3272       fprintf (gcse_file, "\n");
3273       fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,",
3274                current_function_name, pass, bytes_used, gcse_subst_count);
3275       fprintf (gcse_file, "%d insns created\n", gcse_create_count);
3276     }
3277
3278   return changed;
3279 }
3280 \f
3281 /* Compute copy/constant propagation working variables.  */
3282
3283 /* Local properties of assignments.  */
3284 static sbitmap *cprop_pavloc;
3285 static sbitmap *cprop_absaltered;
3286
3287 /* Global properties of assignments (computed from the local properties).  */
3288 static sbitmap *cprop_avin;
3289 static sbitmap *cprop_avout;
3290
3291 /* Allocate vars used for copy/const propagation.  N_BLOCKS is the number of
3292    basic blocks.  N_SETS is the number of sets.  */
3293
3294 static void
3295 alloc_cprop_mem (n_blocks, n_sets)
3296      int n_blocks, n_sets;
3297 {
3298   cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3299   cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3300
3301   cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3302   cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3303 }
3304
3305 /* Free vars used by copy/const propagation.  */
3306
3307 static void
3308 free_cprop_mem ()
3309 {
3310   free (cprop_pavloc);
3311   free (cprop_absaltered);
3312   free (cprop_avin);
3313   free (cprop_avout);
3314 }
3315
3316 /* For each block, compute whether X is transparent.  X is either an
3317    expression or an assignment [though we don't care which, for this context
3318    an assignment is treated as an expression].  For each block where an
3319    element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
3320    bit in BMAP.  */
3321
3322 static void
3323 compute_transp (x, indx, bmap, set_p)
3324      rtx x;
3325      int indx;
3326      sbitmap *bmap;
3327      int set_p;
3328 {
3329   int bb, i, j;
3330   enum rtx_code code;
3331   reg_set *r;
3332   const char *fmt;
3333
3334   /* repeat is used to turn tail-recursion into iteration since GCC
3335      can't do it when there's no return value.  */
3336  repeat:
3337
3338   if (x == 0)
3339     return;
3340
3341   code = GET_CODE (x);
3342   switch (code)
3343     {
3344     case REG:
3345       if (set_p)
3346         {
3347           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3348             {
3349               for (bb = 0; bb < n_basic_blocks; bb++)
3350                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3351                   SET_BIT (bmap[bb], indx);
3352             }
3353           else
3354             {
3355               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3356                 SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3357             }
3358         }
3359       else
3360         {
3361           if (REGNO (x) < FIRST_PSEUDO_REGISTER)
3362             {
3363               for (bb = 0; bb < n_basic_blocks; bb++)
3364                 if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
3365                   RESET_BIT (bmap[bb], indx);
3366             }
3367           else
3368             {
3369               for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
3370                 RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
3371             }
3372         }
3373
3374       return;
3375
3376     case MEM:
3377       if (set_p)
3378         {
3379           for (bb = 0; bb < n_basic_blocks; bb++)
3380             if (mem_set_in_block[bb])
3381               SET_BIT (bmap[bb], indx);
3382         }
3383       else
3384         {
3385           for (bb = 0; bb < n_basic_blocks; bb++)
3386             if (mem_set_in_block[bb])
3387               RESET_BIT (bmap[bb], indx);
3388         }
3389
3390       x = XEXP (x, 0);
3391       goto repeat;
3392
3393     case PC:
3394     case CC0: /*FIXME*/
3395     case CONST:
3396     case CONST_INT:
3397     case CONST_DOUBLE:
3398     case SYMBOL_REF:
3399     case LABEL_REF:
3400     case ADDR_VEC:
3401     case ADDR_DIFF_VEC:
3402       return;
3403
3404     default:
3405       break;
3406     }
3407
3408   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3409     {
3410       if (fmt[i] == 'e')
3411         {
3412           /* If we are about to do the last recursive call
3413              needed at this level, change it into iteration.
3414              This function is called enough to be worth it.  */
3415           if (i == 0)
3416             {
3417               x = XEXP (x, i);
3418               goto repeat;
3419             }
3420
3421           compute_transp (XEXP (x, i), indx, bmap, set_p);
3422         }
3423       else if (fmt[i] == 'E')
3424         for (j = 0; j < XVECLEN (x, i); j++)
3425           compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3426     }
3427 }
3428
3429 /* Top level routine to do the dataflow analysis needed by copy/const
3430    propagation.  */
3431
3432 static void
3433 compute_cprop_data ()
3434 {
3435   compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3436   compute_available (cprop_pavloc, cprop_absaltered,
3437                      cprop_avout, cprop_avin);
3438 }
3439 \f
3440 /* Copy/constant propagation.  */
3441
3442 /* Maximum number of register uses in an insn that we handle.  */
3443 #define MAX_USES 8
3444
3445 /* Table of uses found in an insn.
3446    Allocated statically to avoid alloc/free complexity and overhead.  */
3447 static struct reg_use reg_use_table[MAX_USES];
3448
3449 /* Index into `reg_use_table' while building it.  */
3450 static int reg_use_count;
3451
3452 /* Set up a list of register numbers used in INSN.  The found uses are stored
3453    in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
3454    and contains the number of uses in the table upon exit.
3455
3456    ??? If a register appears multiple times we will record it multiple times.
3457    This doesn't hurt anything but it will slow things down.  */
3458
3459 static void
3460 find_used_regs (x)
3461      rtx x;
3462 {
3463   int i, j;
3464   enum rtx_code code;
3465   const char *fmt;
3466
3467   /* repeat is used to turn tail-recursion into iteration since GCC
3468      can't do it when there's no return value.  */
3469  repeat:
3470
3471   if (x == 0)
3472     return;
3473
3474   code = GET_CODE (x);
3475   switch (code)
3476     {
3477     case REG:
3478       if (reg_use_count == MAX_USES)
3479         return;
3480
3481       reg_use_table[reg_use_count].reg_rtx = x;
3482       reg_use_count++;
3483       return;
3484
3485     case MEM:
3486       x = XEXP (x, 0);
3487       goto repeat;
3488
3489     case PC:
3490     case CC0:
3491     case CONST:
3492     case CONST_INT:
3493     case CONST_DOUBLE:
3494     case SYMBOL_REF:
3495     case LABEL_REF:
3496     case CLOBBER:
3497     case ADDR_VEC:
3498     case ADDR_DIFF_VEC:
3499     case ASM_INPUT: /*FIXME*/
3500       return;
3501
3502     case SET:
3503       if (GET_CODE (SET_DEST (x)) == MEM)
3504         find_used_regs (SET_DEST (x));
3505       x = SET_SRC (x);
3506       goto repeat;
3507
3508     default:
3509       break;
3510     }
3511
3512   /* Recursively scan the operands of this expression.  */
3513
3514   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
3515     {
3516       if (fmt[i] == 'e')
3517         {
3518           /* If we are about to do the last recursive call
3519              needed at this level, change it into iteration.
3520              This function is called enough to be worth it.  */
3521           if (i == 0)
3522             {
3523               x = XEXP (x, 0);
3524               goto repeat;
3525             }
3526
3527           find_used_regs (XEXP (x, i));
3528         }
3529       else if (fmt[i] == 'E')
3530         for (j = 0; j < XVECLEN (x, i); j++)
3531           find_used_regs (XVECEXP (x, i, j));
3532     }
3533 }
3534
3535 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3536    Returns non-zero is successful.  */
3537
3538 static int
3539 try_replace_reg (from, to, insn)
3540      rtx from, to, insn;
3541 {
3542   rtx note;
3543   rtx src;
3544   int success;
3545   rtx set;
3546
3547   note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3548
3549   if (!note)
3550     note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3551
3552   /* If this fails we could try to simplify the result of the
3553      replacement and attempt to recognize the simplified insn.
3554
3555      But we need a general simplify_rtx that doesn't have pass
3556      specific state variables.  I'm not aware of one at the moment.  */
3557
3558   success = validate_replace_src (from, to, insn);
3559   set = single_set (insn);
3560
3561   /* We've failed to do replacement. Try to add REG_EQUAL note to not loose
3562      information.  */
3563   if (!success && !note)
3564     {
3565       if (!set)
3566         return 0;
3567
3568       note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3569                                                    copy_rtx (SET_SRC (set)),
3570                                                    REG_NOTES (insn));
3571     }
3572
3573   /* Always do the replacement in REQ_EQUAL and REG_EQUIV notes.  Also
3574      try to simplify them.  */
3575   if (note)
3576     {
3577       rtx simplified;
3578
3579       src = XEXP (note, 0);
3580       replace_rtx (src, from, to);
3581
3582       /* Try to simplify resulting note. */
3583       simplified = simplify_rtx (src);
3584       if (simplified)
3585         {
3586           src = simplified;
3587           XEXP (note, 0) = src;
3588         }
3589
3590       /* REG_EQUAL may get simplified into register.
3591          We don't allow that. Remove that note. This code ought
3592          not to hapen, because previous code ought to syntetize
3593          reg-reg move, but be on the safe side.  */
3594       else if (REG_P (src))
3595         remove_note (insn, note);
3596     }
3597   return success;
3598 }
3599
3600 /* Find a set of REGNOs that are available on entry to INSN's block.  Returns
3601    NULL no such set is found.  */
3602
3603 static struct expr *
3604 find_avail_set (regno, insn)
3605      int regno;
3606      rtx insn;
3607 {
3608   /* SET1 contains the last set found that can be returned to the caller for
3609      use in a substitution.  */
3610   struct expr *set1 = 0;
3611
3612   /* Loops are not possible here.  To get a loop we would need two sets
3613      available at the start of the block containing INSN.  ie we would
3614      need two sets like this available at the start of the block:
3615
3616        (set (reg X) (reg Y))
3617        (set (reg Y) (reg X))
3618
3619      This can not happen since the set of (reg Y) would have killed the
3620      set of (reg X) making it unavailable at the start of this block.  */
3621   while (1)
3622      {
3623       rtx src;
3624       struct expr *set = lookup_set (regno, NULL_RTX);
3625
3626       /* Find a set that is available at the start of the block
3627          which contains INSN.  */
3628       while (set)
3629         {
3630           if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3631             break;
3632           set = next_set (regno, set);
3633         }
3634
3635       /* If no available set was found we've reached the end of the
3636          (possibly empty) copy chain.  */
3637       if (set == 0)
3638         break;
3639
3640       if (GET_CODE (set->expr) != SET)
3641         abort ();
3642
3643       src = SET_SRC (set->expr);
3644
3645       /* We know the set is available.
3646          Now check that SRC is ANTLOC (i.e. none of the source operands
3647          have changed since the start of the block).
3648
3649          If the source operand changed, we may still use it for the next
3650          iteration of this loop, but we may not use it for substitutions.  */
3651
3652       if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3653         set1 = set;
3654
3655       /* If the source of the set is anything except a register, then
3656          we have reached the end of the copy chain.  */
3657       if (GET_CODE (src) != REG)
3658         break;
3659
3660       /* Follow the copy chain, ie start another iteration of the loop
3661          and see if we have an available copy into SRC.  */
3662       regno = REGNO (src);
3663      }
3664
3665   /* SET1 holds the last set that was available and anticipatable at
3666      INSN.  */
3667   return set1;
3668 }
3669
3670 /* Subroutine of cprop_insn that tries to propagate constants into
3671    JUMP_INSNS.  INSN must be a conditional jump; COPY is a copy of it
3672    that we can use for substitutions.
3673    REG_USED is the use we will try to replace, SRC is the constant we
3674    will try to substitute for it.
3675    Returns nonzero if a change was made.  */
3676
3677 static int
3678 cprop_jump (insn, copy, reg_used, src)
3679      rtx insn, copy;
3680      struct reg_use *reg_used;
3681      rtx src;
3682 {
3683   rtx set = PATTERN (copy);
3684   rtx temp;
3685
3686   /* Replace the register with the appropriate constant.  */
3687   replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3688
3689   temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3690                                      GET_MODE (SET_SRC (set)),
3691                                      GET_MODE (XEXP (SET_SRC (set), 0)),
3692                                      XEXP (SET_SRC (set), 0),
3693                                      XEXP (SET_SRC (set), 1),
3694                                      XEXP (SET_SRC (set), 2));
3695
3696   /* If no simplification can be made, then try the next
3697      register.  */
3698   if (temp == 0)
3699     return 0;
3700
3701   SET_SRC (set) = temp;
3702
3703   /* That may have changed the structure of TEMP, so
3704      force it to be rerecognized if it has not turned
3705      into a nop or unconditional jump.  */
3706
3707   INSN_CODE (copy) = -1;
3708   if ((SET_DEST (set) == pc_rtx
3709        && (SET_SRC (set) == pc_rtx
3710            || GET_CODE (SET_SRC (set)) == LABEL_REF))
3711       || recog (PATTERN (copy), copy, NULL) >= 0)
3712     {
3713       /* This has either become an unconditional jump
3714          or a nop-jump.  We'd like to delete nop jumps
3715          here, but doing so confuses gcse.  So we just
3716          make the replacement and let later passes
3717          sort things out.  */
3718       PATTERN (insn) = set;
3719       INSN_CODE (insn) = -1;
3720
3721       /* One less use of the label this insn used to jump to
3722          if we turned this into a NOP jump.  */
3723       if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3724         --LABEL_NUSES (JUMP_LABEL (insn));
3725
3726       /* If this has turned into an unconditional jump,
3727          then put a barrier after it so that the unreachable
3728          code will be deleted.  */
3729       if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3730         emit_barrier_after (insn);
3731
3732       run_jump_opt_after_gcse = 1;
3733
3734       const_prop_count++;
3735       if (gcse_file != NULL)
3736         {
3737           fprintf (gcse_file,
3738                    "CONST-PROP: Replacing reg %d in insn %d with constant ",
3739                    REGNO (reg_used->reg_rtx), INSN_UID (insn));
3740           print_rtl (gcse_file, src);
3741           fprintf (gcse_file, "\n");
3742         }
3743
3744       return 1;
3745     }
3746   return 0;
3747 }
3748
3749 #ifdef HAVE_cc0
3750
3751 /* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
3752    for machines that have CC0.  INSN is a single set that stores into CC0;
3753    the insn following it is a conditional jump.  REG_USED is the use we will
3754    try to replace, SRC is the constant we will try to substitute for it.
3755    Returns nonzero if a change was made.  */
3756
3757 static int
3758 cprop_cc0_jump (insn, reg_used, src)
3759      rtx insn;
3760      struct reg_use *reg_used;
3761      rtx src;
3762 {
3763   rtx jump = NEXT_INSN (insn);
3764   rtx copy = copy_rtx (jump);
3765   rtx set = PATTERN (copy);
3766
3767   /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3768      substitute into it.  */
3769   replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3770   if (! cprop_jump (jump, copy, reg_used, src))
3771     return 0;
3772
3773   /* If we succeeded, delete the cc0 setter.  */
3774   PUT_CODE (insn, NOTE);
3775   NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3776   NOTE_SOURCE_FILE (insn) = 0;
3777   return 1;
3778  }
3779 #endif
3780
3781 /* Perform constant and copy propagation on INSN.
3782    The result is non-zero if a change was made.  */
3783
3784 static int
3785 cprop_insn (insn, alter_jumps)
3786      rtx insn;
3787      int alter_jumps;
3788 {
3789   struct reg_use *reg_used;
3790   int changed = 0;
3791   rtx note;
3792
3793   /* Only propagate into SETs.  Note that a conditional jump is a
3794      SET with pc_rtx as the destination.  */
3795   if ((GET_CODE (insn) != INSN
3796        && GET_CODE (insn) != JUMP_INSN)
3797       || GET_CODE (PATTERN (insn)) != SET)
3798     return 0;
3799
3800   reg_use_count = 0;
3801   find_used_regs (PATTERN (insn));
3802
3803   note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3804   if (!note)
3805     note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3806
3807   /* We may win even when propagating constants into notes. */
3808   if (note)
3809     find_used_regs (XEXP (note, 0));
3810
3811   for (reg_used = &reg_use_table[0]; reg_use_count > 0;
3812        reg_used++, reg_use_count--)
3813     {
3814       unsigned int regno = REGNO (reg_used->reg_rtx);
3815       rtx pat, src;
3816       struct expr *set;
3817
3818       /* Ignore registers created by GCSE.
3819          We do this because ... */
3820       if (regno >= max_gcse_regno)
3821         continue;
3822
3823       /* If the register has already been set in this block, there's
3824          nothing we can do.  */
3825       if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3826         continue;
3827
3828       /* Find an assignment that sets reg_used and is available
3829          at the start of the block.  */
3830       set = find_avail_set (regno, insn);
3831       if (! set)
3832         continue;
3833
3834       pat = set->expr;
3835       /* ??? We might be able to handle PARALLELs.  Later.  */
3836       if (GET_CODE (pat) != SET)
3837         abort ();
3838
3839       src = SET_SRC (pat);
3840
3841       /* Constant propagation.  */
3842       if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
3843           || GET_CODE (src) == SYMBOL_REF)
3844         {
3845           /* Handle normal insns first.  */
3846           if (GET_CODE (insn) == INSN
3847               && try_replace_reg (reg_used->reg_rtx, src, insn))
3848             {
3849               changed = 1;
3850               const_prop_count++;
3851               if (gcse_file != NULL)
3852                 {
3853                   fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
3854                            regno);
3855                   fprintf (gcse_file, "insn %d with constant ",
3856                            INSN_UID (insn));
3857                   print_rtl (gcse_file, src);
3858                   fprintf (gcse_file, "\n");
3859                 }
3860
3861               /* The original insn setting reg_used may or may not now be
3862                  deletable.  We leave the deletion to flow.  */
3863             }
3864
3865           /* Try to propagate a CONST_INT into a conditional jump.
3866              We're pretty specific about what we will handle in this
3867              code, we can extend this as necessary over time.
3868
3869              Right now the insn in question must look like
3870              (set (pc) (if_then_else ...))  */
3871           else if (alter_jumps
3872                    && GET_CODE (insn) == JUMP_INSN
3873                    && condjump_p (insn)
3874                    && ! simplejump_p (insn))
3875             changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
3876 #ifdef HAVE_cc0
3877           /* Similar code for machines that use a pair of CC0 setter and
3878              conditional jump insn.  */
3879           else if (alter_jumps
3880                    && GET_CODE (PATTERN (insn)) == SET
3881                    && SET_DEST (PATTERN (insn)) == cc0_rtx
3882                    && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
3883                    && condjump_p (NEXT_INSN (insn))
3884                    && ! simplejump_p (NEXT_INSN (insn)))
3885             changed |= cprop_cc0_jump (insn, reg_used, src);
3886 #endif
3887         }
3888       else if (GET_CODE (src) == REG
3889                && REGNO (src) >= FIRST_PSEUDO_REGISTER
3890                && REGNO (src) != regno)
3891         {
3892           if (try_replace_reg (reg_used->reg_rtx, src, insn))
3893             {
3894               changed = 1;
3895               copy_prop_count++;
3896               if (gcse_file != NULL)
3897                 {
3898                   fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
3899                            regno, INSN_UID (insn));
3900                   fprintf (gcse_file, " with reg %d\n", REGNO (src));
3901                 }
3902
3903               /* The original insn setting reg_used may or may not now be
3904                  deletable.  We leave the deletion to flow.  */
3905               /* FIXME: If it turns out that the insn isn't deletable,
3906                  then we may have unnecessarily extended register lifetimes
3907                  and made things worse.  */
3908             }
3909         }
3910     }
3911
3912   return changed;
3913 }
3914
3915 /* Forward propagate copies.  This includes copies and constants.  Return
3916    non-zero if a change was made.  */
3917
3918 static int
3919 cprop (alter_jumps)
3920      int alter_jumps;
3921 {
3922   int bb, changed;
3923   rtx insn;
3924
3925   /* Note we start at block 1.  */
3926
3927   changed = 0;
3928   for (bb = 1; bb < n_basic_blocks; bb++)
3929     {
3930       /* Reset tables used to keep track of what's still valid [since the
3931          start of the block].  */
3932       reset_opr_set_tables ();
3933
3934       for (insn = BLOCK_HEAD (bb);
3935            insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3936            insn = NEXT_INSN (insn))
3937         {
3938           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3939             {
3940               changed |= cprop_insn (insn, alter_jumps);
3941
3942               /* Keep track of everything modified by this insn.  */
3943               /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
3944                  call mark_oprs_set if we turned the insn into a NOTE.  */
3945               if (GET_CODE (insn) != NOTE)
3946                 mark_oprs_set (insn);
3947             }
3948         }
3949     }
3950
3951   if (gcse_file != NULL)
3952     fprintf (gcse_file, "\n");
3953
3954   return changed;
3955 }
3956
3957 /* Perform one copy/constant propagation pass.
3958    F is the first insn in the function.
3959    PASS is the pass count.  */
3960
3961 static int
3962 one_cprop_pass (pass, alter_jumps)
3963      int pass;
3964      int alter_jumps;
3965 {
3966   int changed = 0;
3967
3968   const_prop_count = 0;
3969   copy_prop_count = 0;
3970
3971   alloc_set_hash_table (max_cuid);
3972   compute_set_hash_table ();
3973   if (gcse_file)
3974     dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3975                      n_sets);
3976   if (n_sets > 0)
3977     {
3978       alloc_cprop_mem (n_basic_blocks, n_sets);
3979       compute_cprop_data ();
3980       changed = cprop (alter_jumps);
3981       free_cprop_mem ();
3982     }
3983
3984   free_set_hash_table ();
3985
3986   if (gcse_file)
3987     {
3988       fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
3989                current_function_name, pass, bytes_used);
3990       fprintf (gcse_file, "%d const props, %d copy props\n\n",
3991                const_prop_count, copy_prop_count);
3992     }
3993
3994   return changed;
3995 }
3996 \f
3997 /* Compute PRE+LCM working variables.  */
3998
3999 /* Local properties of expressions.  */
4000 /* Nonzero for expressions that are transparent in the block.  */
4001 static sbitmap *transp;
4002
4003 /* Nonzero for expressions that are transparent at the end of the block.
4004    This is only zero for expressions killed by abnormal critical edge
4005    created by a calls.  */
4006 static sbitmap *transpout;
4007
4008 /* Nonzero for expressions that are computed (available) in the block.  */
4009 static sbitmap *comp;
4010
4011 /* Nonzero for expressions that are locally anticipatable in the block.  */
4012 static sbitmap *antloc;
4013
4014 /* Nonzero for expressions where this block is an optimal computation
4015    point.  */
4016 static sbitmap *pre_optimal;
4017
4018 /* Nonzero for expressions which are redundant in a particular block.  */
4019 static sbitmap *pre_redundant;
4020
4021 /* Nonzero for expressions which should be inserted on a specific edge.  */
4022 static sbitmap *pre_insert_map;
4023
4024 /* Nonzero for expressions which should be deleted in a specific block.  */
4025 static sbitmap *pre_delete_map;
4026
4027 /* Contains the edge_list returned by pre_edge_lcm.  */
4028 static struct edge_list *edge_list;
4029
4030 /* Redundant insns.  */
4031 static sbitmap pre_redundant_insns;
4032
4033 /* Allocate vars used for PRE analysis.  */
4034
4035 static void
4036 alloc_pre_mem (n_blocks, n_exprs)
4037      int n_blocks, n_exprs;
4038 {
4039   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4040   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4041   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4042
4043   pre_optimal = NULL;
4044   pre_redundant = NULL;
4045   pre_insert_map = NULL;
4046   pre_delete_map = NULL;
4047   ae_in = NULL;
4048   ae_out = NULL;
4049   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4050
4051   /* pre_insert and pre_delete are allocated later.  */
4052 }
4053
4054 /* Free vars used for PRE analysis.  */
4055
4056 static void
4057 free_pre_mem ()
4058 {
4059   free (transp);
4060   free (comp);
4061
4062   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
4063
4064   if (pre_optimal)
4065     free (pre_optimal);
4066   if (pre_redundant)
4067     free (pre_redundant);
4068   if (pre_insert_map)
4069     free (pre_insert_map);
4070   if (pre_delete_map)
4071     free (pre_delete_map);
4072
4073   if (ae_in)
4074     free (ae_in);
4075   if (ae_out)
4076     free (ae_out);
4077
4078   transp = comp = NULL;
4079   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4080   ae_in = ae_out = NULL;
4081 }
4082
4083 /* Top level routine to do the dataflow analysis needed by PRE.  */
4084
4085 static void
4086 compute_pre_data ()
4087 {
4088   int i;
4089
4090   compute_local_properties (transp, comp, antloc, 0);
4091   sbitmap_vector_zero (ae_kill, n_basic_blocks);
4092
4093   /* Compute ae_kill for each basic block using:
4094
4095      ~(TRANSP | COMP)
4096
4097      This is significantly faster than compute_ae_kill.  */
4098
4099   for (i = 0; i < n_basic_blocks; i++)
4100     {
4101       sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
4102       sbitmap_not (ae_kill[i], ae_kill[i]);
4103     }
4104
4105   edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4106                             ae_kill, &pre_insert_map, &pre_delete_map);
4107   free (antloc);
4108   antloc = NULL;
4109   free (ae_kill);
4110   ae_kill = NULL;
4111 }
4112 \f
4113 /* PRE utilities */
4114
4115 /* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4116    block BB.
4117
4118    VISITED is a pointer to a working buffer for tracking which BB's have
4119    been visited.  It is NULL for the top-level call.
4120
4121    We treat reaching expressions that go through blocks containing the same
4122    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
4123    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4124    2 as not reaching.  The intent is to improve the probability of finding
4125    only one reaching expression and to reduce register lifetimes by picking
4126    the closest such expression.  */
4127
4128 static int
4129 pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4130      int occr_bb;
4131      struct expr *expr;
4132      int bb;
4133      char *visited;
4134 {
4135   edge pred;
4136
4137   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4138     {
4139       int pred_bb = pred->src->index;
4140
4141       if (pred->src == ENTRY_BLOCK_PTR
4142           /* Has predecessor has already been visited?  */
4143           || visited[pred_bb])
4144         ;/* Nothing to do.  */
4145
4146       /* Does this predecessor generate this expression?  */
4147       else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4148         {
4149           /* Is this the occurrence we're looking for?
4150              Note that there's only one generating occurrence per block
4151              so we just need to check the block number.  */
4152           if (occr_bb == pred_bb)
4153             return 1;
4154
4155           visited[pred_bb] = 1;
4156         }
4157       /* Ignore this predecessor if it kills the expression.  */
4158       else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4159         visited[pred_bb] = 1;
4160
4161       /* Neither gen nor kill.  */
4162       else
4163         {
4164           visited[pred_bb] = 1;
4165           if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4166             return 1;
4167         }
4168     }
4169
4170   /* All paths have been checked.  */
4171   return 0;
4172 }
4173
4174 /* The wrapper for pre_expr_reaches_here_work that ensures that any
4175    memory allocated for that function is returned. */
4176
4177 static int
4178 pre_expr_reaches_here_p (occr_bb, expr, bb)
4179      int occr_bb;
4180      struct expr *expr;
4181      int bb;
4182 {
4183   int rval;
4184   char *visited = (char *) xcalloc (n_basic_blocks, 1);
4185
4186   rval = pre_expr_reaches_here_p_work(occr_bb, expr, bb, visited);
4187
4188   free (visited);
4189   return rval;
4190 }
4191 \f
4192
4193 /* Given an expr, generate RTL which we can insert at the end of a BB,
4194    or on an edge.  Set the block number of any insns generated to
4195    the value of BB.  */
4196
4197 static rtx
4198 process_insert_insn (expr)
4199      struct expr *expr;
4200 {
4201   rtx reg = expr->reaching_reg;
4202   rtx pat, copied_expr;
4203   rtx first_new_insn;
4204
4205   start_sequence ();
4206   copied_expr = copy_rtx (expr->expr);
4207   emit_move_insn (reg, copied_expr);
4208   first_new_insn = get_insns ();
4209   pat = gen_sequence ();
4210   end_sequence ();
4211
4212   return pat;
4213 }
4214
4215 /* Add EXPR to the end of basic block BB.
4216
4217    This is used by both the PRE and code hoisting.
4218
4219    For PRE, we want to verify that the expr is either transparent
4220    or locally anticipatable in the target block.  This check makes
4221    no sense for code hoisting.  */
4222
4223 static void
4224 insert_insn_end_bb (expr, bb, pre)
4225      struct expr *expr;
4226      int bb;
4227      int pre;
4228 {
4229   rtx insn = BLOCK_END (bb);
4230   rtx new_insn;
4231   rtx reg = expr->reaching_reg;
4232   int regno = REGNO (reg);
4233   rtx pat;
4234   int i;
4235
4236   pat = process_insert_insn (expr);
4237
4238   /* If the last insn is a jump, insert EXPR in front [taking care to
4239      handle cc0, etc. properly].  */
4240
4241   if (GET_CODE (insn) == JUMP_INSN)
4242     {
4243 #ifdef HAVE_cc0
4244       rtx note;
4245 #endif
4246
4247       /* If this is a jump table, then we can't insert stuff here.  Since
4248          we know the previous real insn must be the tablejump, we insert
4249          the new instruction just before the tablejump.  */
4250       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4251           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4252         insn = prev_real_insn (insn);
4253
4254 #ifdef HAVE_cc0
4255       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4256          if cc0 isn't set.  */
4257       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4258       if (note)
4259         insn = XEXP (note, 0);
4260       else
4261         {
4262           rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4263           if (maybe_cc0_setter
4264               && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4265               && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4266             insn = maybe_cc0_setter;
4267         }
4268 #endif
4269       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
4270       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4271     }
4272
4273   /* Likewise if the last insn is a call, as will happen in the presence
4274      of exception handling.  */
4275   else if (GET_CODE (insn) == CALL_INSN)
4276     {
4277       HARD_REG_SET parm_regs;
4278       int nparm_regs;
4279       rtx p;
4280
4281       /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4282          we search backward and place the instructions before the first
4283          parameter is loaded.  Do this for everyone for consistency and a
4284          presumtion that we'll get better code elsewhere as well.
4285
4286          It should always be the case that we can put these instructions
4287          anywhere in the basic block with performing PRE optimizations.
4288          Check this.  */
4289
4290       if (pre
4291           && !TEST_BIT (antloc[bb], expr->bitmap_index)
4292           && !TEST_BIT (transp[bb], expr->bitmap_index))
4293         abort ();
4294
4295       /* Since different machines initialize their parameter registers
4296          in different orders, assume nothing.  Collect the set of all
4297          parameter registers.  */
4298       CLEAR_HARD_REG_SET (parm_regs);
4299       nparm_regs = 0;
4300       for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4301         if (GET_CODE (XEXP (p, 0)) == USE
4302             && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4303           {
4304             if (REGNO (XEXP (XEXP (p, 0), 0)) >= FIRST_PSEUDO_REGISTER)
4305               abort ();
4306
4307             SET_HARD_REG_BIT (parm_regs, REGNO (XEXP (XEXP (p, 0), 0)));
4308             nparm_regs++;
4309           }
4310
4311       /* Search backward for the first set of a register in this set.  */
4312       while (nparm_regs && BLOCK_HEAD (bb) != insn)
4313         {
4314           insn = PREV_INSN (insn);
4315           p = single_set (insn);
4316           if (p && GET_CODE (SET_DEST (p)) == REG
4317               && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4318               && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4319             {
4320               CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4321               nparm_regs--;
4322             }
4323         }
4324
4325       /* If we found all the parameter loads, then we want to insert
4326          before the first parameter load.
4327
4328          If we did not find all the parameter loads, then we might have
4329          stopped on the head of the block, which could be a CODE_LABEL.
4330          If we inserted before the CODE_LABEL, then we would be putting
4331          the insn in the wrong basic block.  In that case, put the insn
4332          after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
4333       while (GET_CODE (insn) == CODE_LABEL
4334              || NOTE_INSN_BASIC_BLOCK_P (insn))
4335         insn = NEXT_INSN (insn);
4336
4337       new_insn = emit_block_insn_before (pat, insn, BASIC_BLOCK (bb));
4338     }
4339   else
4340     {
4341       new_insn = emit_insn_after (pat, insn);
4342       BLOCK_END (bb) = new_insn;
4343     }
4344
4345   /* Keep block number table up to date.
4346      Note, PAT could be a multiple insn sequence, we have to make
4347      sure that each insn in the sequence is handled.  */
4348   if (GET_CODE (pat) == SEQUENCE)
4349     {
4350       for (i = 0; i < XVECLEN (pat, 0); i++)
4351         {
4352           rtx insn = XVECEXP (pat, 0, i);
4353
4354           set_block_num (insn, bb);
4355           if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4356             add_label_notes (PATTERN (insn), new_insn);
4357
4358           note_stores (PATTERN (insn), record_set_info, insn);
4359         }
4360     }
4361   else
4362     {
4363       add_label_notes (SET_SRC (pat), new_insn);
4364       set_block_num (new_insn, bb);
4365
4366       /* Keep register set table up to date.  */
4367       record_one_set (regno, new_insn);
4368     }
4369
4370   gcse_create_count++;
4371
4372   if (gcse_file)
4373     {
4374       fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
4375                bb, INSN_UID (new_insn));
4376       fprintf (gcse_file, "copying expression %d to reg %d\n",
4377                expr->bitmap_index, regno);
4378     }
4379 }
4380
4381 /* Insert partially redundant expressions on edges in the CFG to make
4382    the expressions fully redundant.  */
4383
4384 static int
4385 pre_edge_insert (edge_list, index_map)
4386      struct edge_list *edge_list;
4387      struct expr **index_map;
4388 {
4389   int e, i, j, num_edges, set_size, did_insert = 0;
4390   sbitmap *inserted;
4391
4392   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4393      if it reaches any of the deleted expressions.  */
4394
4395   set_size = pre_insert_map[0]->size;
4396   num_edges = NUM_EDGES (edge_list);
4397   inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4398   sbitmap_vector_zero (inserted, num_edges);
4399
4400   for (e = 0; e < num_edges; e++)
4401     {
4402       int indx;
4403       basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4404       int bb = pred->index;
4405
4406       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4407         {
4408           SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4409
4410           for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4411             if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4412               {
4413                 struct expr *expr = index_map[j];
4414                 struct occr *occr;
4415
4416                 /* Now look at each deleted occurence of this expression.  */
4417                 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4418                   {
4419                     if (! occr->deleted_p)
4420                       continue;
4421
4422                     /* Insert this expression on this edge if if it would
4423                        reach the deleted occurence in BB.  */
4424                     if (!TEST_BIT (inserted[e], j))
4425                       {
4426                         rtx insn;
4427                         edge eg = INDEX_EDGE (edge_list, e);
4428
4429                         /* We can't insert anything on an abnormal and
4430                            critical edge, so we insert the insn at the end of
4431                            the previous block. There are several alternatives
4432                            detailed in Morgans book P277 (sec 10.5) for
4433                            handling this situation.  This one is easiest for
4434                            now.  */
4435
4436                         if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4437                           insert_insn_end_bb (index_map[j], bb, 0);
4438                         else
4439                           {
4440                             insn = process_insert_insn (index_map[j]);
4441                             insert_insn_on_edge (insn, eg);
4442                           }
4443
4444                         if (gcse_file)
4445                           {
4446                             fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
4447                                      bb,
4448                                      INDEX_EDGE_SUCC_BB (edge_list, e)->index);
4449                             fprintf (gcse_file, "copy expression %d\n",
4450                                      expr->bitmap_index);
4451                           }
4452
4453                         SET_BIT (inserted[e], j);
4454                         did_insert = 1;
4455                         gcse_create_count++;
4456                       }
4457                   }
4458               }
4459         }
4460     }
4461
4462   free (inserted);
4463   return did_insert;
4464 }
4465
4466 /* Copy the result of INSN to REG.  INDX is the expression number.  */
4467
4468 static void
4469 pre_insert_copy_insn (expr, insn)
4470      struct expr *expr;
4471      rtx insn;
4472 {
4473   rtx reg = expr->reaching_reg;
4474   int regno = REGNO (reg);
4475   int indx = expr->bitmap_index;
4476   rtx set = single_set (insn);
4477   rtx new_insn;
4478   int bb = BLOCK_NUM (insn);
4479
4480   if (!set)
4481     abort ();
4482
4483   new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4484                               insn);
4485
4486   /* Keep block number table up to date.  */
4487   set_block_num (new_insn, bb);
4488
4489   /* Keep register set table up to date.  */
4490   record_one_set (regno, new_insn);
4491   if (insn == BLOCK_END (bb))
4492     BLOCK_END (bb) = new_insn;
4493
4494   gcse_create_count++;
4495
4496   if (gcse_file)
4497     fprintf (gcse_file,
4498              "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4499               BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4500               INSN_UID (insn), regno);
4501 }
4502
4503 /* Copy available expressions that reach the redundant expression
4504    to `reaching_reg'.  */
4505
4506 static void
4507 pre_insert_copies ()
4508 {
4509   unsigned int i;
4510   struct expr *expr;
4511   struct occr *occr;
4512   struct occr *avail;
4513
4514   /* For each available expression in the table, copy the result to
4515      `reaching_reg' if the expression reaches a deleted one.
4516
4517      ??? The current algorithm is rather brute force.
4518      Need to do some profiling.  */
4519
4520   for (i = 0; i < expr_hash_table_size; i++)
4521     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4522       {
4523         /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
4524            we don't want to insert a copy here because the expression may not
4525            really be redundant.  So only insert an insn if the expression was
4526            deleted.  This test also avoids further processing if the
4527            expression wasn't deleted anywhere.  */
4528         if (expr->reaching_reg == NULL)
4529           continue;
4530
4531         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4532           {
4533             if (! occr->deleted_p)
4534               continue;
4535
4536             for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4537               {
4538                 rtx insn = avail->insn;
4539
4540                 /* No need to handle this one if handled already.  */
4541                 if (avail->copied_p)
4542                   continue;
4543
4544                 /* Don't handle this one if it's a redundant one.  */
4545                 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4546                   continue;
4547
4548                 /* Or if the expression doesn't reach the deleted one.  */
4549                 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4550                                                BLOCK_NUM (occr->insn)))
4551                   continue;
4552
4553                 /* Copy the result of avail to reaching_reg.  */
4554                 pre_insert_copy_insn (expr, insn);
4555                 avail->copied_p = 1;
4556               }
4557           }
4558       }
4559 }
4560
4561 /* Delete redundant computations.
4562    Deletion is done by changing the insn to copy the `reaching_reg' of
4563    the expression into the result of the SET.  It is left to later passes
4564    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4565
4566    Returns non-zero if a change is made.  */
4567
4568 static int
4569 pre_delete ()
4570 {
4571   unsigned int i;
4572   int changed;
4573   struct expr *expr;
4574   struct occr *occr;
4575
4576   changed = 0;
4577   for (i = 0; i < expr_hash_table_size; i++)
4578     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4579       {
4580         int indx = expr->bitmap_index;
4581
4582         /* We only need to search antic_occr since we require
4583            ANTLOC != 0.  */
4584
4585         for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4586           {
4587             rtx insn = occr->insn;
4588             rtx set;
4589             int bb = BLOCK_NUM (insn);
4590
4591             if (TEST_BIT (pre_delete_map[bb], indx))
4592               {
4593                 set = single_set (insn);
4594                 if (! set)
4595                   abort ();
4596
4597                 /* Create a pseudo-reg to store the result of reaching
4598                    expressions into.  Get the mode for the new pseudo from
4599                    the mode of the original destination pseudo.  */
4600                 if (expr->reaching_reg == NULL)
4601                   expr->reaching_reg
4602                     = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4603
4604                 /* In theory this should never fail since we're creating
4605                    a reg->reg copy.
4606
4607                    However, on the x86 some of the movXX patterns actually
4608                    contain clobbers of scratch regs.  This may cause the
4609                    insn created by validate_change to not match any pattern
4610                    and thus cause validate_change to fail.   */
4611                 if (validate_change (insn, &SET_SRC (set),
4612                                      expr->reaching_reg, 0))
4613                   {
4614                     occr->deleted_p = 1;
4615                     SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4616                     changed = 1;
4617                     gcse_subst_count++;
4618                   }
4619
4620                 if (gcse_file)
4621                   {
4622                     fprintf (gcse_file,
4623                              "PRE: redundant insn %d (expression %d) in ",
4624                                INSN_UID (insn), indx);
4625                     fprintf (gcse_file, "bb %d, reaching reg is %d\n",
4626                              bb, REGNO (expr->reaching_reg));
4627                   }
4628               }
4629           }
4630       }
4631
4632   return changed;
4633 }
4634
4635 /* Perform GCSE optimizations using PRE.
4636    This is called by one_pre_gcse_pass after all the dataflow analysis
4637    has been done.
4638
4639    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
4640    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
4641    Compiler Design and Implementation.
4642
4643    ??? A new pseudo reg is created to hold the reaching expression.  The nice
4644    thing about the classical approach is that it would try to use an existing
4645    reg.  If the register can't be adequately optimized [i.e. we introduce
4646    reload problems], one could add a pass here to propagate the new register
4647    through the block.
4648
4649    ??? We don't handle single sets in PARALLELs because we're [currently] not
4650    able to copy the rest of the parallel when we insert copies to create full
4651    redundancies from partial redundancies.  However, there's no reason why we
4652    can't handle PARALLELs in the cases where there are no partial
4653    redundancies.  */
4654
4655 static int
4656 pre_gcse ()
4657 {
4658   unsigned int i;
4659   int did_insert, changed;
4660   struct expr **index_map;
4661   struct expr *expr;
4662
4663   /* Compute a mapping from expression number (`bitmap_index') to
4664      hash table entry.  */
4665
4666   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4667   for (i = 0; i < expr_hash_table_size; i++)
4668     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4669       index_map[expr->bitmap_index] = expr;
4670
4671   /* Reset bitmap used to track which insns are redundant.  */
4672   pre_redundant_insns = sbitmap_alloc (max_cuid);
4673   sbitmap_zero (pre_redundant_insns);
4674
4675   /* Delete the redundant insns first so that
4676      - we know what register to use for the new insns and for the other
4677        ones with reaching expressions
4678      - we know which insns are redundant when we go to create copies  */
4679
4680   changed = pre_delete ();
4681
4682   did_insert = pre_edge_insert (edge_list, index_map);
4683
4684   /* In other places with reaching expressions, copy the expression to the
4685      specially allocated pseudo-reg that reaches the redundant expr.  */
4686   pre_insert_copies ();
4687   if (did_insert)
4688     {
4689       commit_edge_insertions ();
4690       changed = 1;
4691     }
4692
4693   free (index_map);
4694   free (pre_redundant_insns);
4695   return changed;
4696 }
4697
4698 /* Top level routine to perform one PRE GCSE pass.
4699
4700    Return non-zero if a change was made.  */
4701
4702 static int
4703 one_pre_gcse_pass (pass)
4704      int pass;
4705 {
4706   int changed = 0;
4707
4708   gcse_subst_count = 0;
4709   gcse_create_count = 0;
4710
4711   alloc_expr_hash_table (max_cuid);
4712   add_noreturn_fake_exit_edges ();
4713   compute_expr_hash_table ();
4714   if (gcse_file)
4715     dump_hash_table (gcse_file, "Expression", expr_hash_table,
4716                      expr_hash_table_size, n_exprs);
4717
4718   if (n_exprs > 0)
4719     {
4720       alloc_pre_mem (n_basic_blocks, n_exprs);
4721       compute_pre_data ();
4722       changed |= pre_gcse ();
4723       free_edge_list (edge_list);
4724       free_pre_mem ();
4725     }
4726
4727   remove_fake_edges ();
4728   free_expr_hash_table ();
4729
4730   if (gcse_file)
4731     {
4732       fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
4733                current_function_name, pass, bytes_used);
4734       fprintf (gcse_file, "%d substs, %d insns created\n",
4735                gcse_subst_count, gcse_create_count);
4736     }
4737
4738   return changed;
4739 }
4740 \f
4741 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4742    We have to add REG_LABEL notes, because the following loop optimization
4743    pass requires them.  */
4744
4745 /* ??? This is very similar to the loop.c add_label_notes function.  We
4746    could probably share code here.  */
4747
4748 /* ??? If there was a jump optimization pass after gcse and before loop,
4749    then we would not need to do this here, because jump would add the
4750    necessary REG_LABEL notes.  */
4751
4752 static void
4753 add_label_notes (x, insn)
4754      rtx x;
4755      rtx insn;
4756 {
4757   enum rtx_code code = GET_CODE (x);
4758   int i, j;
4759   const char *fmt;
4760
4761   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4762     {
4763       /* This code used to ignore labels that referred to dispatch tables to
4764          avoid flow generating (slighly) worse code.
4765
4766          We no longer ignore such label references (see LABEL_REF handling in
4767          mark_jump_label for additional information).  */
4768
4769       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4770                                             REG_NOTES (insn));
4771       return;
4772     }
4773
4774   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4775     {
4776       if (fmt[i] == 'e')
4777         add_label_notes (XEXP (x, i), insn);
4778       else if (fmt[i] == 'E')
4779         for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4780           add_label_notes (XVECEXP (x, i, j), insn);
4781     }
4782 }
4783
4784 /* Compute transparent outgoing information for each block.
4785
4786    An expression is transparent to an edge unless it is killed by
4787    the edge itself.  This can only happen with abnormal control flow,
4788    when the edge is traversed through a call.  This happens with
4789    non-local labels and exceptions.
4790
4791    This would not be necessary if we split the edge.  While this is
4792    normally impossible for abnormal critical edges, with some effort
4793    it should be possible with exception handling, since we still have
4794    control over which handler should be invoked.  But due to increased
4795    EH table sizes, this may not be worthwhile.  */
4796
4797 static void
4798 compute_transpout ()
4799 {
4800   int bb;
4801   unsigned int i;
4802   struct expr *expr;
4803
4804   sbitmap_vector_ones (transpout, n_basic_blocks);
4805
4806   for (bb = 0; bb < n_basic_blocks; ++bb)
4807     {
4808       /* Note that flow inserted a nop a the end of basic blocks that
4809          end in call instructions for reasons other than abnormal
4810          control flow.  */
4811       if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4812         continue;
4813
4814       for (i = 0; i < expr_hash_table_size; i++)
4815         for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4816           if (GET_CODE (expr->expr) == MEM)
4817             {
4818               if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
4819                   && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
4820                 continue;
4821
4822               /* ??? Optimally, we would use interprocedural alias
4823                  analysis to determine if this mem is actually killed
4824                  by this call.  */
4825               RESET_BIT (transpout[bb], expr->bitmap_index);
4826             }
4827     }
4828 }
4829
4830 /* Removal of useless null pointer checks */
4831
4832 /* Called via note_stores.  X is set by SETTER.  If X is a register we must
4833    invalidate nonnull_local and set nonnull_killed.  DATA is really a
4834    `null_pointer_info *'.
4835
4836    We ignore hard registers.  */
4837
4838 static void
4839 invalidate_nonnull_info (x, setter, data)
4840      rtx x;
4841      rtx setter ATTRIBUTE_UNUSED;
4842      void *data;
4843 {
4844   unsigned int regno;
4845   struct null_pointer_info *npi = (struct null_pointer_info *) data;
4846
4847   while (GET_CODE (x) == SUBREG)
4848     x = SUBREG_REG (x);
4849
4850   /* Ignore anything that is not a register or is a hard register.  */
4851   if (GET_CODE (x) != REG
4852       || REGNO (x) < npi->min_reg
4853       || REGNO (x) >= npi->max_reg)
4854     return;
4855
4856   regno = REGNO (x) - npi->min_reg;
4857
4858   RESET_BIT (npi->nonnull_local[npi->current_block], regno);
4859   SET_BIT (npi->nonnull_killed[npi->current_block], regno);
4860 }
4861
4862 /* Do null-pointer check elimination for the registers indicated in
4863    NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
4864    they are not our responsibility to free.  */
4865
4866 static void
4867 delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
4868      unsigned int *block_reg;
4869      sbitmap *nonnull_avin;
4870      sbitmap *nonnull_avout;
4871      struct null_pointer_info *npi;
4872 {
4873   int bb;
4874   int current_block;
4875   sbitmap *nonnull_local = npi->nonnull_local;
4876   sbitmap *nonnull_killed = npi->nonnull_killed;
4877
4878   /* Compute local properties, nonnull and killed.  A register will have
4879      the nonnull property if at the end of the current block its value is
4880      known to be nonnull.  The killed property indicates that somewhere in
4881      the block any information we had about the register is killed.
4882
4883      Note that a register can have both properties in a single block.  That
4884      indicates that it's killed, then later in the block a new value is
4885      computed.  */
4886   sbitmap_vector_zero (nonnull_local, n_basic_blocks);
4887   sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
4888
4889   for (current_block = 0; current_block < n_basic_blocks; current_block++)
4890     {
4891       rtx insn, stop_insn;
4892
4893       /* Set the current block for invalidate_nonnull_info.  */
4894       npi->current_block = current_block;
4895
4896       /* Scan each insn in the basic block looking for memory references and
4897          register sets.  */
4898       stop_insn = NEXT_INSN (BLOCK_END (current_block));
4899       for (insn = BLOCK_HEAD (current_block);
4900            insn != stop_insn;
4901            insn = NEXT_INSN (insn))
4902         {
4903           rtx set;
4904           rtx reg;
4905
4906           /* Ignore anything that is not a normal insn.  */
4907           if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4908             continue;
4909
4910           /* Basically ignore anything that is not a simple SET.  We do have
4911              to make sure to invalidate nonnull_local and set nonnull_killed
4912              for such insns though.  */
4913           set = single_set (insn);
4914           if (!set)
4915             {
4916               note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4917               continue;
4918             }
4919
4920           /* See if we've got a useable memory load.  We handle it first
4921              in case it uses its address register as a dest (which kills
4922              the nonnull property).  */
4923           if (GET_CODE (SET_SRC (set)) == MEM
4924               && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
4925               && REGNO (reg) >= npi->min_reg
4926               && REGNO (reg) < npi->max_reg)
4927             SET_BIT (nonnull_local[current_block],
4928                      REGNO (reg) - npi->min_reg);
4929
4930           /* Now invalidate stuff clobbered by this insn.  */
4931           note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
4932
4933           /* And handle stores, we do these last since any sets in INSN can
4934              not kill the nonnull property if it is derived from a MEM
4935              appearing in a SET_DEST.  */
4936           if (GET_CODE (SET_DEST (set)) == MEM
4937               && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
4938               && REGNO (reg) >= npi->min_reg
4939               && REGNO (reg) < npi->max_reg)
4940             SET_BIT (nonnull_local[current_block],
4941                      REGNO (reg) - npi->min_reg);
4942         }
4943     }
4944
4945   /* Now compute global properties based on the local properties.   This
4946      is a classic global availablity algorithm.  */
4947   compute_available (nonnull_local, nonnull_killed,
4948                      nonnull_avout, nonnull_avin);
4949
4950   /* Now look at each bb and see if it ends with a compare of a value
4951      against zero.  */
4952   for (bb = 0; bb < n_basic_blocks; bb++)
4953     {
4954       rtx last_insn = BLOCK_END (bb);
4955       rtx condition, earliest;
4956       int compare_and_branch;
4957
4958       /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
4959          since BLOCK_REG[BB] is zero if this block did not end with a
4960          comparison against zero, this condition works.  */
4961       if (block_reg[bb] < npi->min_reg
4962           || block_reg[bb] >= npi->max_reg)
4963         continue;
4964
4965       /* LAST_INSN is a conditional jump.  Get its condition.  */
4966       condition = get_condition (last_insn, &earliest);
4967
4968       /* If we can't determine the condition then skip.  */
4969       if (! condition)
4970         continue;
4971
4972       /* Is the register known to have a nonzero value?  */
4973       if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
4974         continue;
4975
4976       /* Try to compute whether the compare/branch at the loop end is one or
4977          two instructions.  */
4978       if (earliest == last_insn)
4979         compare_and_branch = 1;
4980       else if (earliest == prev_nonnote_insn (last_insn))
4981         compare_and_branch = 2;
4982       else
4983         continue;
4984
4985       /* We know the register in this comparison is nonnull at exit from
4986          this block.  We can optimize this comparison.  */
4987       if (GET_CODE (condition) == NE)
4988         {
4989           rtx new_jump;
4990
4991           new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
4992                                             last_insn);
4993           JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
4994           LABEL_NUSES (JUMP_LABEL (new_jump))++;
4995           emit_barrier_after (new_jump);
4996         }
4997       delete_insn (last_insn);
4998       if (compare_and_branch == 2)
4999         delete_insn (earliest);
5000
5001       /* Don't check this block again.  (Note that BLOCK_END is
5002          invalid here; we deleted the last instruction in the
5003          block.)  */
5004       block_reg[bb] = 0;
5005     }
5006 }
5007
5008 /* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5009    at compile time.
5010
5011    This is conceptually similar to global constant/copy propagation and
5012    classic global CSE (it even uses the same dataflow equations as cprop).
5013
5014    If a register is used as memory address with the form (mem (reg)), then we
5015    know that REG can not be zero at that point in the program.  Any instruction
5016    which sets REG "kills" this property.
5017
5018    So, if every path leading to a conditional branch has an available memory
5019    reference of that form, then we know the register can not have the value
5020    zero at the conditional branch.
5021
5022    So we merely need to compute the local properies and propagate that data
5023    around the cfg, then optimize where possible.
5024
5025    We run this pass two times.  Once before CSE, then again after CSE.  This
5026    has proven to be the most profitable approach.  It is rare for new
5027    optimization opportunities of this nature to appear after the first CSE
5028    pass.
5029
5030    This could probably be integrated with global cprop with a little work.  */
5031
5032 void
5033 delete_null_pointer_checks (f)
5034      rtx f ATTRIBUTE_UNUSED;
5035 {
5036   sbitmap *nonnull_avin, *nonnull_avout;
5037   unsigned int *block_reg;
5038   int bb;
5039   int reg;
5040   int regs_per_pass;
5041   int max_reg;
5042   struct null_pointer_info npi;
5043
5044   /* If we have only a single block, then there's nothing to do.  */
5045   if (n_basic_blocks <= 1)
5046     return;
5047
5048   /* Trying to perform global optimizations on flow graphs which have
5049      a high connectivity will take a long time and is unlikely to be
5050      particularly useful.
5051
5052      In normal circumstances a cfg should have about twice has many edges
5053      as blocks.  But we do not want to punish small functions which have
5054      a couple switch statements.  So we require a relatively large number
5055      of basic blocks and the ratio of edges to blocks to be high.  */
5056   if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5057     return;
5058
5059   /* We need four bitmaps, each with a bit for each register in each
5060      basic block.  */
5061   max_reg = max_reg_num ();
5062   regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5063
5064   /* Allocate bitmaps to hold local and global properties.  */
5065   npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5066   npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5067   nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5068   nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5069
5070   /* Go through the basic blocks, seeing whether or not each block
5071      ends with a conditional branch whose condition is a comparison
5072      against zero.  Record the register compared in BLOCK_REG.  */
5073   block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
5074   for (bb = 0; bb < n_basic_blocks; bb++)
5075     {
5076       rtx last_insn = BLOCK_END (bb);
5077       rtx condition, earliest, reg;
5078
5079       /* We only want conditional branches.  */
5080       if (GET_CODE (last_insn) != JUMP_INSN
5081           || !any_condjump_p (last_insn)
5082           || !onlyjump_p (last_insn))
5083         continue;
5084
5085       /* LAST_INSN is a conditional jump.  Get its condition.  */
5086       condition = get_condition (last_insn, &earliest);
5087
5088       /* If we were unable to get the condition, or it is not a equality
5089          comparison against zero then there's nothing we can do.  */
5090       if (!condition
5091           || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5092           || GET_CODE (XEXP (condition, 1)) != CONST_INT
5093           || (XEXP (condition, 1)
5094               != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5095         continue;
5096
5097       /* We must be checking a register against zero.  */
5098       reg = XEXP (condition, 0);
5099       if (GET_CODE (reg) != REG)
5100         continue;
5101
5102       block_reg[bb] = REGNO (reg);
5103     }
5104
5105   /* Go through the algorithm for each block of registers.  */
5106   for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5107     {
5108       npi.min_reg = reg;
5109       npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5110       delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5111                                     nonnull_avout, &npi);
5112     }
5113
5114   /* Free the table of registers compared at the end of every block.  */
5115   free (block_reg);
5116
5117   /* Free bitmaps.  */
5118   free (npi.nonnull_local);
5119   free (npi.nonnull_killed);
5120   free (nonnull_avin);
5121   free (nonnull_avout);
5122 }
5123
5124 /* Code Hoisting variables and subroutines.  */
5125
5126 /* Very busy expressions.  */
5127 static sbitmap *hoist_vbein;
5128 static sbitmap *hoist_vbeout;
5129
5130 /* Hoistable expressions.  */
5131 static sbitmap *hoist_exprs;
5132
5133 /* Dominator bitmaps.  */
5134 static sbitmap *dominators;
5135
5136 /* ??? We could compute post dominators and run this algorithm in
5137    reverse to to perform tail merging, doing so would probably be
5138    more effective than the tail merging code in jump.c.
5139
5140    It's unclear if tail merging could be run in parallel with
5141    code hoisting.  It would be nice.  */
5142
5143 /* Allocate vars used for code hoisting analysis.  */
5144
5145 static void
5146 alloc_code_hoist_mem (n_blocks, n_exprs)
5147      int n_blocks, n_exprs;
5148 {
5149   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5150   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5151   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5152
5153   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5154   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5155   hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5156   transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5157
5158   dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5159 }
5160
5161 /* Free vars used for code hoisting analysis.  */
5162
5163 static void
5164 free_code_hoist_mem ()
5165 {
5166   free (antloc);
5167   free (transp);
5168   free (comp);
5169
5170   free (hoist_vbein);
5171   free (hoist_vbeout);
5172   free (hoist_exprs);
5173   free (transpout);
5174
5175   free (dominators);
5176 }
5177
5178 /* Compute the very busy expressions at entry/exit from each block.
5179
5180    An expression is very busy if all paths from a given point
5181    compute the expression.  */
5182
5183 static void
5184 compute_code_hoist_vbeinout ()
5185 {
5186   int bb, changed, passes;
5187
5188   sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5189   sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5190
5191   passes = 0;
5192   changed = 1;
5193
5194   while (changed)
5195     {
5196       changed = 0;
5197
5198       /* We scan the blocks in the reverse order to speed up
5199          the convergence.  */
5200       for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5201         {
5202           changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5203                                            hoist_vbeout[bb], transp[bb]);
5204           if (bb != n_basic_blocks - 1)
5205             sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5206         }
5207
5208       passes++;
5209     }
5210
5211   if (gcse_file)
5212     fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5213 }
5214
5215 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
5216
5217 static void
5218 compute_code_hoist_data ()
5219 {
5220   compute_local_properties (transp, comp, antloc, 0);
5221   compute_transpout ();
5222   compute_code_hoist_vbeinout ();
5223   compute_flow_dominators (dominators, NULL);
5224   if (gcse_file)
5225     fprintf (gcse_file, "\n");
5226 }
5227
5228 /* Determine if the expression identified by EXPR_INDEX would
5229    reach BB unimpared if it was placed at the end of EXPR_BB.
5230
5231    It's unclear exactly what Muchnick meant by "unimpared".  It seems
5232    to me that the expression must either be computed or transparent in
5233    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
5234    would allow the expression to be hoisted out of loops, even if
5235    the expression wasn't a loop invariant.
5236
5237    Contrast this to reachability for PRE where an expression is
5238    considered reachable if *any* path reaches instead of *all*
5239    paths.  */
5240
5241 static int
5242 hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
5243      int expr_bb;
5244      int expr_index;
5245      int bb;
5246      char *visited;
5247 {
5248   edge pred;
5249   int visited_allocated_locally = 0;
5250
5251
5252   if (visited == NULL)
5253     {
5254        visited_allocated_locally = 1;
5255        visited = xcalloc (n_basic_blocks, 1);
5256     }
5257
5258   visited[expr_bb] = 1;
5259   for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
5260     {
5261       int pred_bb = pred->src->index;
5262
5263       if (pred->src == ENTRY_BLOCK_PTR)
5264         break;
5265       else if (visited[pred_bb])
5266         continue;
5267
5268       /* Does this predecessor generate this expression?  */
5269       else if (TEST_BIT (comp[pred_bb], expr_index))
5270         break;
5271       else if (! TEST_BIT (transp[pred_bb], expr_index))
5272         break;
5273
5274       /* Not killed.  */
5275       else
5276         {
5277           visited[pred_bb] = 1;
5278           if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
5279                                            pred_bb, visited))
5280             break;
5281         }
5282     }
5283   if (visited_allocated_locally)
5284     free (visited);
5285
5286   return (pred == NULL);
5287 }
5288 \f
5289 /* Actually perform code hoisting.  */
5290
5291 static void
5292 hoist_code ()
5293 {
5294   int bb, dominated;
5295   unsigned int i;
5296   struct expr **index_map;
5297   struct expr *expr;
5298
5299   sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
5300
5301   /* Compute a mapping from expression number (`bitmap_index') to
5302      hash table entry.  */
5303
5304   index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
5305   for (i = 0; i < expr_hash_table_size; i++)
5306     for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
5307       index_map[expr->bitmap_index] = expr;
5308
5309   /* Walk over each basic block looking for potentially hoistable
5310      expressions, nothing gets hoisted from the entry block.  */
5311   for (bb = 0; bb < n_basic_blocks; bb++)
5312     {
5313       int found = 0;
5314       int insn_inserted_p;
5315
5316       /* Examine each expression that is very busy at the exit of this
5317          block.  These are the potentially hoistable expressions.  */
5318       for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
5319         {
5320           int hoistable = 0;
5321
5322           if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
5323             {
5324               /* We've found a potentially hoistable expression, now
5325                  we look at every block BB dominates to see if it
5326                  computes the expression.  */
5327               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5328                 {
5329                   /* Ignore self dominance.  */
5330                   if (bb == dominated
5331                       || ! TEST_BIT (dominators[dominated], bb))
5332                     continue;
5333
5334                   /* We've found a dominated block, now see if it computes
5335                      the busy expression and whether or not moving that
5336                      expression to the "beginning" of that block is safe.  */
5337                   if (!TEST_BIT (antloc[dominated], i))
5338                     continue;
5339
5340                   /* Note if the expression would reach the dominated block
5341                      unimpared if it was placed at the end of BB.
5342
5343                      Keep track of how many times this expression is hoistable
5344                      from a dominated block into BB.  */
5345                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5346                     hoistable++;
5347                 }
5348
5349               /* If we found more than one hoistable occurence of this
5350                  expression, then note it in the bitmap of expressions to
5351                  hoist.  It makes no sense to hoist things which are computed
5352                  in only one BB, and doing so tends to pessimize register
5353                  allocation.  One could increase this value to try harder
5354                  to avoid any possible code expansion due to register
5355                  allocation issues; however experiments have shown that
5356                  the vast majority of hoistable expressions are only movable
5357                  from two successors, so raising this threshhold is likely
5358                  to nullify any benefit we get from code hoisting.  */
5359               if (hoistable > 1)
5360                 {
5361                   SET_BIT (hoist_exprs[bb], i);
5362                   found = 1;
5363                 }
5364             }
5365         }
5366
5367       /* If we found nothing to hoist, then quit now.  */
5368       if (! found)
5369         continue;
5370
5371       /* Loop over all the hoistable expressions.  */
5372       for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
5373         {
5374           /* We want to insert the expression into BB only once, so
5375              note when we've inserted it.  */
5376           insn_inserted_p = 0;
5377
5378           /* These tests should be the same as the tests above.  */
5379           if (TEST_BIT (hoist_vbeout[bb], i))
5380             {
5381               /* We've found a potentially hoistable expression, now
5382                  we look at every block BB dominates to see if it
5383                  computes the expression.  */
5384               for (dominated = 0; dominated < n_basic_blocks; dominated++)
5385                 {
5386                   /* Ignore self dominance.  */
5387                   if (bb == dominated
5388                       || ! TEST_BIT (dominators[dominated], bb))
5389                     continue;
5390
5391                   /* We've found a dominated block, now see if it computes
5392                      the busy expression and whether or not moving that
5393                      expression to the "beginning" of that block is safe.  */
5394                   if (!TEST_BIT (antloc[dominated], i))
5395                     continue;
5396
5397                   /* The expression is computed in the dominated block and
5398                      it would be safe to compute it at the start of the
5399                      dominated block.  Now we have to determine if the
5400                      expresion would reach the dominated block if it was
5401                      placed at the end of BB.  */
5402                   if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
5403                     {
5404                       struct expr *expr = index_map[i];
5405                       struct occr *occr = expr->antic_occr;
5406                       rtx insn;
5407                       rtx set;
5408
5409                       /* Find the right occurence of this expression.  */
5410                       while (BLOCK_NUM (occr->insn) != dominated && occr)
5411                         occr = occr->next;
5412
5413                       /* Should never happen.  */
5414                       if (!occr)
5415                         abort ();
5416
5417                       insn = occr->insn;
5418
5419                       set = single_set (insn);
5420                       if (! set)
5421                         abort ();
5422
5423                       /* Create a pseudo-reg to store the result of reaching
5424                          expressions into.  Get the mode for the new pseudo
5425                          from the mode of the original destination pseudo.  */
5426                       if (expr->reaching_reg == NULL)
5427                         expr->reaching_reg
5428                           = gen_reg_rtx (GET_MODE (SET_DEST (set)));
5429
5430                       /* In theory this should never fail since we're creating
5431                          a reg->reg copy.
5432
5433                          However, on the x86 some of the movXX patterns
5434                          actually contain clobbers of scratch regs.  This may
5435                          cause the insn created by validate_change to not
5436                          match any pattern and thus cause validate_change to
5437                          fail.  */
5438                       if (validate_change (insn, &SET_SRC (set),
5439                                            expr->reaching_reg, 0))
5440                         {
5441                           occr->deleted_p = 1;
5442                           if (!insn_inserted_p)
5443                             {
5444                               insert_insn_end_bb (index_map[i], bb, 0);
5445                               insn_inserted_p = 1;
5446                             }
5447                         }
5448                     }
5449                 }
5450             }
5451         }
5452     }
5453
5454     free (index_map);
5455 }
5456
5457 /* Top level routine to perform one code hoisting (aka unification) pass
5458
5459    Return non-zero if a change was made.  */
5460
5461 static int
5462 one_code_hoisting_pass ()
5463 {
5464   int changed = 0;
5465
5466   alloc_expr_hash_table (max_cuid);
5467   compute_expr_hash_table ();
5468   if (gcse_file)
5469     dump_hash_table (gcse_file, "Code Hosting Expressions", expr_hash_table,
5470                      expr_hash_table_size, n_exprs);
5471
5472   if (n_exprs > 0)
5473     {
5474       alloc_code_hoist_mem (n_basic_blocks, n_exprs);
5475       compute_code_hoist_data ();
5476       hoist_code ();
5477       free_code_hoist_mem ();
5478     }
5479
5480   free_expr_hash_table ();
5481
5482   return changed;
5483 }